From 9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 Mon Sep 17 00:00:00 2001
From: maxim-yurchuk <maxim-yurchuk@yandex-team.com>
Date: Wed, 9 Oct 2024 12:29:46 +0300
Subject: publishFullContrib: true for ydb

<HIDDEN_URL>

commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
---
 .../include/numpy/core/feature_detection_cmath.h | 5 + .../include/numpy/core/feature_detection_locale.h | 5 + .../include/numpy/core/feature_detection_math.h | 5 + .../include/numpy/core/feature_detection_stdio.h | 5 + .../numpy/core/include/numpy/__multiarray_api.h | 5 + .../include/numpy/core/include/numpy/__ufunc_api.h | 5 + .../include/numpy/_neighborhood_iterator_imp.h | 5 + .../numpy/core/include/numpy/_numpyconfig-linux.h | 5 + .../core/include/numpy/_numpyconfig-osx-arm64.h | 5 + .../core/include/numpy/_numpyconfig-osx-x86_64.h | 5 + .../numpy/core/include/numpy/_numpyconfig-win.h | 5 + .../numpy/core/include/numpy/_numpyconfig.darwin.h | 5 + .../numpy/core/include/numpy/_numpyconfig.h | 5 + .../numpy/core/include/numpy/_numpyconfig.linux.h | 5 + .../core/include/numpy/_numpyconfig.windows.h | 5 + .../core/include/numpy/_umath_doc_generated.h | 5 + .../core/include/numpy/config-linux-aarch64.h | 5 + .../numpy/core/include/numpy/config-linux-ppc64.h | 5 + .../numpy/core/include/numpy/config-linux-x86_64.h | 5 + .../numpy/core/include/numpy/config-osx-arm64.h | 5 + .../numpy/core/include/numpy/config-osx-x86_64.h | 5 + .../numpy/core/include/numpy/config-win-x86_64.h | 5 + .../numpy/core/include/numpy/config.darwin.h | 5 + .../numpy/core/include/numpy/config.linux.h | 5 + .../numpy/core/include/numpy/config.windows.h | 5 + .../core/include/numpy/npy_1_7_deprecated_api.h | 5 + .../numpy/core/include/numpy/npy_interrupt.h | 5 + .../include/numpy/core/include/numpy/old_defines.h | 5 + .../include/numpy/core/include/numpy/oldnumeric.h | 5 + .../include/numpy/core/src/_simd/_simd.dispatch.h | 5 + .../numpy/include/numpy/core/src/_simd/_simd.h | 5 + .../numpy/include/numpy/core/src/_simd/_simd_inc.h | 5 + .../include/numpy/core/src/common/float_status.hpp | 5 + .../numpy/include/numpy/core/src/common/half.hpp | 5 + .../include/numpy/core/src/common/half_private.hpp | 5 + .../numpy/include/numpy/core/src/common/meta.hpp | 5 + .../numpy/include/numpy/core/src/common/npdef.hpp | 5 + .../numpy/include/numpy/core/src/common/npstd.hpp | 5 + .../include/numpy/core/src/common/npy_cblas_base.h | 5 + .../numpy/core/src/common/npy_cpuinfo_parser.h | 5 + .../numpy/core/src/common/simd/avx2/arithmetic.h | 5 + .../include/numpy/core/src/common/simd/avx2/avx2.h | 5 + .../numpy/core/src/common/simd/avx2/conversion.h | 5 + .../include/numpy/core/src/common/simd/avx2/math.h | 5 + .../numpy/core/src/common/simd/avx2/memory.h | 5 + .../include/numpy/core/src/common/simd/avx2/misc.h | 5 + .../numpy/core/src/common/simd/avx2/operators.h | 5 + .../numpy/core/src/common/simd/avx2/reorder.h | 5 + .../numpy/core/src/common/simd/avx2/utils.h | 5 + .../numpy/core/src/common/simd/avx512/arithmetic.h | 5 + .../numpy/core/src/common/simd/avx512/avx512.h | 5 + .../numpy/core/src/common/simd/avx512/conversion.h | 5 + .../numpy/core/src/common/simd/avx512/maskop.h | 5 + .../numpy/core/src/common/simd/avx512/math.h | 5 + .../numpy/core/src/common/simd/avx512/memory.h | 5 + .../numpy/core/src/common/simd/avx512/misc.h | 5 + .../numpy/core/src/common/simd/avx512/operators.h | 5 + .../numpy/core/src/common/simd/avx512/reorder.h | 5 + .../numpy/core/src/common/simd/avx512/utils.h | 5 + .../numpy/core/src/common/simd/emulate_maskop.h | 5 +
.../include/numpy/core/src/common/simd/intdiv.h | 5 + .../numpy/core/src/common/simd/neon/arithmetic.h | 5 + .../numpy/core/src/common/simd/neon/conversion.h | 5 + .../include/numpy/core/src/common/simd/neon/math.h | 5 + .../numpy/core/src/common/simd/neon/memory.h | 5 + .../include/numpy/core/src/common/simd/neon/misc.h | 5 + .../include/numpy/core/src/common/simd/neon/neon.h | 5 + .../numpy/core/src/common/simd/neon/operators.h | 5 + .../numpy/core/src/common/simd/neon/reorder.h | 5 + .../include/numpy/core/src/common/simd/simd.h | 5 + .../numpy/core/src/common/simd/simd_utils.h | 5 + .../numpy/core/src/common/simd/sse/arithmetic.h | 5 + .../numpy/core/src/common/simd/sse/conversion.h | 5 + .../include/numpy/core/src/common/simd/sse/math.h | 5 + .../numpy/core/src/common/simd/sse/memory.h | 5 + .../include/numpy/core/src/common/simd/sse/misc.h | 5 + .../numpy/core/src/common/simd/sse/operators.h | 5 + .../numpy/core/src/common/simd/sse/reorder.h | 5 + .../include/numpy/core/src/common/simd/sse/sse.h | 5 + .../include/numpy/core/src/common/simd/sse/utils.h | 5 + .../numpy/core/src/common/simd/vec/arithmetic.h | 5 + .../numpy/core/src/common/simd/vec/conversion.h | 5 + .../include/numpy/core/src/common/simd/vec/math.h | 5 + .../numpy/core/src/common/simd/vec/memory.h | 5 + .../include/numpy/core/src/common/simd/vec/misc.h | 5 + .../numpy/core/src/common/simd/vec/operators.h | 5 + .../numpy/core/src/common/simd/vec/reorder.h | 5 + .../include/numpy/core/src/common/simd/vec/utils.h | 5 + .../include/numpy/core/src/common/simd/vec/vec.h | 5 + .../numpy/include/numpy/core/src/common/utils.hpp | 5 + .../include/numpy/core/src/multiarray/_datetime.h | 5 + .../numpy/core/src/multiarray/argfunc.dispatch.h | 5 + .../numpy/core/src/multiarray/arrayobject.h | 5 + .../include/numpy/core/src/multiarray/buffer.h | 5 + .../numpy/core/src/multiarray/calculation.h | 5 + .../numpy/core/src/multiarray/compiled_base.h | 5 + .../include/numpy/core/src/multiarray/convert.h | 5 + .../numpy/core/src/multiarray/datetime_busday.h | 5 + .../numpy/core/src/multiarray/datetime_busdaycal.h | 5 + .../numpy/core/src/multiarray/datetime_strings.h | 5 + .../include/numpy/core/src/multiarray/descriptor.h | 5 + .../include/numpy/core/src/multiarray/dragon4.h | 5 + .../numpy/core/src/multiarray/dtype_traversal.h | 5 + .../numpy/core/src/multiarray/einsum_debug.h | 5 + .../numpy/core/src/multiarray/einsum_sumprod.h | 5 + .../src/multiarray/experimental_public_dtype_api.h | 5 + .../include/numpy/core/src/multiarray/getset.h | 5 + .../include/numpy/core/src/multiarray/hashdescr.h | 5 + .../numpy/core/src/multiarray/item_selection.h | 5 + .../include/numpy/core/src/multiarray/iterators.h | 5 + .../src/multiarray/legacy_dtype_implementation.h | 5 + .../include/numpy/core/src/multiarray/mapping.h | 5 + .../include/numpy/core/src/multiarray/methods.h | 5 + .../numpy/core/src/multiarray/multiarraymodule.h | 5 + .../numpy/core/src/multiarray/nditer_pywrap.h | 5 + .../include/numpy/core/src/multiarray/npy_buffer.h | 5 + .../include/numpy/core/src/multiarray/refcount.h | 5 + .../include/numpy/core/src/multiarray/sequence.h | 5 + .../include/numpy/core/src/multiarray/strfuncs.h | 5 + .../include/numpy/core/src/multiarray/temp_elide.h | 5 + .../core/src/multiarray/textreading/readtext.h | 5 + .../include/numpy/core/src/multiarray/typeinfo.h | 5 + .../include/numpy/core/src/multiarray/usertypes.h | 5 + .../numpy/include/numpy/core/src/multiarray/vdot.h | 5 + .../numpy/core/src/npymath/npy_math_common.h | 5 + 
.../numpy/core/src/npymath/npy_math_private.h | 5 + .../numpy/core/src/npysort/npysort_common.h | 5 + .../numpy/core/src/npysort/npysort_heapsort.h | 5 + .../numpy/core/src/npysort/simd_qsort.dispatch.h | 5 + .../include/numpy/core/src/npysort/simd_qsort.hpp | 5 + .../core/src/npysort/simd_qsort_16bit.dispatch.h | 5 + .../x86-simd-sort/benchmarks/bench-argsort.hpp | 5 + .../benchmarks/bench-partial-qsort.hpp | 5 + .../x86-simd-sort/benchmarks/bench-qselect.hpp | 5 + .../x86-simd-sort/benchmarks/bench-qsort-common.h | 5 + .../x86-simd-sort/benchmarks/bench-qsort.hpp | 5 + .../x86-simd-sort/src/avx512-16bit-common.h | 5 + .../x86-simd-sort/src/avx512-16bit-qsort.hpp | 5 + .../x86-simd-sort/src/avx512-32bit-qsort.hpp | 5 + .../x86-simd-sort/src/avx512-64bit-argsort.hpp | 5 + .../x86-simd-sort/src/avx512-64bit-common.h | 5 + .../src/avx512-64bit-keyvalue-networks.hpp | 5 + .../src/avx512-64bit-keyvaluesort.hpp | 5 + .../x86-simd-sort/src/avx512-64bit-qsort.hpp | 5 + .../x86-simd-sort/src/avx512-common-argsort.h | 5 + .../x86-simd-sort/src/avx512-common-qsort.h | 5 + .../x86-simd-sort/src/avx512fp16-16bit-qsort.hpp | 5 + .../x86-simd-sort/tests/test-partial-qsort.hpp | 5 + .../npysort/x86-simd-sort/tests/test-qselect.hpp | 5 + .../x86-simd-sort/tests/test-qsort-common.h | 5 + .../npysort/x86-simd-sort/tests/test-qsort-fp.hpp | 5 + .../src/npysort/x86-simd-sort/tests/test-qsort.hpp | 5 + .../core/src/npysort/x86-simd-sort/utils/cpuinfo.h | 5 + .../src/npysort/x86-simd-sort/utils/rand_array.h | 5 + .../numpy/core/src/umath/_umath_tests.dispatch.h | 5 + .../numpy/include/numpy/core/src/umath/cpuid.h | 5 + .../include/numpy/core/src/umath/dispatching.h | 5 + .../numpy/include/numpy/core/src/umath/extobj.h | 5 + .../numpy/core/src/umath/fast_loop_macros.h | 5 + .../numpy/core/src/umath/legacy_array_method.h | 5 + .../numpy/core/src/umath/loops_modulo.dispatch.c | 5 + .../include/numpy/core/src/umath/loops_utils.h | 5 + .../include/numpy/core/src/umath/npy_simd_data.h | 5 + .../numpy/include/numpy/core/src/umath/override.h | 5 + .../numpy/include/numpy/core/src/umath/reduction.h | 5 + .../include/numpy/core/src/umath/string_ufuncs.h | 5 + .../include/npy_cpu_dispatch_config.aarch64.h | 5 + .../include/npy_cpu_dispatch_config.x86_64.h | 5 + contrib/python/numpy/include/numpy/fft/fftpack.h | 5 + .../numpy/include/numpy/linalg/lapack_lite/f2c.h | 5 + .../numpy/linalg/lapack_lite/lapack_lite_names.h | 5 + .../numpy/random/src/distributions/logfactorial.h | 5 + .../random/src/distributions/ziggurat_constants.h | 5 + .../numpy/random/src/mt19937/mt19937-jump.h | 5 + .../include/numpy/random/src/mt19937/randomkit.h | 5 + .../include/numpy/random/src/pcg64/pcg64.orig.h | 5 + .../numpy/random/src/splitmix64/splitmix64.h | 5 + contrib/python/numpy/include/ya.make | 19 + contrib/python/numpy/include_gen.py | 65 + contrib/python/numpy/py2/.dist-info/METADATA | 56 + .../python/numpy/py2/.dist-info/entry_points.txt | 5 + contrib/python/numpy/py2/.dist-info/top_level.txt | 1 + .../numpy/py2/numpy/_build_utils/__init__.py | 1 + .../py2/numpy/_build_utils/apple_accelerate.py | 28 + .../py2/numpy/_build_utils/src/apple_sgemv_fix.c | 253 + contrib/python/numpy/py2/numpy/compat/setup.py | 12 + .../numpy/py2/numpy/compat/tests/__init__.py | 0 .../numpy/py2/numpy/compat/tests/test_compat.py | 26 + contrib/python/numpy/py2/numpy/conftest.py | 67 + .../py2/numpy/core/code_generators/__init__.py | 1 + .../numpy/py2/numpy/core/code_generators/genapi.py | 510 + .../core/code_generators/generate_ufunc_api.py | 211 + 
.../numpy/core/code_generators/generate_umath.py | 1145 + .../py2/numpy/core/code_generators/numpy_api.py | 423 + .../numpy/core/code_generators/ufunc_docstrings.py | 3930 ++ .../py2/numpy/core/include/numpy/oldnumeric.h | 25 + .../py2/numpy/core/lib/npy-pkg-config/mlib.ini | 12 + .../py2/numpy/core/lib/npy-pkg-config/npymath.ini | 20 + contrib/python/numpy/py2/numpy/core/mlib.ini.in | 12 + contrib/python/numpy/py2/numpy/core/npymath.ini.in | 20 + .../py2/numpy/core/src/common/npy_binsearch.h.src | 144 + .../py2/numpy/core/src/common/npy_partition.h.src | 129 + .../numpy/py2/numpy/core/src/common/npy_sort.h.src | 83 + .../py2/numpy/core/src/common/python_xerbla.c | 51 + .../py2/numpy/core/src/common/templ_common.h.src | 46 + .../core/src/multiarray/_multiarray_tests.c.src | 2103 + .../py2/numpy/core/src/multiarray/arraytypes.c.src | 4921 +++ .../py2/numpy/core/src/multiarray/einsum.c.src | 2890 ++ .../src/multiarray/lowlevel_strided_loops.c.src | 1785 + .../numpy/core/src/multiarray/nditer_templ.c.src | 615 + .../numpy/py2/numpy/core/src/multiarray/refcount.h | 19 + .../numpy/core/src/multiarray/scalartypes.c.src | 4496 ++ .../numpy/py2/numpy/core/src/npymath/ieee754.c.src | 841 + .../numpy/core/src/npymath/npy_math_complex.c.src | 1811 + .../numpy/core/src/npymath/npy_math_internal.h.src | 718 + .../py2/numpy/core/src/npysort/binsearch.c.src | 250 + .../py2/numpy/core/src/npysort/heapsort.c.src | 402 + .../py2/numpy/core/src/npysort/mergesort.c.src | 511 + .../py2/numpy/core/src/npysort/quicksort.c.src | 634 + .../py2/numpy/core/src/npysort/selection.c.src | 418 + .../numpy/core/src/umath/_operand_flag_tests.c.src | 105 + .../py2/numpy/core/src/umath/_rational_tests.c.src | 1409 + .../numpy/core/src/umath/_struct_ufunc_tests.c.src | 125 + .../py2/numpy/core/src/umath/_umath_tests.c.src | 643 + .../numpy/py2/numpy/core/src/umath/funcs.inc.src | 432 + .../numpy/py2/numpy/core/src/umath/loops.c.src | 2988 ++ .../numpy/py2/numpy/core/src/umath/loops.h.src | 525 + .../numpy/py2/numpy/core/src/umath/matmul.c.src | 503 + .../numpy/py2/numpy/core/src/umath/matmul.h.src | 12 + .../py2/numpy/core/src/umath/scalarmath.c.src | 1704 + .../numpy/py2/numpy/core/src/umath/simd.inc.src | 1219 + .../python/numpy/py2/numpy/core/tests/__init__.py | 0 .../python/numpy/py2/numpy/core/tests/_locales.py | 76 + .../py2/numpy/core/tests/data/astype_copy.pkl | Bin 0 -> 716 bytes .../numpy/core/tests/data/recarray_from_file.fits | Bin 0 -> 8640 bytes .../python/numpy/py2/numpy/core/tests/test_abc.py | 56 + .../python/numpy/py2/numpy/core/tests/test_api.py | 516 + .../numpy/py2/numpy/core/tests/test_arrayprint.py | 893 + .../numpy/py2/numpy/core/tests/test_datetime.py | 2228 + .../py2/numpy/core/tests/test_defchararray.py | 692 + .../py2/numpy/core/tests/test_deprecations.py | 535 + .../numpy/py2/numpy/core/tests/test_dtype.py | 1138 + .../numpy/py2/numpy/core/tests/test_einsum.py | 1014 + .../numpy/py2/numpy/core/tests/test_errstate.py | 41 + .../numpy/py2/numpy/core/tests/test_extint128.py | 221 + .../py2/numpy/core/tests/test_function_base.py | 370 + .../numpy/py2/numpy/core/tests/test_getlimits.py | 123 + .../python/numpy/py2/numpy/core/tests/test_half.py | 518 + .../numpy/py2/numpy/core/tests/test_indexerrors.py | 123 + .../numpy/py2/numpy/core/tests/test_indexing.py | 1334 + .../py2/numpy/core/tests/test_item_selection.py | 87 + .../numpy/py2/numpy/core/tests/test_longdouble.py | 233 + .../numpy/py2/numpy/core/tests/test_machar.py | 32 + .../numpy/py2/numpy/core/tests/test_mem_overlap.py | 950 + 
.../numpy/py2/numpy/core/tests/test_memmap.py | 206 + .../numpy/py2/numpy/core/tests/test_multiarray.py | 8325 ++++ .../numpy/py2/numpy/core/tests/test_nditer.py | 2861 ++ .../numpy/py2/numpy/core/tests/test_numeric.py | 2797 ++ .../py2/numpy/core/tests/test_numerictypes.py | 500 + .../numpy/py2/numpy/core/tests/test_overrides.py | 392 + .../numpy/py2/numpy/core/tests/test_print.py | 205 + .../numpy/py2/numpy/core/tests/test_records.py | 499 + .../numpy/py2/numpy/core/tests/test_regression.py | 2487 ++ .../py2/numpy/core/tests/test_scalar_ctors.py | 65 + .../py2/numpy/core/tests/test_scalarbuffer.py | 105 + .../py2/numpy/core/tests/test_scalarinherit.py | 75 + .../numpy/py2/numpy/core/tests/test_scalarmath.py | 666 + .../numpy/py2/numpy/core/tests/test_scalarprint.py | 326 + .../numpy/py2/numpy/core/tests/test_shape_base.py | 706 + .../numpy/py2/numpy/core/tests/test_ufunc.py | 1859 + .../numpy/py2/numpy/core/tests/test_umath.py | 2920 ++ .../py2/numpy/core/tests/test_umath_complex.py | 543 + .../numpy/py2/numpy/core/tests/test_unicode.py | 396 + .../numpy/distutils/mingw/gfortran_vs2003_hack.c | 6 + contrib/python/numpy/py2/numpy/distutils/setup.py | 17 + .../numpy/py2/numpy/distutils/tests/__init__.py | 0 .../py2/numpy/distutils/tests/test_exec_command.py | 215 + .../py2/numpy/distutils/tests/test_fcompiler.py | 81 + .../numpy/distutils/tests/test_fcompiler_gnu.py | 57 + .../numpy/distutils/tests/test_fcompiler_intel.py | 32 + .../numpy/distutils/tests/test_fcompiler_nagfor.py | 24 + .../numpy/distutils/tests/test_from_template.py | 44 + .../py2/numpy/distutils/tests/test_misc_util.py | 84 + .../numpy/distutils/tests/test_npy_pkg_config.py | 86 + .../py2/numpy/distutils/tests/test_shell_utils.py | 79 + .../py2/numpy/distutils/tests/test_system_info.py | 237 + contrib/python/numpy/py2/numpy/f2py/setup.cfg | 3 + contrib/python/numpy/py2/numpy/f2py/setup.py | 73 + .../python/numpy/py2/numpy/f2py/src/test/Makefile | 96 + contrib/python/numpy/py2/numpy/f2py/src/test/bar.f | 11 + contrib/python/numpy/py2/numpy/f2py/src/test/foo.f | 11 + .../python/numpy/py2/numpy/f2py/src/test/foo90.f90 | 13 + .../numpy/py2/numpy/f2py/src/test/foomodule.c | 142 + .../python/numpy/py2/numpy/f2py/src/test/wrap.f | 70 + .../python/numpy/py2/numpy/f2py/tests/__init__.py | 0 .../f2py/tests/src/array_from_pyobj/wrapmodule.c | 224 + .../f2py/tests/src/assumed_shape/.f2py_f2cmap | 1 + .../f2py/tests/src/assumed_shape/foo_free.f90 | 34 + .../numpy/f2py/tests/src/assumed_shape/foo_mod.f90 | 41 + .../numpy/f2py/tests/src/assumed_shape/foo_use.f90 | 19 + .../f2py/tests/src/assumed_shape/precision.f90 | 4 + .../numpy/py2/numpy/f2py/tests/src/common/block.f | 11 + .../numpy/py2/numpy/f2py/tests/src/kind/foo.f90 | 20 + .../numpy/py2/numpy/f2py/tests/src/mixed/foo.f | 5 + .../py2/numpy/f2py/tests/src/mixed/foo_fixed.f90 | 8 + .../py2/numpy/f2py/tests/src/mixed/foo_free.f90 | 8 + .../f2py/tests/src/parameter/constant_both.f90 | 57 + .../f2py/tests/src/parameter/constant_compound.f90 | 15 + .../f2py/tests/src/parameter/constant_integer.f90 | 22 + .../tests/src/parameter/constant_non_compound.f90 | 23 + .../f2py/tests/src/parameter/constant_real.f90 | 23 + .../py2/numpy/f2py/tests/src/regression/inout.f90 | 9 + .../numpy/py2/numpy/f2py/tests/src/size/foo.f90 | 44 + .../numpy/py2/numpy/f2py/tests/src/string/char.f90 | 29 + .../py2/numpy/f2py/tests/test_array_from_pyobj.py | 581 + .../py2/numpy/f2py/tests/test_assumed_shape.py | 33 + .../py2/numpy/f2py/tests/test_block_docstring.py | 24 + 
.../numpy/py2/numpy/f2py/tests/test_callback.py | 165 + .../numpy/py2/numpy/f2py/tests/test_common.py | 27 + .../py2/numpy/f2py/tests/test_compile_function.py | 125 + .../python/numpy/py2/numpy/f2py/tests/test_kind.py | 34 + .../numpy/py2/numpy/f2py/tests/test_mixed.py | 38 + .../numpy/py2/numpy/f2py/tests/test_parameter.py | 118 + .../py2/numpy/f2py/tests/test_quoted_character.py | 35 + .../numpy/py2/numpy/f2py/tests/test_regression.py | 29 + .../py2/numpy/f2py/tests/test_return_character.py | 146 + .../py2/numpy/f2py/tests/test_return_complex.py | 169 + .../py2/numpy/f2py/tests/test_return_integer.py | 181 + .../py2/numpy/f2py/tests/test_return_logical.py | 189 + .../numpy/py2/numpy/f2py/tests/test_return_real.py | 210 + .../py2/numpy/f2py/tests/test_semicolon_split.py | 65 + .../python/numpy/py2/numpy/f2py/tests/test_size.py | 51 + .../numpy/py2/numpy/f2py/tests/test_string.py | 24 + contrib/python/numpy/py2/numpy/f2py/tests/util.py | 360 + contrib/python/numpy/py2/numpy/fft/setup.py | 19 + .../python/numpy/py2/numpy/fft/tests/__init__.py | 0 .../numpy/py2/numpy/fft/tests/test_fftpack.py | 185 + .../numpy/py2/numpy/fft/tests/test_helper.py | 248 + contrib/python/numpy/py2/numpy/lib/setup.py | 12 + .../python/numpy/py2/numpy/lib/tests/__init__.py | 0 .../numpy/py2/numpy/lib/tests/data/py2-objarr.npy | Bin 0 -> 258 bytes .../numpy/py2/numpy/lib/tests/data/py2-objarr.npz | Bin 0 -> 366 bytes .../numpy/py2/numpy/lib/tests/data/py3-objarr.npy | Bin 0 -> 341 bytes .../numpy/py2/numpy/lib/tests/data/py3-objarr.npz | Bin 0 -> 449 bytes .../numpy/py2/numpy/lib/tests/data/python3.npy | Bin 0 -> 96 bytes .../py2/numpy/lib/tests/data/win64python2.npy | Bin 0 -> 96 bytes .../numpy/py2/numpy/lib/tests/test__datasource.py | 378 + .../numpy/py2/numpy/lib/tests/test__iotools.py | 352 + .../numpy/py2/numpy/lib/tests/test__version.py | 66 + .../numpy/py2/numpy/lib/tests/test_arraypad.py | 1286 + .../numpy/py2/numpy/lib/tests/test_arraysetops.py | 623 + .../numpy/py2/numpy/lib/tests/test_arrayterator.py | 48 + .../numpy/py2/numpy/lib/tests/test_financial.py | 340 + .../numpy/py2/numpy/lib/tests/test_format.py | 940 + .../py2/numpy/lib/tests/test_function_base.py | 3141 ++ .../numpy/py2/numpy/lib/tests/test_histograms.py | 844 + .../numpy/py2/numpy/lib/tests/test_index_tricks.py | 454 + .../python/numpy/py2/numpy/lib/tests/test_io.py | 2518 ++ .../numpy/py2/numpy/lib/tests/test_mixins.py | 224 + .../numpy/py2/numpy/lib/tests/test_nanfunctions.py | 927 + .../numpy/py2/numpy/lib/tests/test_packbits.py | 268 + .../numpy/py2/numpy/lib/tests/test_polynomial.py | 261 + .../numpy/py2/numpy/lib/tests/test_recfunctions.py | 980 + .../numpy/py2/numpy/lib/tests/test_regression.py | 254 + .../numpy/py2/numpy/lib/tests/test_shape_base.py | 708 + .../py2/numpy/lib/tests/test_stride_tricks.py | 445 + .../numpy/py2/numpy/lib/tests/test_twodim_base.py | 534 + .../numpy/py2/numpy/lib/tests/test_type_check.py | 442 + .../numpy/py2/numpy/lib/tests/test_ufunclike.py | 106 + .../python/numpy/py2/numpy/lib/tests/test_utils.py | 91 + .../py2/numpy/linalg/lapack_lite/clapack_scrub.py | 310 + .../numpy/py2/numpy/linalg/lapack_lite/f2c.c | 764 + .../numpy/py2/numpy/linalg/lapack_lite/f2c.h | 388 + .../numpy/py2/numpy/linalg/lapack_lite/f2c_blas.c | 21615 ++++++++++ .../py2/numpy/linalg/lapack_lite/f2c_c_lapack.c | 29861 +++++++++++++ .../numpy/linalg/lapack_lite/f2c_c_lapack.f.patch | 32 + .../py2/numpy/linalg/lapack_lite/f2c_config.c | 2068 + .../numpy/linalg/lapack_lite/f2c_config.c.patch | 18 + 
.../py2/numpy/linalg/lapack_lite/f2c_d_lapack.c | 41864 +++++++++++++++++++ .../numpy/linalg/lapack_lite/f2c_d_lapack.f.patch | 32 + .../py2/numpy/linalg/lapack_lite/f2c_lapack.c | 1651 + .../numpy/linalg/lapack_lite/f2c_lapack.f.patch | 48 + .../py2/numpy/linalg/lapack_lite/f2c_s_lapack.c | 41691 ++++++++++++++++++ .../numpy/linalg/lapack_lite/f2c_s_lapack.f.patch | 32 + .../py2/numpy/linalg/lapack_lite/f2c_z_lapack.c | 29996 +++++++++++++ .../numpy/linalg/lapack_lite/f2c_z_lapack.f.patch | 32 + .../numpy/py2/numpy/linalg/lapack_lite/fortran.py | 124 + .../py2/numpy/linalg/lapack_lite/make_lite.py | 343 + .../py2/numpy/linalg/lapack_lite/python_xerbla.c | 48 + .../py2/numpy/linalg/lapack_lite/wrapped_routines | 51 + contrib/python/numpy/py2/numpy/linalg/setup.py | 60 + .../numpy/py2/numpy/linalg/tests/__init__.py | 0 .../numpy/py2/numpy/linalg/tests/test_build.py | 55 + .../py2/numpy/linalg/tests/test_deprecations.py | 22 + .../numpy/py2/numpy/linalg/tests/test_linalg.py | 1964 + .../py2/numpy/linalg/tests/test_regression.py | 150 + .../numpy/py2/numpy/linalg/umath_linalg.c.src | 3688 ++ contrib/python/numpy/py2/numpy/ma/setup.py | 13 + .../python/numpy/py2/numpy/ma/tests/__init__.py | 0 .../python/numpy/py2/numpy/ma/tests/test_core.py | 5205 +++ .../numpy/py2/numpy/ma/tests/test_deprecations.py | 70 + .../python/numpy/py2/numpy/ma/tests/test_extras.py | 1678 + .../numpy/py2/numpy/ma/tests/test_mrecords.py | 495 + .../python/numpy/py2/numpy/ma/tests/test_old_ma.py | 860 + .../numpy/py2/numpy/ma/tests/test_regression.py | 89 + .../numpy/py2/numpy/ma/tests/test_subclassing.py | 351 + contrib/python/numpy/py2/numpy/matrixlib/setup.py | 13 + .../numpy/py2/numpy/matrixlib/tests/__init__.py | 0 .../py2/numpy/matrixlib/tests/test_defmatrix.py | 460 + .../py2/numpy/matrixlib/tests/test_interaction.py | 363 + .../numpy/matrixlib/tests/test_masked_matrix.py | 231 + .../numpy/matrixlib/tests/test_matrix_linalg.py | 95 + .../py2/numpy/matrixlib/tests/test_multiarray.py | 18 + .../py2/numpy/matrixlib/tests/test_numeric.py | 19 + .../py2/numpy/matrixlib/tests/test_regression.py | 33 + contrib/python/numpy/py2/numpy/polynomial/setup.py | 11 + .../numpy/py2/numpy/polynomial/tests/__init__.py | 0 .../py2/numpy/polynomial/tests/test_chebyshev.py | 621 + .../py2/numpy/polynomial/tests/test_classes.py | 642 + .../py2/numpy/polynomial/tests/test_hermite.py | 557 + .../py2/numpy/polynomial/tests/test_hermite_e.py | 558 + .../py2/numpy/polynomial/tests/test_laguerre.py | 539 + .../py2/numpy/polynomial/tests/test_legendre.py | 558 + .../py2/numpy/polynomial/tests/test_polynomial.py | 578 + .../py2/numpy/polynomial/tests/test_polyutils.py | 108 + .../py2/numpy/polynomial/tests/test_printing.py | 68 + .../py2/numpy/random/mtrand/generate_mtrand_c.py | 42 + .../py2/numpy/random/mtrand/randint_helpers.pxi.in | 77 + contrib/python/numpy/py2/numpy/random/setup.py | 63 + .../numpy/py2/numpy/random/tests/__init__.py | 0 .../numpy/py2/numpy/random/tests/test_random.py | 1663 + .../py2/numpy/random/tests/test_regression.py | 157 + contrib/python/numpy/py2/numpy/setup.py | 28 + contrib/python/numpy/py2/numpy/testing/setup.py | 21 + .../numpy/py2/numpy/testing/tests/__init__.py | 0 .../py2/numpy/testing/tests/test_decorators.py | 216 + .../py2/numpy/testing/tests/test_doctesting.py | 59 + .../numpy/py2/numpy/testing/tests/test_utils.py | 1597 + contrib/python/numpy/py2/numpy/tests/__init__.py | 0 .../python/numpy/py2/numpy/tests/test_ctypeslib.py | 367 + .../python/numpy/py2/numpy/tests/test_matlib.py | 68 + 
.../numpy/py2/numpy/tests/test_numpy_version.py | 19 + .../numpy/py2/numpy/tests/test_public_api.py | 89 + .../python/numpy/py2/numpy/tests/test_reloading.py | 38 + .../python/numpy/py2/numpy/tests/test_scripts.py | 49 + .../python/numpy/py2/numpy/tests/test_warnings.py | 78 + .../numpy/py3/patches/01-fix-include-simd.h.patch | 105 + contrib/python/numpy/py3/patches/02-fix-doc.patch | 15 + contrib/python/numpy/py3/patches/03-fix-name.patch | 22 + .../python/numpy/py3/patches/04-fix-tests.patch | 281 + .../numpy/py3/patches/05-fix-win-build.patch | 38 + .../python/numpy/py3/patches/06-fix-config.h.patch | 19 + .../numpy/py3/patches/09-hack-for-matplotlib.patch | 12 + contrib/python/numpy/py3/patches/12-arrch64.patch | 327 + .../python/numpy/py3/patches/14-libunwind.h.patch | 22 + .../numpy/py3/patches/15-support-python-3.12.patch | 11 + 456 files changed, 295271 insertions(+) create mode 100644 contrib/python/numpy/include/numpy/core/feature_detection_cmath.h create mode 100644 contrib/python/numpy/include/numpy/core/feature_detection_locale.h create mode 100644 contrib/python/numpy/include/numpy/core/feature_detection_math.h create mode 100644 contrib/python/numpy/include/numpy/core/feature_detection_stdio.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/__multiarray_api.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/__ufunc_api.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_neighborhood_iterator_imp.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-linux.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-arm64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-win.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.darwin.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.linux.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.windows.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/_umath_doc_generated.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-linux-aarch64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-linux-ppc64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-linux-x86_64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-osx-arm64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-osx-x86_64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config-win-x86_64.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config.darwin.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config.linux.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/config.windows.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/npy_1_7_deprecated_api.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/npy_interrupt.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/old_defines.h create mode 100644 contrib/python/numpy/include/numpy/core/include/numpy/oldnumeric.h 
create mode 100644 contrib/python/numpy/include/numpy/core/src/_simd/_simd.dispatch.h create mode 100644 contrib/python/numpy/include/numpy/core/src/_simd/_simd.h create mode 100644 contrib/python/numpy/include/numpy/core/src/_simd/_simd_inc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/float_status.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/half.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/half_private.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/meta.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/npdef.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/npstd.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/common/npy_cblas_base.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/npy_cpuinfo_parser.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/arithmetic.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/avx2.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/conversion.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/math.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/memory.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/misc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/operators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/reorder.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx2/utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/arithmetic.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/avx512.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/conversion.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/maskop.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/math.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/memory.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/misc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/operators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/reorder.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/avx512/utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/emulate_maskop.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/intdiv.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/arithmetic.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/conversion.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/math.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/memory.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/misc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/neon.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/operators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/neon/reorder.h create mode 100644 
contrib/python/numpy/include/numpy/core/src/common/simd/simd.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/simd_utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/arithmetic.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/conversion.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/math.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/memory.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/misc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/operators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/reorder.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/sse.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/sse/utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/arithmetic.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/conversion.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/math.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/memory.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/misc.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/operators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/reorder.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/simd/vec/vec.h create mode 100644 contrib/python/numpy/include/numpy/core/src/common/utils.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/_datetime.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/argfunc.dispatch.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/arrayobject.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/buffer.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/calculation.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/compiled_base.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/convert.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busday.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busdaycal.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/datetime_strings.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/descriptor.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/dragon4.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/dtype_traversal.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/einsum_debug.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/einsum_sumprod.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/experimental_public_dtype_api.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/getset.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/hashdescr.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/item_selection.h create mode 100644 
contrib/python/numpy/include/numpy/core/src/multiarray/iterators.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/legacy_dtype_implementation.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/mapping.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/methods.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/multiarraymodule.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/nditer_pywrap.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/npy_buffer.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/refcount.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/sequence.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/strfuncs.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/temp_elide.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/textreading/readtext.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/typeinfo.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/usertypes.h create mode 100644 contrib/python/numpy/include/numpy/core/src/multiarray/vdot.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npymath/npy_math_common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npymath/npy_math_private.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/npysort_common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/npysort_heapsort.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.dispatch.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h create mode 
100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h create mode 100644 contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/_umath_tests.dispatch.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/cpuid.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/dispatching.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/extobj.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/fast_loop_macros.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/legacy_array_method.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/loops_modulo.dispatch.c create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/loops_utils.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/npy_simd_data.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/override.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/reduction.h create mode 100644 contrib/python/numpy/include/numpy/core/src/umath/string_ufuncs.h create mode 100644 contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h create mode 100644 contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h create mode 100644 contrib/python/numpy/include/numpy/fft/fftpack.h create mode 100644 contrib/python/numpy/include/numpy/linalg/lapack_lite/f2c.h create mode 100644 contrib/python/numpy/include/numpy/linalg/lapack_lite/lapack_lite_names.h create mode 100644 contrib/python/numpy/include/numpy/random/src/distributions/logfactorial.h create mode 100644 contrib/python/numpy/include/numpy/random/src/distributions/ziggurat_constants.h create mode 100644 contrib/python/numpy/include/numpy/random/src/mt19937/mt19937-jump.h create mode 100644 contrib/python/numpy/include/numpy/random/src/mt19937/randomkit.h create mode 100644 contrib/python/numpy/include/numpy/random/src/pcg64/pcg64.orig.h create mode 100644 contrib/python/numpy/include/numpy/random/src/splitmix64/splitmix64.h create mode 100644 contrib/python/numpy/include/ya.make create mode 100755 contrib/python/numpy/include_gen.py create mode 100644 contrib/python/numpy/py2/.dist-info/METADATA create mode 100644 contrib/python/numpy/py2/.dist-info/entry_points.txt create mode 100644 contrib/python/numpy/py2/.dist-info/top_level.txt create mode 100644 contrib/python/numpy/py2/numpy/_build_utils/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/_build_utils/apple_accelerate.py create mode 100644 contrib/python/numpy/py2/numpy/_build_utils/src/apple_sgemv_fix.c create mode 100644 
contrib/python/numpy/py2/numpy/compat/setup.py create mode 100644 contrib/python/numpy/py2/numpy/compat/tests/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/compat/tests/test_compat.py create mode 100644 contrib/python/numpy/py2/numpy/conftest.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/genapi.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/generate_ufunc_api.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/generate_umath.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/numpy_api.py create mode 100644 contrib/python/numpy/py2/numpy/core/code_generators/ufunc_docstrings.py create mode 100644 contrib/python/numpy/py2/numpy/core/include/numpy/oldnumeric.h create mode 100644 contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/mlib.ini create mode 100644 contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/npymath.ini create mode 100644 contrib/python/numpy/py2/numpy/core/mlib.ini.in create mode 100644 contrib/python/numpy/py2/numpy/core/npymath.ini.in create mode 100644 contrib/python/numpy/py2/numpy/core/src/common/npy_binsearch.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/common/npy_partition.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/common/npy_sort.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/common/python_xerbla.c create mode 100644 contrib/python/numpy/py2/numpy/core/src/common/templ_common.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/_multiarray_tests.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/arraytypes.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/einsum.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/lowlevel_strided_loops.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/nditer_templ.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/refcount.h create mode 100644 contrib/python/numpy/py2/numpy/core/src/multiarray/scalartypes.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npymath/ieee754.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_complex.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_internal.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npysort/binsearch.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npysort/heapsort.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npysort/mergesort.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npysort/quicksort.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/npysort/selection.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/_operand_flag_tests.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/_rational_tests.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/_struct_ufunc_tests.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/_umath_tests.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/funcs.inc.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/loops.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/loops.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/matmul.c.src 
create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/matmul.h.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/scalarmath.c.src create mode 100644 contrib/python/numpy/py2/numpy/core/src/umath/simd.inc.src create mode 100644 contrib/python/numpy/py2/numpy/core/tests/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/_locales.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/data/astype_copy.pkl create mode 100644 contrib/python/numpy/py2/numpy/core/tests/data/recarray_from_file.fits create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_abc.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_api.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_arrayprint.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_datetime.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_defchararray.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_deprecations.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_dtype.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_einsum.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_errstate.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_extint128.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_function_base.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_getlimits.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_half.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_indexerrors.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_indexing.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_item_selection.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_longdouble.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_machar.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_mem_overlap.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_memmap.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_multiarray.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_nditer.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_numeric.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_numerictypes.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_overrides.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_print.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_records.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_regression.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_scalar_ctors.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_scalarbuffer.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_scalarinherit.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_scalarmath.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_scalarprint.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_shape_base.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_ufunc.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_umath.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_umath_complex.py create mode 100644 contrib/python/numpy/py2/numpy/core/tests/test_unicode.py create 
mode 100644 contrib/python/numpy/py2/numpy/distutils/mingw/gfortran_vs2003_hack.c create mode 100644 contrib/python/numpy/py2/numpy/distutils/setup.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_exec_command.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_gnu.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_intel.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_nagfor.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_from_template.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_misc_util.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_npy_pkg_config.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_shell_utils.py create mode 100644 contrib/python/numpy/py2/numpy/distutils/tests/test_system_info.py create mode 100644 contrib/python/numpy/py2/numpy/f2py/setup.cfg create mode 100644 contrib/python/numpy/py2/numpy/f2py/setup.py create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/Makefile create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/bar.f create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/foo.f create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/foo90.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/foomodule.c create mode 100644 contrib/python/numpy/py2/numpy/f2py/src/test/wrap.f create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/__init__.py create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_free.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_use.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/precision.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/common/block.f create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/kind/foo.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo.f create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_fixed.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_free.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_both.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_compound.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_integer.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_non_compound.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_real.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/regression/inout.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/size/foo.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/src/string/char.f90 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_array_from_pyobj.py create mode 100644 
contrib/python/numpy/py2/numpy/f2py/tests/test_assumed_shape.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_block_docstring.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_callback.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_common.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_compile_function.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_kind.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_mixed.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_parameter.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_quoted_character.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_return_character.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_return_complex.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_return_integer.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_return_logical.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_return_real.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_semicolon_split.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_size.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/test_string.py
 create mode 100644 contrib/python/numpy/py2/numpy/f2py/tests/util.py
 create mode 100644 contrib/python/numpy/py2/numpy/fft/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/fft/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/fft/tests/test_fftpack.py
 create mode 100644 contrib/python/numpy/py2/numpy/fft/tests/test_helper.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npy
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npz
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npy
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npz
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/python3.npy
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/data/win64python2.npy
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test__datasource.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test__iotools.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test__version.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_arraypad.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_arraysetops.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_arrayterator.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_financial.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_format.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_function_base.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_histograms.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_index_tricks.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_io.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_mixins.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_nanfunctions.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_packbits.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_polynomial.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_recfunctions.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_shape_base.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_stride_tricks.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_twodim_base.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_type_check.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_ufunclike.py
 create mode 100644 contrib/python/numpy/py2/numpy/lib/tests/test_utils.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/clapack_scrub.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.h
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_blas.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.f.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/fortran.py
 create mode 100755 contrib/python/numpy/py2/numpy/linalg/lapack_lite/make_lite.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/python_xerbla.c
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/lapack_lite/wrapped_routines
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/tests/test_build.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/tests/test_deprecations.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/tests/test_linalg.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/linalg/umath_linalg.c.src
 create mode 100644 contrib/python/numpy/py2/numpy/ma/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_core.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_deprecations.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_extras.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_mrecords.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_old_ma.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/ma/tests/test_subclassing.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_defmatrix.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_interaction.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_masked_matrix.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_matrix_linalg.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_multiarray.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_numeric.py
 create mode 100644 contrib/python/numpy/py2/numpy/matrixlib/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_chebyshev.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_classes.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite_e.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_laguerre.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_legendre.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_polynomial.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_polyutils.py
 create mode 100644 contrib/python/numpy/py2/numpy/polynomial/tests/test_printing.py
 create mode 100644 contrib/python/numpy/py2/numpy/random/mtrand/generate_mtrand_c.py
 create mode 100644 contrib/python/numpy/py2/numpy/random/mtrand/randint_helpers.pxi.in
 create mode 100644 contrib/python/numpy/py2/numpy/random/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/random/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/random/tests/test_random.py
 create mode 100644 contrib/python/numpy/py2/numpy/random/tests/test_regression.py
 create mode 100644 contrib/python/numpy/py2/numpy/setup.py
 create mode 100755 contrib/python/numpy/py2/numpy/testing/setup.py
 create mode 100644 contrib/python/numpy/py2/numpy/testing/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/testing/tests/test_decorators.py
 create mode 100644 contrib/python/numpy/py2/numpy/testing/tests/test_doctesting.py
 create mode 100644 contrib/python/numpy/py2/numpy/testing/tests/test_utils.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/__init__.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_ctypeslib.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_matlib.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_numpy_version.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_public_api.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_reloading.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_scripts.py
 create mode 100644 contrib/python/numpy/py2/numpy/tests/test_warnings.py
 create mode 100644 contrib/python/numpy/py3/patches/01-fix-include-simd.h.patch
 create mode 100644 contrib/python/numpy/py3/patches/02-fix-doc.patch
 create mode 100644 contrib/python/numpy/py3/patches/03-fix-name.patch
 create mode 100644
contrib/python/numpy/py3/patches/04-fix-tests.patch create mode 100644 contrib/python/numpy/py3/patches/05-fix-win-build.patch create mode 100644 contrib/python/numpy/py3/patches/06-fix-config.h.patch create mode 100644 contrib/python/numpy/py3/patches/09-hack-for-matplotlib.patch create mode 100644 contrib/python/numpy/py3/patches/12-arrch64.patch create mode 100644 contrib/python/numpy/py3/patches/14-libunwind.h.patch create mode 100644 contrib/python/numpy/py3/patches/15-support-python-3.12.patch (limited to 'contrib/python/numpy') diff --git a/contrib/python/numpy/include/numpy/core/feature_detection_cmath.h b/contrib/python/numpy/include/numpy/core/feature_detection_cmath.h new file mode 100644 index 0000000000..1c24d1824c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/feature_detection_cmath.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/feature_detection_cmath.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/feature_detection_cmath.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/feature_detection_locale.h b/contrib/python/numpy/include/numpy/core/feature_detection_locale.h new file mode 100644 index 0000000000..48d8a43932 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/feature_detection_locale.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/feature_detection_locale.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/feature_detection_locale.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/feature_detection_math.h b/contrib/python/numpy/include/numpy/core/feature_detection_math.h new file mode 100644 index 0000000000..5c946b6a54 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/feature_detection_math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/feature_detection_math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/feature_detection_math.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/feature_detection_stdio.h b/contrib/python/numpy/include/numpy/core/feature_detection_stdio.h new file mode 100644 index 0000000000..b24b29c55b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/feature_detection_stdio.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/feature_detection_stdio.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/feature_detection_stdio.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/__multiarray_api.h b/contrib/python/numpy/include/numpy/core/include/numpy/__multiarray_api.h new file mode 100644 index 0000000000..704e17b587 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/__multiarray_api.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/__multiarray_api.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/__multiarray_api.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/__ufunc_api.h b/contrib/python/numpy/include/numpy/core/include/numpy/__ufunc_api.h new file mode 100644 index 0000000000..6785df6d88 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/__ufunc_api.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/__ufunc_api.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/__ufunc_api.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/include/numpy/_neighborhood_iterator_imp.h b/contrib/python/numpy/include/numpy/core/include/numpy/_neighborhood_iterator_imp.h new file mode 100644 index 0000000000..be9857e8e2 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_neighborhood_iterator_imp.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_neighborhood_iterator_imp.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/_neighborhood_iterator_imp.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-linux.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-linux.h new file mode 100644 index 0000000000..44d5262b38 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-linux.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig-linux.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig-linux.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-arm64.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-arm64.h new file mode 100644 index 0000000000..63ef09b4a5 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-arm64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig-osx-arm64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig-osx-arm64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h new file mode 100644 index 0000000000..70cbc4ba46 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig-osx-x86_64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-win.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-win.h new file mode 100644 index 0000000000..43b27500c4 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig-win.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig-win.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig-win.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.darwin.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.darwin.h new file mode 100644 index 0000000000..133454fb83 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.darwin.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig.darwin.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig.darwin.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.h new file mode 100644 index 0000000000..b1e695e259 --- /dev/null +++ 
b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.linux.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.linux.h new file mode 100644 index 0000000000..e2ef05c360 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.linux.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig.linux.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig.linux.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.windows.h b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.windows.h new file mode 100644 index 0000000000..58e7c28cbe --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_numpyconfig.windows.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/_numpyconfig.windows.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/_numpyconfig.windows.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/_umath_doc_generated.h b/contrib/python/numpy/include/numpy/core/include/numpy/_umath_doc_generated.h new file mode 100644 index 0000000000..063303bdc1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/_umath_doc_generated.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/_umath_doc_generated.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/_umath_doc_generated.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-aarch64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-aarch64.h new file mode 100644 index 0000000000..4ffead607e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-aarch64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-linux-aarch64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-linux-aarch64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-ppc64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-ppc64.h new file mode 100644 index 0000000000..52a50f5f4a --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-ppc64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-linux-ppc64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-linux-ppc64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-x86_64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-x86_64.h new file mode 100644 index 0000000000..222d5e3eb4 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-linux-x86_64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-linux-x86_64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-linux-x86_64.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-arm64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-arm64.h new file mode 100644 index 0000000000..e6da63e6d0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-arm64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-osx-arm64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-osx-arm64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-x86_64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-x86_64.h new file mode 100644 index 0000000000..2aec868649 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-osx-x86_64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-osx-x86_64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-osx-x86_64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config-win-x86_64.h b/contrib/python/numpy/include/numpy/core/include/numpy/config-win-x86_64.h new file mode 100644 index 0000000000..fa789661d2 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config-win-x86_64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/config-win-x86_64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/include/numpy/config-win-x86_64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config.darwin.h b/contrib/python/numpy/include/numpy/core/include/numpy/config.darwin.h new file mode 100644 index 0000000000..d3eed4a319 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config.darwin.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/config.darwin.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/config.darwin.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config.linux.h b/contrib/python/numpy/include/numpy/core/include/numpy/config.linux.h new file mode 100644 index 0000000000..1ceb3dfbb0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config.linux.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/config.linux.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/config.linux.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/config.windows.h b/contrib/python/numpy/include/numpy/core/include/numpy/config.windows.h new file mode 100644 index 0000000000..2795fbaa9a --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/config.windows.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/include/numpy/config.windows.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/config.windows.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/npy_1_7_deprecated_api.h b/contrib/python/numpy/include/numpy/core/include/numpy/npy_1_7_deprecated_api.h new file mode 100644 index 0000000000..cb15e5a3b6 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/npy_1_7_deprecated_api.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/npy_1_7_deprecated_api.h> +#else 
+#include <contrib/python/numpy/py2/numpy/core/include/numpy/npy_1_7_deprecated_api.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/npy_interrupt.h b/contrib/python/numpy/include/numpy/core/include/numpy/npy_interrupt.h new file mode 100644 index 0000000000..e56d1f5db9 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/npy_interrupt.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/npy_interrupt.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/npy_interrupt.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/old_defines.h b/contrib/python/numpy/include/numpy/core/include/numpy/old_defines.h new file mode 100644 index 0000000000..c6af8d9def --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/old_defines.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/old_defines.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/old_defines.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/include/numpy/oldnumeric.h b/contrib/python/numpy/include/numpy/core/include/numpy/oldnumeric.h new file mode 100644 index 0000000000..1e9c75cc33 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/include/numpy/oldnumeric.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/include/numpy/oldnumeric.h> +#else +#include <contrib/python/numpy/py2/numpy/core/include/numpy/oldnumeric.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/_simd/_simd.dispatch.h b/contrib/python/numpy/include/numpy/core/src/_simd/_simd.dispatch.h new file mode 100644 index 0000000000..fc59052238 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/_simd/_simd.dispatch.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/_simd/_simd.dispatch.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/_simd/_simd.dispatch.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/_simd/_simd.h b/contrib/python/numpy/include/numpy/core/src/_simd/_simd.h new file mode 100644 index 0000000000..f3d60f918a --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/_simd/_simd.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/_simd/_simd.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/_simd/_simd.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/_simd/_simd_inc.h b/contrib/python/numpy/include/numpy/core/src/_simd/_simd_inc.h new file mode 100644 index 0000000000..7397bdc1f5 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/_simd/_simd_inc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/_simd/_simd_inc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/_simd/_simd_inc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/float_status.hpp b/contrib/python/numpy/include/numpy/core/src/common/float_status.hpp new file mode 100644 index 0000000000..f54c27864c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/float_status.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/float_status.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/float_status.hpp> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/src/common/half.hpp b/contrib/python/numpy/include/numpy/core/src/common/half.hpp new file mode 100644 index 0000000000..9eb29fb4e3 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/half.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/half.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/half.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/half_private.hpp b/contrib/python/numpy/include/numpy/core/src/common/half_private.hpp new file mode 100644 index 0000000000..a51a2afb0f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/half_private.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/half_private.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/half_private.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/meta.hpp b/contrib/python/numpy/include/numpy/core/src/common/meta.hpp new file mode 100644 index 0000000000..9360d3ec2c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/meta.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/meta.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/meta.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/npdef.hpp b/contrib/python/numpy/include/numpy/core/src/common/npdef.hpp new file mode 100644 index 0000000000..a1e4cb16fd --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/npdef.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/npdef.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/npdef.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/npstd.hpp b/contrib/python/numpy/include/numpy/core/src/common/npstd.hpp new file mode 100644 index 0000000000..b18879aace --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/npstd.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/npstd.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/npstd.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/npy_cblas_base.h b/contrib/python/numpy/include/numpy/core/src/common/npy_cblas_base.h new file mode 100644 index 0000000000..a2f2b85739 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/npy_cblas_base.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/npy_cblas_base.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/npy_cblas_base.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/npy_cpuinfo_parser.h b/contrib/python/numpy/include/numpy/core/src/common/npy_cpuinfo_parser.h new file mode 100644 index 0000000000..786f7ce58b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/npy_cpuinfo_parser.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/npy_cpuinfo_parser.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/npy_cpuinfo_parser.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/arithmetic.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/arithmetic.h new file mode 100644 index 
0000000000..1a900906bc --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/arithmetic.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/arithmetic.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/arithmetic.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/avx2.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/avx2.h new file mode 100644 index 0000000000..070011d476 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/avx2.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/avx2.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/avx2.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/conversion.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/conversion.h new file mode 100644 index 0000000000..5d6c6481ea --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/conversion.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/conversion.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/conversion.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/math.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/math.h new file mode 100644 index 0000000000..7bc1899045 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/math.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/memory.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/memory.h new file mode 100644 index 0000000000..0705c4a647 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/memory.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/memory.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/memory.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/misc.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/misc.h new file mode 100644 index 0000000000..9bd32194e5 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/misc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/operators.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/operators.h new file mode 100644 index 0000000000..73daace51f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/operators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/operators.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/operators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/reorder.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/reorder.h 
new file mode 100644 index 0000000000..1f400d1a32 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/reorder.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/reorder.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/reorder.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/utils.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/utils.h new file mode 100644 index 0000000000..c47b55439d --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx2/utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx2/utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx2/utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/arithmetic.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/arithmetic.h new file mode 100644 index 0000000000..1ea858a5fd --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/arithmetic.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/arithmetic.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/arithmetic.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/avx512.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/avx512.h new file mode 100644 index 0000000000..70594caaf7 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/avx512.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/avx512.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/avx512.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/conversion.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/conversion.h new file mode 100644 index 0000000000..9c8097f80e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/conversion.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/conversion.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/conversion.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/maskop.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/maskop.h new file mode 100644 index 0000000000..718290de66 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/maskop.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/maskop.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/maskop.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/math.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/math.h new file mode 100644 index 0000000000..ab8356e78e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/math.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/memory.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/memory.h new file mode 100644 index 0000000000..583b11720d --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/memory.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/memory.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/memory.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/misc.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/misc.h new file mode 100644 index 0000000000..c84e64cfff --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/misc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/operators.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/operators.h new file mode 100644 index 0000000000..1b6f30739f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/operators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/operators.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/operators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/reorder.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/reorder.h new file mode 100644 index 0000000000..3540c16e17 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/reorder.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/reorder.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/reorder.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/utils.h b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/utils.h new file mode 100644 index 0000000000..4b0b5a93c0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/avx512/utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/avx512/utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/avx512/utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/emulate_maskop.h b/contrib/python/numpy/include/numpy/core/src/common/simd/emulate_maskop.h new file mode 100644 index 0000000000..4a9f7b84a9 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/emulate_maskop.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/emulate_maskop.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/emulate_maskop.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/intdiv.h b/contrib/python/numpy/include/numpy/core/src/common/simd/intdiv.h new file mode 100644 index 0000000000..fb5ea01c4e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/intdiv.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/intdiv.h> +#else +#error #include 
<contrib/python/numpy/py2/numpy/core/src/common/simd/intdiv.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/arithmetic.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/arithmetic.h new file mode 100644 index 0000000000..fbb6f0ee81 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/arithmetic.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/arithmetic.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/arithmetic.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/conversion.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/conversion.h new file mode 100644 index 0000000000..e03533df94 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/conversion.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/conversion.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/conversion.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/math.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/math.h new file mode 100644 index 0000000000..c2a7acfcf0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/math.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/memory.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/memory.h new file mode 100644 index 0000000000..ea9e011896 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/memory.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/memory.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/memory.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/misc.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/misc.h new file mode 100644 index 0000000000..221f66846f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/misc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/neon.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/neon.h new file mode 100644 index 0000000000..e674481409 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/neon.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/neon.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/neon.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/operators.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/operators.h new file mode 100644 index 0000000000..c51848103d --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/operators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/common/simd/neon/operators.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/operators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/neon/reorder.h b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/reorder.h new file mode 100644 index 0000000000..e31d8ef62b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/neon/reorder.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/neon/reorder.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/neon/reorder.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/simd.h b/contrib/python/numpy/include/numpy/core/src/common/simd/simd.h new file mode 100644 index 0000000000..ff876a9df0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/simd.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/simd.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/simd_utils.h b/contrib/python/numpy/include/numpy/core/src/common/simd/simd_utils.h new file mode 100644 index 0000000000..e0a4df7e43 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/simd_utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/simd_utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/simd_utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/arithmetic.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/arithmetic.h new file mode 100644 index 0000000000..4ac7024ab6 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/arithmetic.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/arithmetic.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/arithmetic.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/conversion.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/conversion.h new file mode 100644 index 0000000000..027d54363b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/conversion.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/conversion.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/conversion.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/math.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/math.h new file mode 100644 index 0000000000..b7d7b42529 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/math.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/memory.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/memory.h new file mode 100644 index 0000000000..6325cdccce --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/memory.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/common/simd/sse/memory.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/memory.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/misc.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/misc.h new file mode 100644 index 0000000000..f37d3be694 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/misc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/operators.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/operators.h new file mode 100644 index 0000000000..19966ee3b8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/operators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/operators.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/operators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/reorder.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/reorder.h new file mode 100644 index 0000000000..22ab5afe1f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/reorder.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/reorder.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/reorder.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/sse.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/sse.h new file mode 100644 index 0000000000..72e177228f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/sse.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/sse.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/sse.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/sse/utils.h b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/utils.h new file mode 100644 index 0000000000..a0e211337c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/sse/utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/sse/utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/sse/utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/arithmetic.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/arithmetic.h new file mode 100644 index 0000000000..be7309bf83 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/arithmetic.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/arithmetic.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/arithmetic.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/conversion.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/conversion.h new file mode 100644 index 0000000000..c6bf463856 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/conversion.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/common/simd/vec/conversion.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/conversion.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/math.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/math.h new file mode 100644 index 0000000000..59de95e87f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/math.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/math.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/math.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/memory.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/memory.h new file mode 100644 index 0000000000..106918f4c7 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/memory.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/memory.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/memory.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/misc.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/misc.h new file mode 100644 index 0000000000..5934f7270f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/misc.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/operators.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/operators.h new file mode 100644 index 0000000000..fc62117dd8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/operators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/operators.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/operators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/reorder.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/reorder.h new file mode 100644 index 0000000000..864e22423b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/reorder.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/reorder.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/reorder.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/utils.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/utils.h new file mode 100644 index 0000000000..ced29076c5 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/simd/vec/utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/simd/vec/vec.h b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/vec.h new file mode 100644 index 0000000000..6355be9072 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/simd/vec/vec.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/common/simd/vec/vec.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/simd/vec/vec.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/common/utils.hpp b/contrib/python/numpy/include/numpy/core/src/common/utils.hpp new file mode 100644 index 0000000000..ea5f78257e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/common/utils.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/common/utils.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/common/utils.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/_datetime.h b/contrib/python/numpy/include/numpy/core/src/multiarray/_datetime.h new file mode 100644 index 0000000000..3aba6ec97c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/_datetime.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/_datetime.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/_datetime.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/argfunc.dispatch.h b/contrib/python/numpy/include/numpy/core/src/multiarray/argfunc.dispatch.h new file mode 100644 index 0000000000..8e0829a213 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/argfunc.dispatch.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/argfunc.dispatch.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/argfunc.dispatch.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/arrayobject.h b/contrib/python/numpy/include/numpy/core/src/multiarray/arrayobject.h new file mode 100644 index 0000000000..7d961932dc --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/arrayobject.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/arrayobject.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/arrayobject.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/buffer.h b/contrib/python/numpy/include/numpy/core/src/multiarray/buffer.h new file mode 100644 index 0000000000..fedf041292 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/buffer.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/src/multiarray/buffer.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/buffer.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/calculation.h b/contrib/python/numpy/include/numpy/core/src/multiarray/calculation.h new file mode 100644 index 0000000000..21f86a3955 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/calculation.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/calculation.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/calculation.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/compiled_base.h b/contrib/python/numpy/include/numpy/core/src/multiarray/compiled_base.h new file mode 100644 index 0000000000..a180c93e27 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/compiled_base.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/compiled_base.h> +#else +#include 
<contrib/python/numpy/py2/numpy/core/src/multiarray/compiled_base.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/convert.h b/contrib/python/numpy/include/numpy/core/src/multiarray/convert.h new file mode 100644 index 0000000000..187d9287e7 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/convert.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/convert.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/convert.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busday.h b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busday.h new file mode 100644 index 0000000000..39b6f705be --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busday.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/datetime_busday.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/datetime_busday.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busdaycal.h b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busdaycal.h new file mode 100644 index 0000000000..272fb8b559 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_busdaycal.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/datetime_busdaycal.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/datetime_busdaycal.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_strings.h b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_strings.h new file mode 100644 index 0000000000..e651a2eed6 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/datetime_strings.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/datetime_strings.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/datetime_strings.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/descriptor.h b/contrib/python/numpy/include/numpy/core/src/multiarray/descriptor.h new file mode 100644 index 0000000000..2443d50692 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/descriptor.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/descriptor.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/descriptor.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/dragon4.h b/contrib/python/numpy/include/numpy/core/src/multiarray/dragon4.h new file mode 100644 index 0000000000..0c79b9509f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/dragon4.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/dragon4.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/dragon4.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/dtype_traversal.h b/contrib/python/numpy/include/numpy/core/src/multiarray/dtype_traversal.h new file mode 100644 index 0000000000..ea7a722b1b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/dtype_traversal.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/dtype_traversal.h> +#else +#error #include 
<contrib/python/numpy/py2/numpy/core/src/multiarray/dtype_traversal.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_debug.h b/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_debug.h new file mode 100644 index 0000000000..bc896af84f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_debug.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/einsum_debug.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/einsum_debug.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_sumprod.h b/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_sumprod.h new file mode 100644 index 0000000000..6ba6d54ca3 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/einsum_sumprod.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/einsum_sumprod.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/einsum_sumprod.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/experimental_public_dtype_api.h b/contrib/python/numpy/include/numpy/core/src/multiarray/experimental_public_dtype_api.h new file mode 100644 index 0000000000..ba724087fd --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/experimental_public_dtype_api.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/experimental_public_dtype_api.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/experimental_public_dtype_api.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/getset.h b/contrib/python/numpy/include/numpy/core/src/multiarray/getset.h new file mode 100644 index 0000000000..8692de0eca --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/getset.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/getset.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/getset.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/hashdescr.h b/contrib/python/numpy/include/numpy/core/src/multiarray/hashdescr.h new file mode 100644 index 0000000000..8a88964ff7 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/hashdescr.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/hashdescr.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/hashdescr.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/item_selection.h b/contrib/python/numpy/include/numpy/core/src/multiarray/item_selection.h new file mode 100644 index 0000000000..12d8d7052b --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/item_selection.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/item_selection.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/item_selection.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/iterators.h b/contrib/python/numpy/include/numpy/core/src/multiarray/iterators.h new file mode 100644 index 0000000000..ac10067ea9 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/iterators.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/multiarray/iterators.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/iterators.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/legacy_dtype_implementation.h b/contrib/python/numpy/include/numpy/core/src/multiarray/legacy_dtype_implementation.h new file mode 100644 index 0000000000..8b64ad37b3 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/legacy_dtype_implementation.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/legacy_dtype_implementation.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/legacy_dtype_implementation.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/mapping.h b/contrib/python/numpy/include/numpy/core/src/multiarray/mapping.h new file mode 100644 index 0000000000..1b3a459a3c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/mapping.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/mapping.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/mapping.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/methods.h b/contrib/python/numpy/include/numpy/core/src/multiarray/methods.h new file mode 100644 index 0000000000..4c6d56c096 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/methods.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/methods.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/methods.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/multiarraymodule.h b/contrib/python/numpy/include/numpy/core/src/multiarray/multiarraymodule.h new file mode 100644 index 0000000000..efd02a86a0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/multiarraymodule.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/multiarraymodule.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/multiarraymodule.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/nditer_pywrap.h b/contrib/python/numpy/include/numpy/core/src/multiarray/nditer_pywrap.h new file mode 100644 index 0000000000..df41287ea8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/nditer_pywrap.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/nditer_pywrap.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/nditer_pywrap.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/npy_buffer.h b/contrib/python/numpy/include/numpy/core/src/multiarray/npy_buffer.h new file mode 100644 index 0000000000..b45a96adb3 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/npy_buffer.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/npy_buffer.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/npy_buffer.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/refcount.h b/contrib/python/numpy/include/numpy/core/src/multiarray/refcount.h new file mode 100644 index 0000000000..a8bed7f5cc --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/refcount.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/multiarray/refcount.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/refcount.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/sequence.h b/contrib/python/numpy/include/numpy/core/src/multiarray/sequence.h new file mode 100644 index 0000000000..b412ebc724 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/sequence.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/sequence.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/sequence.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/strfuncs.h b/contrib/python/numpy/include/numpy/core/src/multiarray/strfuncs.h new file mode 100644 index 0000000000..c01908362c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/strfuncs.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/strfuncs.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/strfuncs.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/temp_elide.h b/contrib/python/numpy/include/numpy/core/src/multiarray/temp_elide.h new file mode 100644 index 0000000000..3cd175e3f6 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/temp_elide.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/temp_elide.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/textreading/readtext.h b/contrib/python/numpy/include/numpy/core/src/multiarray/textreading/readtext.h new file mode 100644 index 0000000000..72e4a13ca4 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/textreading/readtext.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/textreading/readtext.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/multiarray/textreading/readtext.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/typeinfo.h b/contrib/python/numpy/include/numpy/core/src/multiarray/typeinfo.h new file mode 100644 index 0000000000..89150b2407 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/typeinfo.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/typeinfo.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/typeinfo.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/usertypes.h b/contrib/python/numpy/include/numpy/core/src/multiarray/usertypes.h new file mode 100644 index 0000000000..efa258da03 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/usertypes.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/usertypes.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/multiarray/usertypes.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/multiarray/vdot.h b/contrib/python/numpy/include/numpy/core/src/multiarray/vdot.h new file mode 100644 index 0000000000..00d05a702a --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/multiarray/vdot.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/multiarray/vdot.h> +#else +#include 
<contrib/python/numpy/py2/numpy/core/src/multiarray/vdot.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_common.h b/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_common.h new file mode 100644 index 0000000000..4a0ac172b2 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npymath/npy_math_common.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_private.h b/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_private.h new file mode 100644 index 0000000000..83abaa08db --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npymath/npy_math_private.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npymath/npy_math_private.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_private.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/npysort_common.h b/contrib/python/numpy/include/numpy/core/src/npysort/npysort_common.h new file mode 100644 index 0000000000..a9b3036386 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/npysort_common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/npysort_common.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/npysort/npysort_common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/npysort_heapsort.h b/contrib/python/numpy/include/numpy/core/src/npysort/npysort_heapsort.h new file mode 100644 index 0000000000..db0dfef939 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/npysort_heapsort.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/npysort_heapsort.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/npysort_heapsort.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.dispatch.h b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.dispatch.h new file mode 100644 index 0000000000..297bdc8f29 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.dispatch.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort.dispatch.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/simd_qsort.dispatch.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.hpp new file mode 100644 index 0000000000..ed0658fe53 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/simd_qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h new file mode 100644 index 0000000000..4a6d75341e --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp new file mode 100644 index 0000000000..d6ed00aadf --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-argsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp new file mode 100644 index 0000000000..c18a783ae8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-partial-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp new file mode 100644 index 0000000000..bc13fc722c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qselect.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h new file mode 100644 index 0000000000..833be756b8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort-common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp new file mode 100644 index 0000000000..e3dfda8977 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/benchmarks/bench-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h 
b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h new file mode 100644 index 0000000000..58a8cdf454 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp new file mode 100644 index 0000000000..0c90d2139a --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp new file mode 100644 index 0000000000..5744750daf --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp new file mode 100644 index 0000000000..8147a5afd5 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-argsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h new file mode 100644 index 0000000000..4177db70f1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp new file mode 100644 index 0000000000..e5da8f42b8 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include 
<contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvalue-networks.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp new file mode 100644 index 0000000000..f866f502c6 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-keyvaluesort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp new file mode 100644 index 0000000000..1df108de40 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h new file mode 100644 index 0000000000..b850bb7685 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-argsort.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h new file mode 100644 index 0000000000..0869ec21d1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp new file mode 100644 index 0000000000..236cce71ef --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/src/avx512fp16-16bit-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp 
b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp new file mode 100644 index 0000000000..f8c817e6e1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/tests/test-partial-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp new file mode 100644 index 0000000000..f8ee42a6cb --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/tests/test-qselect.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h new file mode 100644 index 0000000000..0a5a5ec7ea --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-common.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp new file mode 100644 index 0000000000..7188f792e1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort-fp.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp new file mode 100644 index 0000000000..df2176cc92 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/tests/test-qsort.hpp> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h new file mode 100644 index 0000000000..f8789522cf --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/utils/cpuinfo.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h new file mode 100644 index 0000000000..9f9906d60f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/npysort/x86-simd-sort/utils/rand_array.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/_umath_tests.dispatch.h b/contrib/python/numpy/include/numpy/core/src/umath/_umath_tests.dispatch.h new file mode 100644 index 0000000000..a6626808cc --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/_umath_tests.dispatch.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/_umath_tests.dispatch.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/_umath_tests.dispatch.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/cpuid.h b/contrib/python/numpy/include/numpy/core/src/umath/cpuid.h new file mode 100644 index 0000000000..9225b43724 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/cpuid.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/core/src/umath/cpuid.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/umath/cpuid.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/dispatching.h b/contrib/python/numpy/include/numpy/core/src/umath/dispatching.h new file mode 100644 index 0000000000..3396c47101 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/dispatching.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/dispatching.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/dispatching.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/extobj.h b/contrib/python/numpy/include/numpy/core/src/umath/extobj.h new file mode 100644 index 0000000000..481e29fc18 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/extobj.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/extobj.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/umath/extobj.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/fast_loop_macros.h b/contrib/python/numpy/include/numpy/core/src/umath/fast_loop_macros.h new file mode 100644 index 0000000000..b93c1dec00 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/fast_loop_macros.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/fast_loop_macros.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/fast_loop_macros.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/legacy_array_method.h b/contrib/python/numpy/include/numpy/core/src/umath/legacy_array_method.h new file mode 100644 index 0000000000..9122f7db2c --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/legacy_array_method.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/legacy_array_method.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/legacy_array_method.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/core/src/umath/loops_modulo.dispatch.c b/contrib/python/numpy/include/numpy/core/src/umath/loops_modulo.dispatch.c new file mode 100644 index 0000000000..cbc902510f --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/loops_modulo.dispatch.c @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/loops_modulo.dispatch.c> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/loops_modulo.dispatch.c> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/loops_utils.h b/contrib/python/numpy/include/numpy/core/src/umath/loops_utils.h new file mode 100644 index 0000000000..4befce878d --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/loops_utils.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/loops_utils.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/loops_utils.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/npy_simd_data.h b/contrib/python/numpy/include/numpy/core/src/umath/npy_simd_data.h new file mode 100644 index 0000000000..90a21b59b4 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/npy_simd_data.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/npy_simd_data.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/npy_simd_data.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/override.h b/contrib/python/numpy/include/numpy/core/src/umath/override.h new file mode 100644 index 0000000000..44ceb8a10d --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/override.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/override.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/umath/override.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/reduction.h b/contrib/python/numpy/include/numpy/core/src/umath/reduction.h new file mode 100644 index 0000000000..47f9ef14c0 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/reduction.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/reduction.h> +#else +#include <contrib/python/numpy/py2/numpy/core/src/umath/reduction.h> +#endif diff --git a/contrib/python/numpy/include/numpy/core/src/umath/string_ufuncs.h b/contrib/python/numpy/include/numpy/core/src/umath/string_ufuncs.h new file mode 100644 index 0000000000..a0d0211df2 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/src/umath/string_ufuncs.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/src/umath/string_ufuncs.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/src/umath/string_ufuncs.h> +#endif diff --git a/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h b/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h new file mode 100644 index 0000000000..c235efe894 --- /dev/null +++ b/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h> +#endif diff --git 
a/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h b/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h new file mode 100644 index 0000000000..81809b2716 --- /dev/null +++ b/contrib/python/numpy/include/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h> +#endif diff --git a/contrib/python/numpy/include/numpy/fft/fftpack.h b/contrib/python/numpy/include/numpy/fft/fftpack.h new file mode 100644 index 0000000000..ae64bd965b --- /dev/null +++ b/contrib/python/numpy/include/numpy/fft/fftpack.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#error #include <contrib/python/numpy/py3/numpy/fft/fftpack.h> +#else +#include <contrib/python/numpy/py2/numpy/fft/fftpack.h> +#endif diff --git a/contrib/python/numpy/include/numpy/linalg/lapack_lite/f2c.h b/contrib/python/numpy/include/numpy/linalg/lapack_lite/f2c.h new file mode 100644 index 0000000000..25bf0e7061 --- /dev/null +++ b/contrib/python/numpy/include/numpy/linalg/lapack_lite/f2c.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/linalg/lapack_lite/f2c.h> +#else +#include <contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.h> +#endif diff --git a/contrib/python/numpy/include/numpy/linalg/lapack_lite/lapack_lite_names.h b/contrib/python/numpy/include/numpy/linalg/lapack_lite/lapack_lite_names.h new file mode 100644 index 0000000000..21e67c4572 --- /dev/null +++ b/contrib/python/numpy/include/numpy/linalg/lapack_lite/lapack_lite_names.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/linalg/lapack_lite/lapack_lite_names.h> +#else +#error #include <contrib/python/numpy/py2/numpy/linalg/lapack_lite/lapack_lite_names.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/distributions/logfactorial.h b/contrib/python/numpy/include/numpy/random/src/distributions/logfactorial.h new file mode 100644 index 0000000000..2087650753 --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/distributions/logfactorial.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/distributions/logfactorial.h> +#else +#error #include <contrib/python/numpy/py2/numpy/random/src/distributions/logfactorial.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/distributions/ziggurat_constants.h b/contrib/python/numpy/include/numpy/random/src/distributions/ziggurat_constants.h new file mode 100644 index 0000000000..e9a126f9c3 --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/distributions/ziggurat_constants.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/distributions/ziggurat_constants.h> +#else +#error #include <contrib/python/numpy/py2/numpy/random/src/distributions/ziggurat_constants.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/mt19937/mt19937-jump.h b/contrib/python/numpy/include/numpy/random/src/mt19937/mt19937-jump.h new file mode 100644 index 0000000000..6e573a11f7 --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/mt19937/mt19937-jump.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/mt19937/mt19937-jump.h> +#else +#error #include 
<contrib/python/numpy/py2/numpy/random/src/mt19937/mt19937-jump.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/mt19937/randomkit.h b/contrib/python/numpy/include/numpy/random/src/mt19937/randomkit.h new file mode 100644 index 0000000000..3bd02a5c99 --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/mt19937/randomkit.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/mt19937/randomkit.h> +#else +#error #include <contrib/python/numpy/py2/numpy/random/src/mt19937/randomkit.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/pcg64/pcg64.orig.h b/contrib/python/numpy/include/numpy/random/src/pcg64/pcg64.orig.h new file mode 100644 index 0000000000..6721e3fb13 --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/pcg64/pcg64.orig.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/pcg64/pcg64.orig.h> +#else +#error #include <contrib/python/numpy/py2/numpy/random/src/pcg64/pcg64.orig.h> +#endif diff --git a/contrib/python/numpy/include/numpy/random/src/splitmix64/splitmix64.h b/contrib/python/numpy/include/numpy/random/src/splitmix64/splitmix64.h new file mode 100644 index 0000000000..4631d004ba --- /dev/null +++ b/contrib/python/numpy/include/numpy/random/src/splitmix64/splitmix64.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/random/src/splitmix64/splitmix64.h> +#else +#error #include <contrib/python/numpy/py2/numpy/random/src/splitmix64/splitmix64.h> +#endif diff --git a/contrib/python/numpy/include/ya.make b/contrib/python/numpy/include/ya.make new file mode 100644 index 0000000000..2c994eaa96 --- /dev/null +++ b/contrib/python/numpy/include/ya.make @@ -0,0 +1,19 @@ +SUBSCRIBER(g:python-contrib) + +LIBRARY() + +LICENSE(BSD-3-Clause) + +VERSION(1.25.2) + +ADDINCL( + GLOBAL contrib/python/numpy/include/numpy/core/include + GLOBAL contrib/python/numpy/include/numpy/core/include/numpy + GLOBAL contrib/python/numpy/include/numpy/core/src/common + GLOBAL contrib/python/numpy/include/numpy/core/src/npymath + GLOBAL contrib/python/numpy/include/numpy/distutils/include + GLOBAL FOR cython contrib/python/numpy/include/numpy/core/include + GLOBAL FOR cython contrib/python/numpy/include/numpy/core/include/numpy +) + +END() diff --git a/contrib/python/numpy/include_gen.py b/contrib/python/numpy/include_gen.py new file mode 100755 index 0000000000..60d187f814 --- /dev/null +++ b/contrib/python/numpy/include_gen.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import os +import shutil +from os.path import dirname, exists, join, relpath + +template = '''\ +#ifdef USE_PYTHON3 +#{}include <{}> +#else +#{}include <{}> +#endif +''' + + +def main(): + os.chdir(dirname(__file__)) + if exists('include'): + shutil.rmtree('include') + include_gen('contrib/python/numpy', ['numpy']) + + +def include_gen(root, subpaths): + for path in list_subpaths(subpaths): + out = join('include', path) + py2 = join('py2', path) + py3 = join('py3', path) + makedir(dirname(out)) + with open(out, 'w') as f: + f.write(template.format( + '' if exists(py3) else 'error #', + join(root, py3), + '' if exists(py2) else 'error #', + join(root, py2), + )) + + +def is_header(s): + return s.endswith(('.h', '.hpp')) + + +def list_subpaths(subpaths, roots=('py2', 'py3'), test=is_header): + seen = set() + for root in roots: + for subpath in subpaths: + for dirpath, _, filenames in os.walk(join(root, subpath)): + rootrel = relpath(dirpath, root) + for filename in filenames: + if 
test(filename): + seen.add(join(rootrel, filename)) + if dirpath.endswith('numpy/core/src/umath') and filename == 'funcs.inc': + seen.add(join(rootrel, filename)) + if dirpath.endswith('numpy/core/include/numpy') and filename in ('__multiarray_api.c', '__ufunc_api.c', '__umath_generated.c'): + seen.add(join(rootrel, filename)) + if filename.endswith(('.dispatch.c', '.dispatch.cpp')): + seen.add(join(rootrel, filename)) + return seen + + +def makedir(path): + if not exists(path): + os.makedirs(path) + + +if __name__ == '__main__': + main() diff --git a/contrib/python/numpy/py2/.dist-info/METADATA b/contrib/python/numpy/py2/.dist-info/METADATA new file mode 100644 index 0000000000..40a5345b6f --- /dev/null +++ b/contrib/python/numpy/py2/.dist-info/METADATA @@ -0,0 +1,56 @@ +Metadata-Version: 2.1 +Name: numpy +Version: 1.16.6 +Summary: NumPy is the fundamental package for array computing with Python. +Home-page: https://www.numpy.org +Author: Travis E. Oliphant et al. +Maintainer: NumPy Developers +Maintainer-email: numpy-discussion@python.org +License: BSD +Download-URL: https://pypi.python.org/pypi/numpy +Project-URL: Bug Tracker, https://github.com/numpy/numpy/issues +Project-URL: Source Code, https://github.com/numpy/numpy +Platform: Windows +Platform: Linux +Platform: Solaris +Platform: Mac OS-X +Platform: Unix +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Topic :: Software Development +Classifier: Topic :: Scientific/Engineering +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: Unix +Classifier: Operating System :: MacOS +Requires-Python: >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* + +It provides: + +- a powerful N-dimensional array object +- sophisticated (broadcasting) functions +- tools for integrating C/C++ and Fortran code +- useful linear algebra, Fourier transform, and random number capabilities +- and much more + +Besides its obvious scientific uses, NumPy can also be used as an efficient +multi-dimensional container of generic data. Arbitrary data-types can be +defined. This allows NumPy to seamlessly and speedily integrate with a wide +variety of databases. + +All NumPy wheels distributed on PyPI are BSD licensed. 
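As a quick illustration of where the "#error #include" lines in the wrappers above come from, here is a small sketch (not part of the patch) that fills the include_gen.py template for a header present only under py3/; the path is just an example taken from the hunks above, since include_gen.py discovers paths itself by walking py2/ and py3/:

# Sketch: reproduce one generated wrapper for a py3-only header.
root = 'contrib/python/numpy'
path = 'numpy/core/src/umath/dispatching.h'   # present in py3/, absent in py2/
py3_exists, py2_exists = True, False

template = '''\
#ifdef USE_PYTHON3
#{}include <{}>
#else
#{}include <{}>
#endif
'''

print(template.format(
    '' if py3_exists else 'error #', '{}/py3/{}'.format(root, path),
    '' if py2_exists else 'error #', '{}/py2/{}'.format(root, path),
))
# The py2 branch comes out as "#error #include <...>", so a build without
# USE_PYTHON3 that pulls in this header fails at preprocessing time.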
+ + + diff --git a/contrib/python/numpy/py2/.dist-info/entry_points.txt b/contrib/python/numpy/py2/.dist-info/entry_points.txt new file mode 100644 index 0000000000..bddf93b180 --- /dev/null +++ b/contrib/python/numpy/py2/.dist-info/entry_points.txt @@ -0,0 +1,5 @@ +[console_scripts] +f2py = numpy.f2py.f2py2e:main +f2py2 = numpy.f2py.f2py2e:main +f2py2.7 = numpy.f2py.f2py2e:main + diff --git a/contrib/python/numpy/py2/.dist-info/top_level.txt b/contrib/python/numpy/py2/.dist-info/top_level.txt new file mode 100644 index 0000000000..24ce15ab7e --- /dev/null +++ b/contrib/python/numpy/py2/.dist-info/top_level.txt @@ -0,0 +1 @@ +numpy diff --git a/contrib/python/numpy/py2/numpy/_build_utils/__init__.py b/contrib/python/numpy/py2/numpy/_build_utils/__init__.py new file mode 100644 index 0000000000..1d0f69b67d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/_build_utils/__init__.py @@ -0,0 +1 @@ +from __future__ import division, absolute_import, print_function diff --git a/contrib/python/numpy/py2/numpy/_build_utils/apple_accelerate.py b/contrib/python/numpy/py2/numpy/_build_utils/apple_accelerate.py new file mode 100644 index 0000000000..36dd7584a6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/_build_utils/apple_accelerate.py @@ -0,0 +1,28 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys +import re + +__all__ = ['uses_accelerate_framework', 'get_sgemv_fix'] + +def uses_accelerate_framework(info): + """ Returns True if Accelerate framework is used for BLAS/LAPACK """ + # If we're not building on Darwin (macOS), don't use Accelerate + if sys.platform != "darwin": + return False + # If we're building on macOS, but targeting a different platform, + # don't use Accelerate. + if os.getenv('_PYTHON_HOST_PLATFORM', None): + return False + r_accelerate = re.compile("Accelerate") + extra_link_args = info.get('extra_link_args', '') + for arg in extra_link_args: + if r_accelerate.search(arg): + return True + return False + +def get_sgemv_fix(): + """ Returns source file needed to correct SGEMV """ + path = os.path.abspath(os.path.dirname(__file__)) + return [os.path.join(path, 'src', 'apple_sgemv_fix.c')] diff --git a/contrib/python/numpy/py2/numpy/_build_utils/src/apple_sgemv_fix.c b/contrib/python/numpy/py2/numpy/_build_utils/src/apple_sgemv_fix.c new file mode 100644 index 0000000000..c33c689929 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/_build_utils/src/apple_sgemv_fix.c @@ -0,0 +1,253 @@ +/* This is a collection of ugly hacks to circumvent a bug in + * Apple Accelerate framework's SGEMV subroutine. + * + * See: https://github.com/numpy/numpy/issues/4007 + * + * SGEMV in Accelerate framework will segfault on MacOS X version 10.9 + * (aka Mavericks) if arrays are not aligned to 32 byte boundaries + * and the CPU supports AVX instructions. This can produce segfaults + * in np.dot. + * + * This patch overshadows the symbols cblas_sgemv, sgemv_ and sgemv + * exported by Accelerate to produce the correct behavior. The MacOS X + * version and CPU specs are checked on module import. If Mavericks and + * AVX are detected the call to SGEMV is emulated with a call to SGEMM + * if the arrays are not 32 byte aligned. If the exported symbols cannot + * be overshadowed on module import, a fatal error is produced and the + * process aborts. All the fixes are in a self-contained C file + * and do not alter the multiarray C code. The patch is not applied + * unless NumPy is configured to link with Apple's Accelerate + * framework. 
+ * + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "Python.h" +#include "numpy/arrayobject.h" + +#include <string.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <string.h> + +/* ----------------------------------------------------------------- */ +/* Original cblas_sgemv */ + +#define VECLIB_FILE "/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/vecLib" + +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +extern void cblas_xerbla(int info, const char *rout, const char *form, ...); + +typedef void cblas_sgemv_t(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, + const float beta, float *Y, const int incY); + +typedef void cblas_sgemm_t(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, + const float beta, float *C, const int incC); + +typedef void fortran_sgemv_t( const char* trans, const int* m, const int* n, + const float* alpha, const float* A, const int* ldA, + const float* X, const int* incX, + const float* beta, float* Y, const int* incY ); + +static void *veclib = NULL; +static cblas_sgemv_t *accelerate_cblas_sgemv = NULL; +static cblas_sgemm_t *accelerate_cblas_sgemm = NULL; +static fortran_sgemv_t *accelerate_sgemv = NULL; +static int AVX_and_10_9 = 0; + +/* Dynamic check for AVX support + * __builtin_cpu_supports("avx") is available in gcc 4.8, + * but clang and icc do not currently support it. 
*/ +static inline int +cpu_supports_avx() +{ + int enabled, r; + size_t length = sizeof(enabled); + r = sysctlbyname("hw.optional.avx1_0", &enabled, &length, NULL, 0); + if ( r == 0 && enabled != 0) { + return 1; + } + else { + return 0; + } +} + +/* Check if we are using MacOS X version 10.9 */ +static inline int +using_mavericks() +{ + int r; + char str[32] = {0}; + size_t size = sizeof(str); + r = sysctlbyname("kern.osproductversion", str, &size, NULL, 0); + if ( r == 0 && strncmp(str, "10.9", strlen("10.9")) == 0) { + return 1; + } + else { + return 0; + } +} + +__attribute__((destructor)) +static void unloadlib(void) +{ + if (veclib) dlclose(veclib); +} + +__attribute__((constructor)) +static void loadlib() +/* automatically executed on module import */ +{ + char errormsg[1024]; + int AVX, MAVERICKS; + memset((void*)errormsg, 0, sizeof(errormsg)); + /* check if the CPU supports AVX */ + AVX = cpu_supports_avx(); + /* check if the OS is MacOS X Mavericks */ + MAVERICKS = using_mavericks(); + /* we need the workaround when the CPU supports + * AVX and the OS version is Mavericks */ + AVX_and_10_9 = AVX && MAVERICKS; + /* load vecLib */ + veclib = dlopen(VECLIB_FILE, RTLD_LOCAL | RTLD_FIRST); + if (!veclib) { + veclib = NULL; + snprintf(errormsg, sizeof(errormsg), + "Failed to open vecLib from location '%s'.", VECLIB_FILE); + Py_FatalError(errormsg); /* calls abort() and dumps core */ + } + /* resolve Fortran SGEMV from Accelerate */ + accelerate_sgemv = (fortran_sgemv_t*) dlsym(veclib, "sgemv_"); + if (!accelerate_sgemv) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'sgemv_'."); + } + /* resolve cblas_sgemv from Accelerate */ + accelerate_cblas_sgemv = (cblas_sgemv_t*) dlsym(veclib, "cblas_sgemv"); + if (!accelerate_cblas_sgemv) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'cblas_sgemv'."); + } + /* resolve cblas_sgemm from Accelerate */ + accelerate_cblas_sgemm = (cblas_sgemm_t*) dlsym(veclib, "cblas_sgemm"); + if (!accelerate_cblas_sgemm) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'cblas_sgemm'."); + } +} + +/* ----------------------------------------------------------------- */ +/* Fortran SGEMV override */ + +void sgemv_( const char* trans, const int* m, const int* n, + const float* alpha, const float* A, const int* ldA, + const float* X, const int* incX, + const float* beta, float* Y, const int* incY ) +{ + /* It is safe to use the original SGEMV if we are not using AVX on Mavericks + * or the input arrays A, X and Y are all aligned on 32 byte boundaries. */ + #define BADARRAY(x) (((npy_intp)(void*)x) % 32) + const int use_sgemm = AVX_and_10_9 && (BADARRAY(A) || BADARRAY(X) || BADARRAY(Y)); + if (!use_sgemm) { + accelerate_sgemv(trans,m,n,alpha,A,ldA,X,incX,beta,Y,incY); + return; + } + + /* Arrays are misaligned, the CPU supports AVX, and we are running + * Mavericks. + * + * Emulation of SGEMV with SGEMM: + * + * SGEMV allows vectors to be strided. SGEMM requires all arrays to be + * contiguous along the leading dimension. To emulate striding in SGEMV + * with the leading dimension arguments in SGEMM we compute + * + * Y = alpha * op(A) @ X + beta * Y + * + * as + * + * Y.T = alpha * X.T @ op(A).T + beta * Y.T + * + * Because Fortran uses column major order and X.T and Y.T are row vectors, + * the leading dimensions of X.T and Y.T in SGEMM become equal to the + * strides of the column vectors X and Y in SGEMV. 
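 *
 * Concretely, a sketch of the mapping used by the switch below (A stored
 * column-major as an m x n matrix):
 *
 *   trans = 'N':      y^T(1 x m) = alpha * x^T(1 x n) * A^T(n x m) + beta * y^T
 *                     -> cblas_sgemm(ColMajor, NoTrans, Trans,   1, m, n, ...)
 *   trans = 'T'/'C':  y^T(1 x n) = alpha * x^T(1 x m) * A(m x n)  + beta * y^T
 *                     -> cblas_sgemm(ColMajor, NoTrans, NoTrans, 1, n, m, ...)
 *
 * In both calls X is the single-row left operand with leading dimension incX,
 * and Y is the single-row result with ldc = incY, which is how the vector
 * strides are carried through SGEMM.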
*/ + + switch (*trans) { + case 'T': + case 't': + case 'C': + case 'c': + accelerate_cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, + 1, *n, *m, *alpha, X, *incX, A, *ldA, *beta, Y, *incY ); + break; + case 'N': + case 'n': + accelerate_cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, + 1, *m, *n, *alpha, X, *incX, A, *ldA, *beta, Y, *incY ); + break; + default: + cblas_xerbla(1, "SGEMV", "Illegal transpose setting: %c\n", *trans); + } +} + +/* ----------------------------------------------------------------- */ +/* Override for an alias symbol for sgemv_ in Accelerate */ + +void sgemv (char *trans, + const int *m, const int *n, + const float *alpha, + const float *A, const int *lda, + const float *B, const int *incB, + const float *beta, + float *C, const int *incC) +{ + sgemv_(trans,m,n,alpha,A,lda,B,incB,beta,C,incC); +} + +/* ----------------------------------------------------------------- */ +/* cblas_sgemv override, based on Netlib CBLAS code */ + +void cblas_sgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY) +{ + char TA; + if (order == CblasColMajor) + { + if (TransA == CblasNoTrans) TA = 'N'; + else if (TransA == CblasTrans) TA = 'T'; + else if (TransA == CblasConjTrans) TA = 'C'; + else + { + cblas_xerbla(2, "cblas_sgemv","Illegal TransA setting, %d\n", TransA); + } + sgemv_(&TA, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); + } + else if (order == CblasRowMajor) + { + if (TransA == CblasNoTrans) TA = 'T'; + else if (TransA == CblasTrans) TA = 'N'; + else if (TransA == CblasConjTrans) TA = 'N'; + else + { + cblas_xerbla(2, "cblas_sgemv", "Illegal TransA setting, %d\n", TransA); + return; + } + sgemv_(&TA, &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY); + } + else + cblas_xerbla(1, "cblas_sgemv", "Illegal Order setting, %d\n", order); +} diff --git a/contrib/python/numpy/py2/numpy/compat/setup.py b/contrib/python/numpy/py2/numpy/compat/setup.py new file mode 100644 index 0000000000..882857428c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/compat/setup.py @@ -0,0 +1,12 @@ +from __future__ import division, print_function + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + + config = Configuration('compat', parent_package, top_path) + config.add_data_dir('tests') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/compat/tests/__init__.py b/contrib/python/numpy/py2/numpy/compat/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/compat/tests/test_compat.py b/contrib/python/numpy/py2/numpy/compat/tests/test_compat.py new file mode 100644 index 0000000000..9bb316a4de --- /dev/null +++ b/contrib/python/numpy/py2/numpy/compat/tests/test_compat.py @@ -0,0 +1,26 @@ +from __future__ import division, absolute_import, print_function + +from os.path import join + +from numpy.compat import isfileobj, os_fspath +from numpy.testing import assert_ +from numpy.testing import tempdir + + +def test_isfileobj(): + with tempdir(prefix="numpy_test_compat_") as folder: + filename = join(folder, 'a.bin') + + with open(filename, 'wb') as f: + assert_(isfileobj(f)) + + with open(filename, 'ab') as f: + assert_(isfileobj(f)) + + with open(filename, 'rb') as f: + 
assert_(isfileobj(f)) + + +def test_os_fspath_strings(): + for string_path in (b'/a/b/c.d', u'/a/b/c.d'): + assert_(os_fspath(string_path) == string_path) diff --git a/contrib/python/numpy/py2/numpy/conftest.py b/contrib/python/numpy/py2/numpy/conftest.py new file mode 100644 index 0000000000..7834dd39df --- /dev/null +++ b/contrib/python/numpy/py2/numpy/conftest.py @@ -0,0 +1,67 @@ +""" +Pytest configuration and fixtures for the Numpy test suite. +""" +from __future__ import division, absolute_import, print_function + +import pytest +import numpy + +from numpy.core._multiarray_tests import get_fpu_mode + + +_old_fpu_mode = None +_collect_results = {} + + +def pytest_configure(config): + config.addinivalue_line("markers", + "valgrind_error: Tests that are known to error under valgrind.") + config.addinivalue_line("markers", + "slow: Tests that are very slow.") + + +#FIXME when yield tests are gone. +@pytest.hookimpl() +def pytest_itemcollected(item): + """ + Check FPU precision mode was not changed during test collection. + + The clumsy way we do it here is mainly necessary because numpy + still uses yield tests, which can execute code at test collection + time. + """ + global _old_fpu_mode + + mode = get_fpu_mode() + + if _old_fpu_mode is None: + _old_fpu_mode = mode + elif mode != _old_fpu_mode: + _collect_results[item] = (_old_fpu_mode, mode) + _old_fpu_mode = mode + + +@pytest.fixture(scope="function", autouse=True) +def check_fpu_mode(request): + """ + Check FPU precision mode was not changed during the test. + """ + old_mode = get_fpu_mode() + yield + new_mode = get_fpu_mode() + + if old_mode != new_mode: + raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" + " during the test".format(old_mode, new_mode)) + + collect_result = _collect_results.get(request.node) + if collect_result is not None: + old_mode, new_mode = collect_result + raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" + " when collecting the test".format(old_mode, + new_mode)) + + +@pytest.fixture(autouse=True) +def add_np(doctest_namespace): + doctest_namespace['np'] = numpy diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/__init__.py b/contrib/python/numpy/py2/numpy/core/code_generators/__init__.py new file mode 100644 index 0000000000..1d0f69b67d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/__init__.py @@ -0,0 +1 @@ +from __future__ import division, absolute_import, print_function diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/genapi.py b/contrib/python/numpy/py2/numpy/core/code_generators/genapi.py new file mode 100644 index 0000000000..4aca2373c6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/genapi.py @@ -0,0 +1,510 @@ +""" +Get API information encoded in C files. + +See ``find_function`` for how functions should be formatted, and +``read_order`` for how the order of the functions should be +specified. 
+ +""" +from __future__ import division, absolute_import, print_function + +import sys, os, re +import hashlib + +import textwrap + +from os.path import join + +__docformat__ = 'restructuredtext' + +# The files under src/ that are scanned for API functions +API_FILES = [join('multiarray', 'alloc.c'), + join('multiarray', 'arrayfunction_override.c'), + join('multiarray', 'array_assign_array.c'), + join('multiarray', 'array_assign_scalar.c'), + join('multiarray', 'arrayobject.c'), + join('multiarray', 'arraytypes.c.src'), + join('multiarray', 'buffer.c'), + join('multiarray', 'calculation.c'), + join('multiarray', 'conversion_utils.c'), + join('multiarray', 'convert.c'), + join('multiarray', 'convert_datatype.c'), + join('multiarray', 'ctors.c'), + join('multiarray', 'datetime.c'), + join('multiarray', 'datetime_busday.c'), + join('multiarray', 'datetime_busdaycal.c'), + join('multiarray', 'datetime_strings.c'), + join('multiarray', 'descriptor.c'), + join('multiarray', 'einsum.c.src'), + join('multiarray', 'flagsobject.c'), + join('multiarray', 'getset.c'), + join('multiarray', 'item_selection.c'), + join('multiarray', 'iterators.c'), + join('multiarray', 'mapping.c'), + join('multiarray', 'methods.c'), + join('multiarray', 'multiarraymodule.c'), + join('multiarray', 'nditer_api.c'), + join('multiarray', 'nditer_constr.c'), + join('multiarray', 'nditer_pywrap.c'), + join('multiarray', 'nditer_templ.c.src'), + join('multiarray', 'number.c'), + join('multiarray', 'refcount.c'), + join('multiarray', 'scalartypes.c.src'), + join('multiarray', 'scalarapi.c'), + join('multiarray', 'sequence.c'), + join('multiarray', 'shape.c'), + join('multiarray', 'strfuncs.c'), + join('multiarray', 'usertypes.c'), + join('umath', 'loops.c.src'), + join('umath', 'ufunc_object.c'), + join('umath', 'ufunc_type_resolution.c'), + join('umath', 'reduction.c'), + ] +THIS_DIR = os.path.dirname(__file__) +API_FILES = [os.path.join(THIS_DIR, '..', 'src', a) for a in API_FILES] + +def file_in_this_dir(filename): + return os.path.join(THIS_DIR, filename) + +def remove_whitespace(s): + return ''.join(s.split()) + +def _repl(str): + return str.replace('Bool', 'npy_bool') + + +class StealRef(object): + def __init__(self, arg): + self.arg = arg # counting from 1 + + def __str__(self): + try: + return ' '.join('NPY_STEALS_REF_TO_ARG(%d)' % x for x in self.arg) + except TypeError: + return 'NPY_STEALS_REF_TO_ARG(%d)' % self.arg + + +class NonNull(object): + def __init__(self, arg): + self.arg = arg # counting from 1 + + def __str__(self): + try: + return ' '.join('NPY_GCC_NONNULL(%d)' % x for x in self.arg) + except TypeError: + return 'NPY_GCC_NONNULL(%d)' % self.arg + + +class Function(object): + def __init__(self, name, return_type, args, doc=''): + self.name = name + self.return_type = _repl(return_type) + self.args = args + self.doc = doc + + def _format_arg(self, typename, name): + if typename.endswith('*'): + return typename + name + else: + return typename + ' ' + name + + def __str__(self): + argstr = ', '.join([self._format_arg(*a) for a in self.args]) + if self.doc: + doccomment = '/* %s */\n' % self.doc + else: + doccomment = '' + return '%s%s %s(%s)' % (doccomment, self.return_type, self.name, argstr) + + def to_ReST(self): + lines = ['::', '', ' ' + self.return_type] + argstr = ',\000'.join([self._format_arg(*a) for a in self.args]) + name = ' %s' % (self.name,) + s = textwrap.wrap('(%s)' % (argstr,), width=72, + initial_indent=name, + subsequent_indent=' ' * (len(name)+1), + break_long_words=False) + for l in s: + 
lines.append(l.replace('\000', ' ').rstrip()) + lines.append('') + if self.doc: + lines.append(textwrap.dedent(self.doc)) + return '\n'.join(lines) + + def api_hash(self): + m = hashlib.md5() + m.update(remove_whitespace(self.return_type)) + m.update('\000') + m.update(self.name) + m.update('\000') + for typename, name in self.args: + m.update(remove_whitespace(typename)) + m.update('\000') + return m.hexdigest()[:8] + +class ParseError(Exception): + def __init__(self, filename, lineno, msg): + self.filename = filename + self.lineno = lineno + self.msg = msg + + def __str__(self): + return '%s:%s:%s' % (self.filename, self.lineno, self.msg) + +def skip_brackets(s, lbrac, rbrac): + count = 0 + for i, c in enumerate(s): + if c == lbrac: + count += 1 + elif c == rbrac: + count -= 1 + if count == 0: + return i + raise ValueError("no match '%s' for '%s' (%r)" % (lbrac, rbrac, s)) + +def split_arguments(argstr): + arguments = [] + current_argument = [] + i = 0 + def finish_arg(): + if current_argument: + argstr = ''.join(current_argument).strip() + m = re.match(r'(.*(\s+|[*]))(\w+)$', argstr) + if m: + typename = m.group(1).strip() + name = m.group(3) + else: + typename = argstr + name = '' + arguments.append((typename, name)) + del current_argument[:] + while i < len(argstr): + c = argstr[i] + if c == ',': + finish_arg() + elif c == '(': + p = skip_brackets(argstr[i:], '(', ')') + current_argument += argstr[i:i+p] + i += p-1 + else: + current_argument += c + i += 1 + finish_arg() + return arguments + + +def find_functions(filename, tag='API'): + """ + Scan the file, looking for tagged functions. + + Assuming ``tag=='API'``, a tagged function looks like:: + + /*API*/ + static returntype* + function_name(argtype1 arg1, argtype2 arg2) + { + } + + where the return type must be on a separate line, the function + name must start the line, and the opening ``{`` must start the line. + + An optional documentation comment in ReST format may follow the tag, + as in:: + + /*API + This function does foo... 
+ */ + """ + fo = open(filename, 'r') + functions = [] + return_type = None + function_name = None + function_args = [] + doclist = [] + SCANNING, STATE_DOC, STATE_RETTYPE, STATE_NAME, STATE_ARGS = list(range(5)) + state = SCANNING + tagcomment = '/*' + tag + for lineno, line in enumerate(fo): + try: + line = line.strip() + if state == SCANNING: + if line.startswith(tagcomment): + if line.endswith('*/'): + state = STATE_RETTYPE + else: + state = STATE_DOC + elif state == STATE_DOC: + if line.startswith('*/'): + state = STATE_RETTYPE + else: + line = line.lstrip(' *') + doclist.append(line) + elif state == STATE_RETTYPE: + # first line of declaration with return type + m = re.match(r'NPY_NO_EXPORT\s+(.*)$', line) + if m: + line = m.group(1) + return_type = line + state = STATE_NAME + elif state == STATE_NAME: + # second line, with function name + m = re.match(r'(\w+)\s*\(', line) + if m: + function_name = m.group(1) + else: + raise ParseError(filename, lineno+1, + 'could not find function name') + function_args.append(line[m.end():]) + state = STATE_ARGS + elif state == STATE_ARGS: + if line.startswith('{'): + # finished + fargs_str = ' '.join(function_args).rstrip(' )') + fargs = split_arguments(fargs_str) + f = Function(function_name, return_type, fargs, + '\n'.join(doclist)) + functions.append(f) + return_type = None + function_name = None + function_args = [] + doclist = [] + state = SCANNING + else: + function_args.append(line) + except Exception: + print(filename, lineno + 1) + raise + fo.close() + return functions + +def should_rebuild(targets, source_files): + from distutils.dep_util import newer_group + for t in targets: + if not os.path.exists(t): + return True + sources = API_FILES + list(source_files) + [__file__] + if newer_group(sources, targets[0], missing='newer'): + return True + return False + +def write_file(filename, data): + """ + Write data to filename + Only write changed data to avoid updating timestamps unnecessarily + """ + if os.path.exists(filename): + with open(filename) as f: + if data == f.read(): + return + + with open(filename, 'w') as fid: + fid.write(data) + + +# Those *Api classes instances know how to output strings for the generated code +class TypeApi(object): + def __init__(self, name, index, ptr_cast, api_name): + self.index = index + self.name = name + self.ptr_cast = ptr_cast + self.api_name = api_name + + def define_from_array_api_string(self): + return "#define %s (*(%s *)%s[%d])" % (self.name, + self.ptr_cast, + self.api_name, + self.index) + + def array_api_define(self): + return " (void *) &%s" % self.name + + def internal_define(self): + astr = """\ +extern NPY_NO_EXPORT PyTypeObject %(type)s; +""" % {'type': self.name} + return astr + +class GlobalVarApi(object): + def __init__(self, name, index, type, api_name): + self.name = name + self.index = index + self.type = type + self.api_name = api_name + + def define_from_array_api_string(self): + return "#define %s (*(%s *)%s[%d])" % (self.name, + self.type, + self.api_name, + self.index) + + def array_api_define(self): + return " (%s *) &%s" % (self.type, self.name) + + def internal_define(self): + astr = """\ +extern NPY_NO_EXPORT %(type)s %(name)s; +""" % {'type': self.type, 'name': self.name} + return astr + +# Dummy to be able to consistently use *Api instances for all items in the +# array api +class BoolValuesApi(object): + def __init__(self, name, index, api_name): + self.name = name + self.index = index + self.type = 'PyBoolScalarObject' + self.api_name = api_name + + def 
define_from_array_api_string(self): + return "#define %s ((%s *)%s[%d])" % (self.name, + self.type, + self.api_name, + self.index) + + def array_api_define(self): + return " (void *) &%s" % self.name + + def internal_define(self): + astr = """\ +extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2]; +""" + return astr + +class FunctionApi(object): + def __init__(self, name, index, annotations, return_type, args, api_name): + self.name = name + self.index = index + self.annotations = annotations + self.return_type = return_type + self.args = args + self.api_name = api_name + + def _argtypes_string(self): + if not self.args: + return 'void' + argstr = ', '.join([_repl(a[0]) for a in self.args]) + return argstr + + def define_from_array_api_string(self): + define = """\ +#define %s \\\n (*(%s (*)(%s)) \\ + %s[%d])""" % (self.name, + self.return_type, + self._argtypes_string(), + self.api_name, + self.index) + return define + + def array_api_define(self): + return " (void *) %s" % self.name + + def internal_define(self): + annstr = [str(a) for a in self.annotations] + annstr = ' '.join(annstr) + astr = """\ +NPY_NO_EXPORT %s %s %s \\\n (%s);""" % (annstr, self.return_type, + self.name, + self._argtypes_string()) + return astr + +def order_dict(d): + """Order dict by its values.""" + o = list(d.items()) + def _key(x): + return x[1] + (x[0],) + return sorted(o, key=_key) + +def merge_api_dicts(dicts): + ret = {} + for d in dicts: + for k, v in d.items(): + ret[k] = v + + return ret + +def check_api_dict(d): + """Check that an api dict is valid (does not use the same index twice).""" + # remove the extra value fields that aren't the index + index_d = {k: v[0] for k, v in d.items()} + + # We have if a same index is used twice: we 'revert' the dict so that index + # become keys. If the length is different, it means one index has been used + # at least twice + revert_dict = {v: k for k, v in index_d.items()} + if not len(revert_dict) == len(index_d): + # We compute a dict index -> list of associated items + doubled = {} + for name, index in index_d.items(): + try: + doubled[index].append(name) + except KeyError: + doubled[index] = [name] + fmt = "Same index has been used twice in api definition: {}" + val = ''.join( + '\n\tindex {} -> {}'.format(index, names) + for index, names in doubled.items() if len(names) != 1 + ) + raise ValueError(fmt.format(val)) + + # No 'hole' in the indexes may be allowed, and it must starts at 0 + indexes = set(index_d.values()) + expected = set(range(len(indexes))) + if indexes != expected: + diff = expected.symmetric_difference(indexes) + msg = "There are some holes in the API indexing: " \ + "(symmetric diff is %s)" % diff + raise ValueError(msg) + +def get_api_functions(tagname, api_dict): + """Parse source files to get functions tagged by the given tag.""" + functions = [] + for f in API_FILES: + functions.extend(find_functions(f, tagname)) + dfunctions = [(api_dict[func.name][0], func) for func in functions] + dfunctions.sort() + return [a[1] for a in dfunctions] + +def fullapi_hash(api_dicts): + """Given a list of api dicts defining the numpy C API, compute a checksum + of the list of items in the API (as a string).""" + a = [] + for d in api_dicts: + for name, data in order_dict(d): + a.extend(name) + a.extend(','.join(map(str, data))) + + return hashlib.md5(''.join(a).encode('ascii')).hexdigest() + +# To parse strings like 'hex = checksum' where hex is e.g. 
0x1234567F and +# checksum a 128 bits md5 checksum (hex format as well) +VERRE = re.compile(r'(^0x[\da-f]{8})\s*=\s*([\da-f]{32})') + +def get_versions_hash(): + d = [] + + file = os.path.join(os.path.dirname(__file__), 'cversions.txt') + fid = open(file, 'r') + try: + for line in fid: + m = VERRE.match(line) + if m: + d.append((int(m.group(1), 16), m.group(2))) + finally: + fid.close() + + return dict(d) + +def main(): + tagname = sys.argv[1] + order_file = sys.argv[2] + functions = get_api_functions(tagname, order_file) + m = hashlib.md5(tagname) + for func in functions: + print(func) + ah = func.api_hash() + m.update(ah) + print(hex(int(ah, 16))) + print(hex(int(m.hexdigest()[:8], 16))) + +if __name__ == '__main__': + main() diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/generate_ufunc_api.py b/contrib/python/numpy/py2/numpy/core/code_generators/generate_ufunc_api.py new file mode 100644 index 0000000000..1b0143e88b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/generate_ufunc_api.py @@ -0,0 +1,211 @@ +from __future__ import division, print_function + +import os +import genapi + +import numpy_api + +from genapi import \ + TypeApi, GlobalVarApi, FunctionApi, BoolValuesApi + +h_template = r""" +#ifdef _UMATHMODULE + +extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type; + +%s + +#else + +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +#define PyUFunc_API PY_UFUNC_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_UFUNC) +extern void **PyUFunc_API; +#else +#if defined(PY_UFUNC_UNIQUE_SYMBOL) +void **PyUFunc_API; +#else +static void **PyUFunc_API=NULL; +#endif +#endif + +%s + +static NPY_INLINE int +_import_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy.core._multiarray_umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + PyErr_SetString(PyExc_ImportError, + "numpy.core._multiarray_umath failed to import"); + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_UFUNC_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + PyErr_SetString(PyExc_AttributeError, "_UFUNC_API not found"); + return -1; + } + +#if PY_VERSION_HEX >= 0x03000000 + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyUFunc_API = (void **)PyCapsule_GetPointer(c_api, NULL); +#else + if (!PyCObject_Check(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCObject object"); + Py_DECREF(c_api); + return -1; + } + PyUFunc_API = (void **)PyCObject_AsVoidPtr(c_api); +#endif + Py_DECREF(c_api); + if (PyUFunc_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is NULL pointer"); + return -1; + } + return 0; +} + +#if PY_VERSION_HEX >= 0x03000000 +#define NUMPY_IMPORT_UMATH_RETVAL NULL +#else +#define NUMPY_IMPORT_UMATH_RETVAL +#endif + +#define import_umath() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return NUMPY_IMPORT_UMATH_RETVAL;\ + }\ + } while(0) + +#define import_umath1(ret) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + return ret;\ + }\ + } while(0) + +#define import_umath2(ret, msg) \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) {\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError, msg);\ + return ret;\ + }\ + } while(0) + +#define import_ufunc() \ + do {\ + UFUNC_NOFPE\ + if (_import_umath() < 0) 
{\ + PyErr_Print();\ + PyErr_SetString(PyExc_ImportError,\ + "numpy.core.umath failed to import");\ + }\ + } while(0) + +#endif +""" + +c_template = r""" +/* These pointers will be stored in the C-object for use in other + extension modules +*/ + +void *PyUFunc_API[] = { +%s +}; +""" + +def generate_api(output_dir, force=False): + basename = 'ufunc_api' + + h_file = os.path.join(output_dir, '__%s.h' % basename) + c_file = os.path.join(output_dir, '__%s.c' % basename) + d_file = os.path.join(output_dir, '%s.txt' % basename) + targets = (h_file, c_file, d_file) + + sources = ['ufunc_api_order.txt'] + + if (not force and not genapi.should_rebuild(targets, sources + [__file__])): + return targets + else: + do_generate_api(targets, sources) + + return targets + +def do_generate_api(targets, sources): + header_file = targets[0] + c_file = targets[1] + doc_file = targets[2] + + ufunc_api_index = genapi.merge_api_dicts(( + numpy_api.ufunc_funcs_api, + numpy_api.ufunc_types_api)) + genapi.check_api_dict(ufunc_api_index) + + ufunc_api_list = genapi.get_api_functions('UFUNC_API', numpy_api.ufunc_funcs_api) + + # Create dict name -> *Api instance + ufunc_api_dict = {} + api_name = 'PyUFunc_API' + for f in ufunc_api_list: + name = f.name + index = ufunc_api_index[name][0] + annotations = ufunc_api_index[name][1:] + ufunc_api_dict[name] = FunctionApi(f.name, index, annotations, + f.return_type, f.args, api_name) + + for name, val in numpy_api.ufunc_types_api.items(): + index = val[0] + ufunc_api_dict[name] = TypeApi(name, index, 'PyTypeObject', api_name) + + # set up object API + module_list = [] + extension_list = [] + init_list = [] + + for name, index in genapi.order_dict(ufunc_api_index): + api_item = ufunc_api_dict[name] + extension_list.append(api_item.define_from_array_api_string()) + init_list.append(api_item.array_api_define()) + module_list.append(api_item.internal_define()) + + # Write to header + s = h_template % ('\n'.join(module_list), '\n'.join(extension_list)) + genapi.write_file(header_file, s) + + # Write to c-code + s = c_template % ',\n'.join(init_list) + genapi.write_file(c_file, s) + + # Write to documentation + s = ''' +================= +NumPy Ufunc C-API +================= +''' + for func in ufunc_api_list: + s += func.to_ReST() + s += '\n\n' + genapi.write_file(doc_file, s) + + return targets diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/generate_umath.py b/contrib/python/numpy/py2/numpy/core/code_generators/generate_umath.py new file mode 100644 index 0000000000..daf5949d06 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/generate_umath.py @@ -0,0 +1,1145 @@ +from __future__ import division, print_function + +import os +import re +import struct +import sys +import textwrap + +sys.path.insert(0, os.path.dirname(__file__)) +import ufunc_docstrings as docstrings +sys.path.pop(0) + +Zero = "PyInt_FromLong(0)" +One = "PyInt_FromLong(1)" +True_ = "(Py_INCREF(Py_True), Py_True)" +False_ = "(Py_INCREF(Py_False), Py_False)" +None_ = object() +AllOnes = "PyInt_FromLong(-1)" +MinusInfinity = 'PyFloat_FromDouble(-NPY_INFINITY)' +ReorderableNone = "(Py_INCREF(Py_None), Py_None)" + +# Sentinel value to specify using the full type description in the +# function name +class FullTypeDescr(object): + pass + +class FuncNameSuffix(object): + """Stores the suffix to append when generating functions names. + """ + def __init__(self, suffix): + self.suffix = suffix + +class TypeDescription(object): + """Type signature for a ufunc. 
+ + Attributes + ---------- + type : str + Character representing the nominal type. + func_data : str or None or FullTypeDescr or FuncNameSuffix, optional + The string representing the expression to insert into the data + array, if any. + in_ : str or None, optional + The typecode(s) of the inputs. + out : str or None, optional + The typecode(s) of the outputs. + astype : dict or None, optional + If astype['x'] is 'y', uses PyUFunc_x_x_As_y_y/PyUFunc_xx_x_As_yy_y + instead of PyUFunc_x_x/PyUFunc_xx_x. + simd: list + Available SIMD ufunc loops, dispatched at runtime in specified order + Currently only supported for simples types (see make_arrays) + """ + def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None): + self.type = type + self.func_data = f + if astype is None: + astype = {} + self.astype_dict = astype + if in_ is not None: + in_ = in_.replace('P', type) + self.in_ = in_ + if out is not None: + out = out.replace('P', type) + self.out = out + self.simd = simd + + def finish_signature(self, nin, nout): + if self.in_ is None: + self.in_ = self.type * nin + assert len(self.in_) == nin + if self.out is None: + self.out = self.type * nout + assert len(self.out) == nout + self.astype = self.astype_dict.get(self.type, None) + +_fdata_map = dict(e='npy_%sf', f='npy_%sf', d='npy_%s', g='npy_%sl', + F='nc_%sf', D='nc_%s', G='nc_%sl') +def build_func_data(types, f): + func_data = [_fdata_map.get(t, '%s') % (f,) for t in types] + return func_data + +def TD(types, f=None, astype=None, in_=None, out=None, simd=None): + if f is not None: + if isinstance(f, str): + func_data = build_func_data(types, f) + elif len(f) != len(types): + raise ValueError("Number of types and f do not match") + else: + func_data = f + else: + func_data = (None,) * len(types) + if isinstance(in_, str): + in_ = (in_,) * len(types) + elif in_ is None: + in_ = (None,) * len(types) + elif len(in_) != len(types): + raise ValueError("Number of types and inputs do not match") + if isinstance(out, str): + out = (out,) * len(types) + elif out is None: + out = (None,) * len(types) + elif len(out) != len(types): + raise ValueError("Number of types and outputs do not match") + tds = [] + for t, fd, i, o in zip(types, func_data, in_, out): + # [(simd-name, list of types)] + if simd is not None: + simdt = [k for k, v in simd if t in v] + else: + simdt = [] + tds.append(TypeDescription(t, f=fd, in_=i, out=o, astype=astype, simd=simdt)) + return tds + +class Ufunc(object): + """Description of a ufunc. + + Attributes + ---------- + nin : number of input arguments + nout : number of output arguments + identity : identity element for a two-argument function + docstring : docstring for the ufunc + type_descriptions : list of TypeDescription objects + """ + def __init__(self, nin, nout, identity, docstring, typereso, + *type_descriptions, **kwargs): + self.nin = nin + self.nout = nout + if identity is None: + identity = None_ + self.identity = identity + self.docstring = docstring + self.typereso = typereso + self.type_descriptions = [] + self.signature = kwargs.pop('signature', None) + for td in type_descriptions: + self.type_descriptions.extend(td) + for td in self.type_descriptions: + td.finish_signature(self.nin, self.nout) + if kwargs: + raise ValueError('unknown kwargs %r' % str(kwargs)) + +# String-handling utilities to avoid locale-dependence. 
+ +import string +if sys.version_info[0] < 3: + UPPER_TABLE = string.maketrans(string.ascii_lowercase, + string.ascii_uppercase) +else: + UPPER_TABLE = bytes.maketrans(bytes(string.ascii_lowercase, "ascii"), + bytes(string.ascii_uppercase, "ascii")) + +def english_upper(s): + """ Apply English case rules to convert ASCII strings to all upper case. + + This is an internal utility function to replace calls to str.upper() such + that we can avoid changing behavior with changing locales. In particular, + Turkish has distinct dotted and dotless variants of the Latin letter "I" in + both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale. + + Parameters + ---------- + s : str + + Returns + ------- + uppered : str + + Examples + -------- + >>> from numpy.lib.utils import english_upper + >>> s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_' + >>> english_upper(s) + 'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' + >>> english_upper('') + '' + """ + uppered = s.translate(UPPER_TABLE) + return uppered + + +#each entry in defdict is a Ufunc object. + +#name: [string of chars for which it is defined, +# string of characters using func interface, +# tuple of strings giving funcs for data, +# (in, out), or (instr, outstr) giving the signature as character codes, +# identity, +# docstring, +# output specification (optional) +# ] + +chartoname = {'?': 'bool', + 'b': 'byte', + 'B': 'ubyte', + 'h': 'short', + 'H': 'ushort', + 'i': 'int', + 'I': 'uint', + 'l': 'long', + 'L': 'ulong', + 'q': 'longlong', + 'Q': 'ulonglong', + 'e': 'half', + 'f': 'float', + 'd': 'double', + 'g': 'longdouble', + 'F': 'cfloat', + 'D': 'cdouble', + 'G': 'clongdouble', + 'M': 'datetime', + 'm': 'timedelta', + 'O': 'OBJECT', + # '.' is like 'O', but calls a method of the object instead + # of a function + 'P': 'OBJECT', + } + +all = '?bBhHiIlLqQefdgFDGOMm' +O = 'O' +P = 'P' +ints = 'bBhHiIlLqQ' +times = 'Mm' +timedeltaonly = 'm' +intsO = ints + O +bints = '?' + ints +bintsO = bints + O +flts = 'efdg' +fltsO = flts + O +fltsP = flts + P +cmplx = 'FDG' +cmplxO = cmplx + O +cmplxP = cmplx + P +inexact = flts + cmplx +inexactvec = 'fd' +noint = inexact+O +nointP = inexact+P +allP = bints+times+flts+cmplxP +nobool = all[1:] +noobj = all[:-3]+all[-2:] +nobool_or_obj = all[1:-3]+all[-2:] +nobool_or_datetime = all[1:-2]+all[-1:] +intflt = ints+flts +intfltcmplx = ints+flts+cmplx +nocmplx = bints+times+flts +nocmplxO = nocmplx+O +nocmplxP = nocmplx+P +notimes_or_obj = bints + inexact +nodatetime_or_obj = bints + inexact + +# Find which code corresponds to int64. +int64 = '' +uint64 = '' +for code in 'bhilq': + if struct.calcsize(code) == 8: + int64 = code + uint64 = english_upper(code) + break + +# This dictionary describes all the ufunc implementations, generating +# all the function names and their corresponding ufunc signatures. TD is +# an object which expands a list of character codes into an array of +# TypeDescriptions. 
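[Reviewer note, not part of the patch: a minimal sketch of how the TD helper and TypeDescription class defined earlier in this file expand a string of type characters into per-type loop descriptions; the ufunc name 'hypot' and the character codes 'efd' are chosen only for illustration.]

    # Illustrative sketch, assuming the TD/TypeDescription/_fdata_map definitions above.
    tds = TD('efd', f='hypot', astype={'e': 'f'})
    # build_func_data maps each type char through _fdata_map, so func_data becomes
    #   ['npy_hypotf', 'npy_hypotf', 'npy_hypot']
    # Later, finish_signature(nin=2, nout=1) fills in the missing signatures, e.g.
    # the 'd' entry gets in_='dd', out='d', and the 'e' entry picks up astype='f',
    # which (per the TypeDescription docstring) selects the PyUFunc_ee_e_As_ff_f
    # generic loop instead of PyUFunc_ee_e.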
+defdict = { +'add': + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.add'), + 'PyUFunc_AdditionTypeResolver', + TD(notimes_or_obj, simd=[('avx2', ints)]), + [TypeDescription('M', FullTypeDescr, 'Mm', 'M'), + TypeDescription('m', FullTypeDescr, 'mm', 'm'), + TypeDescription('M', FullTypeDescr, 'mM', 'M'), + ], + TD(O, f='PyNumber_Add'), + ), +'subtract': + Ufunc(2, 1, None, # Zero is only a unit to the right, not the left + docstrings.get('numpy.core.umath.subtract'), + 'PyUFunc_SubtractionTypeResolver', + TD(notimes_or_obj, simd=[('avx2', ints)]), + [TypeDescription('M', FullTypeDescr, 'Mm', 'M'), + TypeDescription('m', FullTypeDescr, 'mm', 'm'), + TypeDescription('M', FullTypeDescr, 'MM', 'm'), + ], + TD(O, f='PyNumber_Subtract'), + ), +'multiply': + Ufunc(2, 1, One, + docstrings.get('numpy.core.umath.multiply'), + 'PyUFunc_MultiplicationTypeResolver', + TD(notimes_or_obj, simd=[('avx2', ints)]), + [TypeDescription('m', FullTypeDescr, 'mq', 'm'), + TypeDescription('m', FullTypeDescr, 'qm', 'm'), + TypeDescription('m', FullTypeDescr, 'md', 'm'), + TypeDescription('m', FullTypeDescr, 'dm', 'm'), + ], + TD(O, f='PyNumber_Multiply'), + ), +'divide': + Ufunc(2, 1, None, # One is only a unit to the right, not the left + docstrings.get('numpy.core.umath.divide'), + 'PyUFunc_MixedDivisionTypeResolver', + TD(intfltcmplx), + [TypeDescription('m', FullTypeDescr, 'mq', 'm'), + TypeDescription('m', FullTypeDescr, 'md', 'm'), + TypeDescription('m', FullTypeDescr, 'mm', 'd'), + ], + TD(O, f='PyNumber_Divide'), + ), +'floor_divide': + Ufunc(2, 1, None, # One is only a unit to the right, not the left + docstrings.get('numpy.core.umath.floor_divide'), + 'PyUFunc_DivisionTypeResolver', + TD(intfltcmplx), + [TypeDescription('m', FullTypeDescr, 'mq', 'm'), + TypeDescription('m', FullTypeDescr, 'md', 'm'), + TypeDescription('m', FullTypeDescr, 'mm', 'q'), + ], + TD(O, f='PyNumber_FloorDivide'), + ), +'true_divide': + Ufunc(2, 1, None, # One is only a unit to the right, not the left + docstrings.get('numpy.core.umath.true_divide'), + 'PyUFunc_TrueDivisionTypeResolver', + TD(flts+cmplx), + [TypeDescription('m', FullTypeDescr, 'mq', 'm'), + TypeDescription('m', FullTypeDescr, 'md', 'm'), + TypeDescription('m', FullTypeDescr, 'mm', 'd'), + ], + TD(O, f='PyNumber_TrueDivide'), + ), +'conjugate': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.conjugate'), + None, + TD(ints+flts+cmplx, simd=[('avx2', ints)]), + TD(P, f='conjugate'), + ), +'fmod': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.fmod'), + None, + TD(ints), + TD(flts, f='fmod', astype={'e':'f'}), + TD(P, f='fmod'), + ), +'square': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.square'), + None, + TD(ints+inexact, simd=[('avx2', ints)]), + TD(O, f='Py_square'), + ), +'reciprocal': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.reciprocal'), + None, + TD(ints+inexact, simd=[('avx2', ints)]), + TD(O, f='Py_reciprocal'), + ), +# This is no longer used as numpy.ones_like, however it is +# still used by some internal calls. 
+'_ones_like': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath._ones_like'), + 'PyUFunc_OnesLikeTypeResolver', + TD(noobj), + TD(O, f='Py_get_one'), + ), +'power': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.power'), + None, + TD(ints), + TD(inexact, f='pow', astype={'e':'f'}), + TD(O, f='npy_ObjectPower'), + ), +'float_power': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.float_power'), + None, + TD('dgDG', f='pow'), + ), +'absolute': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.absolute'), + 'PyUFunc_AbsoluteTypeResolver', + TD(bints+flts+timedeltaonly), + TD(cmplx, out=('f', 'd', 'g')), + TD(O, f='PyNumber_Absolute'), + ), +'_arg': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath._arg'), + None, + TD(cmplx, out=('f', 'd', 'g')), + ), +'negative': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.negative'), + 'PyUFunc_NegativeTypeResolver', + TD(bints+flts+timedeltaonly, simd=[('avx2', ints)]), + TD(cmplx, f='neg'), + TD(O, f='PyNumber_Negative'), + ), +'positive': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.positive'), + 'PyUFunc_SimpleUnaryOperationTypeResolver', + TD(ints+flts+timedeltaonly), + TD(cmplx, f='pos'), + TD(O, f='PyNumber_Positive'), + ), +'sign': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.sign'), + 'PyUFunc_SimpleUnaryOperationTypeResolver', + TD(nobool_or_datetime), + ), +'greater': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.greater'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'greater_equal': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.greater_equal'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'less': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.less'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'less_equal': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.less_equal'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'equal': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.equal'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'not_equal': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.not_equal'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(all, out='?', simd=[('avx2', ints)]), + [TypeDescription('O', FullTypeDescr, 'OO', 'O')], + ), +'logical_and': + Ufunc(2, 1, True_, + docstrings.get('numpy.core.umath.logical_and'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), + TD(O, f='npy_ObjectLogicalAnd'), + ), +'logical_not': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.logical_not'), + None, + TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), + TD(O, f='npy_ObjectLogicalNot'), + ), +'logical_or': + Ufunc(2, 1, False_, + docstrings.get('numpy.core.umath.logical_or'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), + TD(O, f='npy_ObjectLogicalOr'), + ), +'logical_xor': + Ufunc(2, 1, False_, + docstrings.get('numpy.core.umath.logical_xor'), + 'PyUFunc_SimpleBinaryComparisonTypeResolver', + 
TD(nodatetime_or_obj, out='?'), + TD(P, f='logical_xor'), + ), +'maximum': + Ufunc(2, 1, ReorderableNone, + docstrings.get('numpy.core.umath.maximum'), + 'PyUFunc_SimpleBinaryOperationTypeResolver', + TD(noobj), + TD(O, f='npy_ObjectMax') + ), +'minimum': + Ufunc(2, 1, ReorderableNone, + docstrings.get('numpy.core.umath.minimum'), + 'PyUFunc_SimpleBinaryOperationTypeResolver', + TD(noobj), + TD(O, f='npy_ObjectMin') + ), +'fmax': + Ufunc(2, 1, ReorderableNone, + docstrings.get('numpy.core.umath.fmax'), + 'PyUFunc_SimpleBinaryOperationTypeResolver', + TD(noobj), + TD(O, f='npy_ObjectMax') + ), +'fmin': + Ufunc(2, 1, ReorderableNone, + docstrings.get('numpy.core.umath.fmin'), + 'PyUFunc_SimpleBinaryOperationTypeResolver', + TD(noobj), + TD(O, f='npy_ObjectMin') + ), +'logaddexp': + Ufunc(2, 1, MinusInfinity, + docstrings.get('numpy.core.umath.logaddexp'), + None, + TD(flts, f="logaddexp", astype={'e':'f'}) + ), +'logaddexp2': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.logaddexp2'), + None, + TD(flts, f="logaddexp2", astype={'e':'f'}) + ), +'bitwise_and': + Ufunc(2, 1, AllOnes, + docstrings.get('numpy.core.umath.bitwise_and'), + None, + TD(bints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_And'), + ), +'bitwise_or': + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.bitwise_or'), + None, + TD(bints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_Or'), + ), +'bitwise_xor': + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.bitwise_xor'), + None, + TD(bints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_Xor'), + ), +'invert': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.invert'), + None, + TD(bints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_Invert'), + ), +'left_shift': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.left_shift'), + None, + TD(ints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_Lshift'), + ), +'right_shift': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.right_shift'), + None, + TD(ints, simd=[('avx2', ints)]), + TD(O, f='PyNumber_Rshift'), + ), +'heaviside': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.heaviside'), + None, + TD(flts, f='heaviside', astype={'e':'f'}), + ), +'degrees': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.degrees'), + None, + TD(fltsP, f='degrees', astype={'e':'f'}), + ), +'rad2deg': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.rad2deg'), + None, + TD(fltsP, f='rad2deg', astype={'e':'f'}), + ), +'radians': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.radians'), + None, + TD(fltsP, f='radians', astype={'e':'f'}), + ), +'deg2rad': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.deg2rad'), + None, + TD(fltsP, f='deg2rad', astype={'e':'f'}), + ), +'arccos': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.arccos'), + None, + TD(inexact, f='acos', astype={'e':'f'}), + TD(P, f='arccos'), + ), +'arccosh': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.arccosh'), + None, + TD(inexact, f='acosh', astype={'e':'f'}), + TD(P, f='arccosh'), + ), +'arcsin': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.arcsin'), + None, + TD(inexact, f='asin', astype={'e':'f'}), + TD(P, f='arcsin'), + ), +'arcsinh': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.arcsinh'), + None, + TD(inexact, f='asinh', astype={'e':'f'}), + TD(P, f='arcsinh'), + ), +'arctan': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.arctan'), + None, + TD(inexact, f='atan', astype={'e':'f'}), + TD(P, f='arctan'), + ), +'arctanh': + Ufunc(1, 1, None, + 
docstrings.get('numpy.core.umath.arctanh'), + None, + TD(inexact, f='atanh', astype={'e':'f'}), + TD(P, f='arctanh'), + ), +'cos': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.cos'), + None, + TD(inexact, f='cos', astype={'e':'f'}), + TD(P, f='cos'), + ), +'sin': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.sin'), + None, + TD(inexact, f='sin', astype={'e':'f'}), + TD(P, f='sin'), + ), +'tan': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.tan'), + None, + TD(inexact, f='tan', astype={'e':'f'}), + TD(P, f='tan'), + ), +'cosh': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.cosh'), + None, + TD(inexact, f='cosh', astype={'e':'f'}), + TD(P, f='cosh'), + ), +'sinh': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.sinh'), + None, + TD(inexact, f='sinh', astype={'e':'f'}), + TD(P, f='sinh'), + ), +'tanh': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.tanh'), + None, + TD(inexact, f='tanh', astype={'e':'f'}), + TD(P, f='tanh'), + ), +'exp': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.exp'), + None, + TD(inexact, f='exp', astype={'e':'f'}), + TD(P, f='exp'), + ), +'exp2': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.exp2'), + None, + TD(inexact, f='exp2', astype={'e':'f'}), + TD(P, f='exp2'), + ), +'expm1': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.expm1'), + None, + TD(inexact, f='expm1', astype={'e':'f'}), + TD(P, f='expm1'), + ), +'log': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.log'), + None, + TD(inexact, f='log', astype={'e':'f'}), + TD(P, f='log'), + ), +'log2': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.log2'), + None, + TD(inexact, f='log2', astype={'e':'f'}), + TD(P, f='log2'), + ), +'log10': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.log10'), + None, + TD(inexact, f='log10', astype={'e':'f'}), + TD(P, f='log10'), + ), +'log1p': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.log1p'), + None, + TD(inexact, f='log1p', astype={'e':'f'}), + TD(P, f='log1p'), + ), +'sqrt': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.sqrt'), + None, + TD('e', f='sqrt', astype={'e':'f'}), + TD(inexactvec), + TD(inexact, f='sqrt', astype={'e':'f'}), + TD(P, f='sqrt'), + ), +'cbrt': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.cbrt'), + None, + TD(flts, f='cbrt', astype={'e':'f'}), + TD(P, f='cbrt'), + ), +'ceil': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.ceil'), + None, + TD(flts, f='ceil', astype={'e':'f'}), + TD(P, f='ceil'), + ), +'trunc': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.trunc'), + None, + TD(flts, f='trunc', astype={'e':'f'}), + TD(P, f='trunc'), + ), +'fabs': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.fabs'), + None, + TD(flts, f='fabs', astype={'e':'f'}), + TD(P, f='fabs'), + ), +'floor': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.floor'), + None, + TD(flts, f='floor', astype={'e':'f'}), + TD(P, f='floor'), + ), +'rint': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.rint'), + None, + TD(inexact, f='rint', astype={'e':'f'}), + TD(P, f='rint'), + ), +'arctan2': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.arctan2'), + None, + TD(flts, f='atan2', astype={'e':'f'}), + TD(P, f='arctan2'), + ), +'remainder': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.remainder'), + 'PyUFunc_RemainderTypeResolver', + TD(intflt), + [TypeDescription('m', FullTypeDescr, 'mm', 'm')], + TD(O, f='PyNumber_Remainder'), + ), +'divmod': + Ufunc(2, 2, None, + 
docstrings.get('numpy.core.umath.divmod'), + 'PyUFunc_DivmodTypeResolver', + TD(intflt), + [TypeDescription('m', FullTypeDescr, 'mm', 'qm')], + # TD(O, f='PyNumber_Divmod'), # gh-9730 + ), +'hypot': + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.hypot'), + None, + TD(flts, f='hypot', astype={'e':'f'}), + TD(P, f='hypot'), + ), +'isnan': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.isnan'), + None, + TD(inexact, out='?'), + ), +'isnat': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.isnat'), + 'PyUFunc_IsNaTTypeResolver', + TD(times, out='?'), + ), +'isinf': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.isinf'), + None, + TD(inexact, out='?'), + ), +'isfinite': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.isfinite'), + None, + TD(inexact, out='?'), + ), +'signbit': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.signbit'), + None, + TD(flts, out='?'), + ), +'copysign': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.copysign'), + None, + TD(flts), + ), +'nextafter': + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.nextafter'), + None, + TD(flts), + ), +'spacing': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.spacing'), + None, + TD(flts), + ), +'modf': + Ufunc(1, 2, None, + docstrings.get('numpy.core.umath.modf'), + None, + TD(flts), + ), +'ldexp' : + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.ldexp'), + None, + [TypeDescription('e', None, 'ei', 'e'), + TypeDescription('f', None, 'fi', 'f'), + TypeDescription('e', FuncNameSuffix('long'), 'el', 'e'), + TypeDescription('f', FuncNameSuffix('long'), 'fl', 'f'), + TypeDescription('d', None, 'di', 'd'), + TypeDescription('d', FuncNameSuffix('long'), 'dl', 'd'), + TypeDescription('g', None, 'gi', 'g'), + TypeDescription('g', FuncNameSuffix('long'), 'gl', 'g'), + ], + ), +'frexp' : + Ufunc(1, 2, None, + docstrings.get('numpy.core.umath.frexp'), + None, + [TypeDescription('e', None, 'e', 'ei'), + TypeDescription('f', None, 'f', 'fi'), + TypeDescription('d', None, 'd', 'di'), + TypeDescription('g', None, 'g', 'gi'), + ], + ), +'gcd' : + Ufunc(2, 1, Zero, + docstrings.get('numpy.core.umath.gcd'), + "PyUFunc_SimpleBinaryOperationTypeResolver", + TD(ints), + TD('O', f='npy_ObjectGCD'), + ), +'lcm' : + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.lcm'), + "PyUFunc_SimpleBinaryOperationTypeResolver", + TD(ints), + TD('O', f='npy_ObjectLCM'), + ), +'matmul' : + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.matmul'), + "PyUFunc_SimpleBinaryOperationTypeResolver", + TD(notimes_or_obj), + TD(O), + signature='(n?,k),(k,m?)->(n?,m?)', + ), +} + +if sys.version_info[0] >= 3: + # Will be aliased to true_divide in umathmodule.c.src:InitOtherOperators + del defdict['divide'] + +def indent(st, spaces): + indentation = ' '*spaces + indented = indentation + st.replace('\n', '\n'+indentation) + # trim off any trailing spaces + indented = re.sub(r' +$', r'', indented) + return indented + +chartotype1 = {'e': 'e_e', + 'f': 'f_f', + 'd': 'd_d', + 'g': 'g_g', + 'F': 'F_F', + 'D': 'D_D', + 'G': 'G_G', + 'O': 'O_O', + 'P': 'O_O_method'} + +chartotype2 = {'e': 'ee_e', + 'f': 'ff_f', + 'd': 'dd_d', + 'g': 'gg_g', + 'F': 'FF_F', + 'D': 'DD_D', + 'G': 'GG_G', + 'O': 'OO_O', + 'P': 'OO_O_method'} +#for each name +# 1) create functions, data, and signature +# 2) fill in functions and data in InitOperators +# 3) add function. 
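[Reviewer note, not part of the patch: a small worked example of how the chartotype1/chartotype2 tables above combine with the astype rule from TypeDescription to choose a generic loop name; the entry used here is hypothetical.]

    # Illustrative only, for a one-input/one-output type entry:
    #   type 'd', no astype      -> 'PyUFunc_' + chartotype1['d']            == 'PyUFunc_d_d'
    #   type 'e', astype 'f'     -> 'PyUFunc_' + chartotype1['e']
    #                               + '_As_' + chartotype1['f']              == 'PyUFunc_e_e_As_f_f'
    # make_arrays (defined next) emits these as C assignments of the form
    #   <name>_functions[k] = PyUFunc_e_e_As_f_f;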
+ +def make_arrays(funcdict): + # functions array contains an entry for every type implemented NULL + # should be placed where PyUfunc_ style function will be filled in + # later + code1list = [] + code2list = [] + names = sorted(funcdict.keys()) + for name in names: + uf = funcdict[name] + funclist = [] + datalist = [] + siglist = [] + k = 0 + sub = 0 + + for t in uf.type_descriptions: + if t.func_data is FullTypeDescr: + tname = english_upper(chartoname[t.type]) + datalist.append('(void *)NULL') + funclist.append( + '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) + elif isinstance(t.func_data, FuncNameSuffix): + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append( + '%s_%s_%s' % (tname, name, t.func_data.suffix)) + elif t.func_data is None: + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append('%s_%s' % (tname, name)) + if t.simd is not None: + for vt in t.simd: + code2list.append(textwrap.dedent("""\ + #ifdef HAVE_ATTRIBUTE_TARGET_{ISA} + if (npy_cpu_supports("{isa}")) {{ + {fname}_functions[{idx}] = {type}_{fname}_{isa}; + }} + #endif + """).format( + ISA=vt.upper(), isa=vt, + fname=name, type=tname, idx=k + )) + else: + funclist.append('NULL') + if (uf.nin, uf.nout) == (2, 1): + thedict = chartotype2 + elif (uf.nin, uf.nout) == (1, 1): + thedict = chartotype1 + else: + raise ValueError("Could not handle {}[{}]".format(name, t.type)) + + astype = '' + if not t.astype is None: + astype = '_As_%s' % thedict[t.astype] + astr = ('%s_functions[%d] = PyUFunc_%s%s;' % + (name, k, thedict[t.type], astype)) + code2list.append(astr) + if t.type == 'O': + astr = ('%s_data[%d] = (void *) %s;' % + (name, k, t.func_data)) + code2list.append(astr) + datalist.append('(void *)NULL') + elif t.type == 'P': + datalist.append('(void *)"%s"' % t.func_data) + else: + astr = ('%s_data[%d] = (void *) %s;' % + (name, k, t.func_data)) + code2list.append(astr) + datalist.append('(void *)NULL') + #datalist.append('(void *)%s' % t.func_data) + sub += 1 + + for x in t.in_ + t.out: + siglist.append('NPY_%s' % (english_upper(chartoname[x]),)) + + k += 1 + + funcnames = ', '.join(funclist) + signames = ', '.join(siglist) + datanames = ', '.join(datalist) + code1list.append("static PyUFuncGenericFunction %s_functions[] = {%s};" + % (name, funcnames)) + code1list.append("static void * %s_data[] = {%s};" + % (name, datanames)) + code1list.append("static char %s_signatures[] = {%s};" + % (name, signames)) + return "\n".join(code1list), "\n".join(code2list) + +def make_ufuncs(funcdict): + code3list = [] + names = sorted(funcdict.keys()) + for name in names: + uf = funcdict[name] + mlist = [] + docstring = textwrap.dedent(uf.docstring).strip() + if sys.version_info[0] < 3: + docstring = docstring.encode('string-escape') + docstring = docstring.replace(r'"', r'\"') + else: + docstring = docstring.encode('unicode-escape').decode('ascii') + docstring = docstring.replace(r'"', r'\"') + # XXX: I don't understand why the following replace is not + # necessary in the python 2 case. + docstring = docstring.replace(r"'", r"\'") + # Split the docstring because some compilers (like MS) do not like big + # string literal in C code. 
We split at endlines because textwrap.wrap + # do not play well with \n + docstring = '\\n\"\"'.join(docstring.split(r"\n")) + if uf.signature is None: + sig = "NULL" + else: + sig = '"{}"'.format(uf.signature) + fmt = textwrap.dedent("""\ + identity = {identity_expr}; + if ({has_identity} && identity == NULL) {{ + return -1; + }} + f = PyUFunc_FromFuncAndDataAndSignatureAndIdentity( + {name}_functions, {name}_data, {name}_signatures, {nloops}, + {nin}, {nout}, {identity}, "{name}", + "{doc}", 0, {sig}, identity + ); + if ({has_identity}) {{ + Py_DECREF(identity); + }} + if (f == NULL) {{ + return -1; + }} + """) + args = dict( + name=name, nloops=len(uf.type_descriptions), + nin=uf.nin, nout=uf.nout, + has_identity='0' if uf.identity is None_ else '1', + identity='PyUFunc_IdentityValue', + identity_expr=uf.identity, + doc=docstring, + sig=sig, + ) + + # Only PyUFunc_None means don't reorder - we pass this using the old + # argument + if uf.identity is None_: + args['identity'] = 'PyUFunc_None' + args['identity_expr'] = 'NULL' + + mlist.append(fmt.format(**args)) + if uf.typereso is not None: + mlist.append( + r"((PyUFuncObject *)f)->type_resolver = &%s;" % uf.typereso) + mlist.append(r"""PyDict_SetItemString(dictionary, "%s", f);""" % name) + mlist.append(r"""Py_DECREF(f);""") + code3list.append('\n'.join(mlist)) + return '\n'.join(code3list) + + +def make_code(funcdict, filename): + code1, code2 = make_arrays(funcdict) + code3 = make_ufuncs(funcdict) + code2 = indent(code2, 4) + code3 = indent(code3, 4) + code = textwrap.dedent(r""" + + /** Warning this file is autogenerated!!! + + Please make changes to the code generator program (%s) + **/ + #include "cpuid.h" + #include "ufunc_object.h" + #include "ufunc_type_resolution.h" + #include "loops.h" + #include "matmul.h" + %s + + static int + InitOperators(PyObject *dictionary) { + PyObject *f, *identity; + + %s + %s + + return 0; + } + """) % (filename, code1, code2, code3) + return code + + +if __name__ == "__main__": + filename = __file__ + fid = open('__umath_generated.c', 'w') + code = make_code(defdict, filename) + fid.write(code) + fid.close() diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/numpy_api.py b/contrib/python/numpy/py2/numpy/core/code_generators/numpy_api.py new file mode 100644 index 0000000000..a71c236fdd --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/numpy_api.py @@ -0,0 +1,423 @@ +"""Here we define the exported functions, types, etc... which need to be +exported through a global C pointer. + +Each dictionary contains name -> index pair. + +Whenever you change one index, you break the ABI (and the ABI version number +should be incremented). Whenever you add an item to one of the dict, the API +needs to be updated in both setup_common.py and by adding an appropriate +entry to cversion.txt (generate the hash via "python cversions.py"). + +When adding a function, make sure to use the next integer not used as an index +(in case you use an existing index or jump, the build will stop and raise an +exception, so it should hopefully not get unnoticed). 
+ +""" +from __future__ import division, absolute_import, print_function + +from code_generators.genapi import StealRef, NonNull + +# index, type +multiarray_global_vars = { + 'NPY_NUMUSERTYPES': (7, 'int'), + 'NPY_DEFAULT_ASSIGN_CASTING': (292, 'NPY_CASTING'), +} + +multiarray_scalar_bool_values = { + '_PyArrayScalar_BoolValues': (9,) +} + +# index, annotations +# please mark functions that have been checked to not need any annotations +multiarray_types_api = { + 'PyBigArray_Type': (1,), + 'PyArray_Type': (2,), + 'PyArrayDescr_Type': (3,), + 'PyArrayFlags_Type': (4,), + 'PyArrayIter_Type': (5,), + 'PyArrayMultiIter_Type': (6,), + 'PyBoolArrType_Type': (8,), + 'PyGenericArrType_Type': (10,), + 'PyNumberArrType_Type': (11,), + 'PyIntegerArrType_Type': (12,), + 'PySignedIntegerArrType_Type': (13,), + 'PyUnsignedIntegerArrType_Type': (14,), + 'PyInexactArrType_Type': (15,), + 'PyFloatingArrType_Type': (16,), + 'PyComplexFloatingArrType_Type': (17,), + 'PyFlexibleArrType_Type': (18,), + 'PyCharacterArrType_Type': (19,), + 'PyByteArrType_Type': (20,), + 'PyShortArrType_Type': (21,), + 'PyIntArrType_Type': (22,), + 'PyLongArrType_Type': (23,), + 'PyLongLongArrType_Type': (24,), + 'PyUByteArrType_Type': (25,), + 'PyUShortArrType_Type': (26,), + 'PyUIntArrType_Type': (27,), + 'PyULongArrType_Type': (28,), + 'PyULongLongArrType_Type': (29,), + 'PyFloatArrType_Type': (30,), + 'PyDoubleArrType_Type': (31,), + 'PyLongDoubleArrType_Type': (32,), + 'PyCFloatArrType_Type': (33,), + 'PyCDoubleArrType_Type': (34,), + 'PyCLongDoubleArrType_Type': (35,), + 'PyObjectArrType_Type': (36,), + 'PyStringArrType_Type': (37,), + 'PyUnicodeArrType_Type': (38,), + 'PyVoidArrType_Type': (39,), + # End 1.5 API + 'PyTimeIntegerArrType_Type': (214,), + 'PyDatetimeArrType_Type': (215,), + 'PyTimedeltaArrType_Type': (216,), + 'PyHalfArrType_Type': (217,), + 'NpyIter_Type': (218,), + # End 1.6 API +} + +#define NPY_NUMUSERTYPES (*(int *)PyArray_API[6]) +#define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[7]) +#define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[8]) + +multiarray_funcs_api = { + 'PyArray_GetNDArrayCVersion': (0,), + 'PyArray_SetNumericOps': (40,), + 'PyArray_GetNumericOps': (41,), + 'PyArray_INCREF': (42,), + 'PyArray_XDECREF': (43,), + 'PyArray_SetStringFunction': (44,), + 'PyArray_DescrFromType': (45,), + 'PyArray_TypeObjectFromType': (46,), + 'PyArray_Zero': (47,), + 'PyArray_One': (48,), + 'PyArray_CastToType': (49, StealRef(2), NonNull(2)), + 'PyArray_CastTo': (50,), + 'PyArray_CastAnyTo': (51,), + 'PyArray_CanCastSafely': (52,), + 'PyArray_CanCastTo': (53,), + 'PyArray_ObjectType': (54,), + 'PyArray_DescrFromObject': (55,), + 'PyArray_ConvertToCommonType': (56,), + 'PyArray_DescrFromScalar': (57,), + 'PyArray_DescrFromTypeObject': (58,), + 'PyArray_Size': (59,), + 'PyArray_Scalar': (60,), + 'PyArray_FromScalar': (61, StealRef(2)), + 'PyArray_ScalarAsCtype': (62,), + 'PyArray_CastScalarToCtype': (63,), + 'PyArray_CastScalarDirect': (64,), + 'PyArray_ScalarFromObject': (65,), + 'PyArray_GetCastFunc': (66,), + 'PyArray_FromDims': (67,), + 'PyArray_FromDimsAndDataAndDescr': (68, StealRef(3)), + 'PyArray_FromAny': (69, StealRef(2)), + 'PyArray_EnsureArray': (70, StealRef(1)), + 'PyArray_EnsureAnyArray': (71, StealRef(1)), + 'PyArray_FromFile': (72,), + 'PyArray_FromString': (73,), + 'PyArray_FromBuffer': (74,), + 'PyArray_FromIter': (75, StealRef(2)), + 'PyArray_Return': (76, StealRef(1)), + 'PyArray_GetField': (77, StealRef(2), NonNull(2)), + 'PyArray_SetField': (78, StealRef(2), 
NonNull(2)), + 'PyArray_Byteswap': (79,), + 'PyArray_Resize': (80,), + 'PyArray_MoveInto': (81,), + 'PyArray_CopyInto': (82,), + 'PyArray_CopyAnyInto': (83,), + 'PyArray_CopyObject': (84,), + 'PyArray_NewCopy': (85, NonNull(1)), + 'PyArray_ToList': (86,), + 'PyArray_ToString': (87,), + 'PyArray_ToFile': (88,), + 'PyArray_Dump': (89,), + 'PyArray_Dumps': (90,), + 'PyArray_ValidType': (91,), + 'PyArray_UpdateFlags': (92,), + 'PyArray_New': (93, NonNull(1)), + 'PyArray_NewFromDescr': (94, StealRef(2), NonNull([1, 2])), + 'PyArray_DescrNew': (95,), + 'PyArray_DescrNewFromType': (96,), + 'PyArray_GetPriority': (97,), + 'PyArray_IterNew': (98,), + 'PyArray_MultiIterNew': (99,), + 'PyArray_PyIntAsInt': (100,), + 'PyArray_PyIntAsIntp': (101,), + 'PyArray_Broadcast': (102,), + 'PyArray_FillObjectArray': (103,), + 'PyArray_FillWithScalar': (104,), + 'PyArray_CheckStrides': (105,), + 'PyArray_DescrNewByteorder': (106,), + 'PyArray_IterAllButAxis': (107,), + 'PyArray_CheckFromAny': (108, StealRef(2)), + 'PyArray_FromArray': (109, StealRef(2)), + 'PyArray_FromInterface': (110,), + 'PyArray_FromStructInterface': (111,), + 'PyArray_FromArrayAttr': (112,), + 'PyArray_ScalarKind': (113,), + 'PyArray_CanCoerceScalar': (114,), + 'PyArray_NewFlagsObject': (115,), + 'PyArray_CanCastScalar': (116,), + 'PyArray_CompareUCS4': (117,), + 'PyArray_RemoveSmallest': (118,), + 'PyArray_ElementStrides': (119,), + 'PyArray_Item_INCREF': (120,), + 'PyArray_Item_XDECREF': (121,), + 'PyArray_FieldNames': (122,), + 'PyArray_Transpose': (123,), + 'PyArray_TakeFrom': (124,), + 'PyArray_PutTo': (125,), + 'PyArray_PutMask': (126,), + 'PyArray_Repeat': (127,), + 'PyArray_Choose': (128,), + 'PyArray_Sort': (129,), + 'PyArray_ArgSort': (130,), + 'PyArray_SearchSorted': (131,), + 'PyArray_ArgMax': (132,), + 'PyArray_ArgMin': (133,), + 'PyArray_Reshape': (134,), + 'PyArray_Newshape': (135,), + 'PyArray_Squeeze': (136,), + 'PyArray_View': (137, StealRef(2)), + 'PyArray_SwapAxes': (138,), + 'PyArray_Max': (139,), + 'PyArray_Min': (140,), + 'PyArray_Ptp': (141,), + 'PyArray_Mean': (142,), + 'PyArray_Trace': (143,), + 'PyArray_Diagonal': (144,), + 'PyArray_Clip': (145,), + 'PyArray_Conjugate': (146,), + 'PyArray_Nonzero': (147,), + 'PyArray_Std': (148,), + 'PyArray_Sum': (149,), + 'PyArray_CumSum': (150,), + 'PyArray_Prod': (151,), + 'PyArray_CumProd': (152,), + 'PyArray_All': (153,), + 'PyArray_Any': (154,), + 'PyArray_Compress': (155,), + 'PyArray_Flatten': (156,), + 'PyArray_Ravel': (157,), + 'PyArray_MultiplyList': (158,), + 'PyArray_MultiplyIntList': (159,), + 'PyArray_GetPtr': (160,), + 'PyArray_CompareLists': (161,), + 'PyArray_AsCArray': (162, StealRef(5)), + 'PyArray_As1D': (163,), + 'PyArray_As2D': (164,), + 'PyArray_Free': (165,), + 'PyArray_Converter': (166,), + 'PyArray_IntpFromSequence': (167,), + 'PyArray_Concatenate': (168,), + 'PyArray_InnerProduct': (169,), + 'PyArray_MatrixProduct': (170,), + 'PyArray_CopyAndTranspose': (171,), + 'PyArray_Correlate': (172,), + 'PyArray_TypestrConvert': (173,), + 'PyArray_DescrConverter': (174,), + 'PyArray_DescrConverter2': (175,), + 'PyArray_IntpConverter': (176,), + 'PyArray_BufferConverter': (177,), + 'PyArray_AxisConverter': (178,), + 'PyArray_BoolConverter': (179,), + 'PyArray_ByteorderConverter': (180,), + 'PyArray_OrderConverter': (181,), + 'PyArray_EquivTypes': (182,), + 'PyArray_Zeros': (183, StealRef(3)), + 'PyArray_Empty': (184, StealRef(3)), + 'PyArray_Where': (185,), + 'PyArray_Arange': (186,), + 'PyArray_ArangeObj': (187,), + 'PyArray_SortkindConverter': (188,), + 
'PyArray_LexSort': (189,), + 'PyArray_Round': (190,), + 'PyArray_EquivTypenums': (191,), + 'PyArray_RegisterDataType': (192,), + 'PyArray_RegisterCastFunc': (193,), + 'PyArray_RegisterCanCast': (194,), + 'PyArray_InitArrFuncs': (195,), + 'PyArray_IntTupleFromIntp': (196,), + 'PyArray_TypeNumFromName': (197,), + 'PyArray_ClipmodeConverter': (198,), + 'PyArray_OutputConverter': (199,), + 'PyArray_BroadcastToShape': (200,), + '_PyArray_SigintHandler': (201,), + '_PyArray_GetSigintBuf': (202,), + 'PyArray_DescrAlignConverter': (203,), + 'PyArray_DescrAlignConverter2': (204,), + 'PyArray_SearchsideConverter': (205,), + 'PyArray_CheckAxis': (206,), + 'PyArray_OverflowMultiplyList': (207,), + 'PyArray_CompareString': (208,), + 'PyArray_MultiIterFromObjects': (209,), + 'PyArray_GetEndianness': (210,), + 'PyArray_GetNDArrayCFeatureVersion': (211,), + 'PyArray_Correlate2': (212,), + 'PyArray_NeighborhoodIterNew': (213,), + # End 1.5 API + 'PyArray_SetDatetimeParseFunction': (219,), + 'PyArray_DatetimeToDatetimeStruct': (220,), + 'PyArray_TimedeltaToTimedeltaStruct': (221,), + 'PyArray_DatetimeStructToDatetime': (222,), + 'PyArray_TimedeltaStructToTimedelta': (223,), + # NDIter API + 'NpyIter_New': (224,), + 'NpyIter_MultiNew': (225,), + 'NpyIter_AdvancedNew': (226,), + 'NpyIter_Copy': (227,), + 'NpyIter_Deallocate': (228,), + 'NpyIter_HasDelayedBufAlloc': (229,), + 'NpyIter_HasExternalLoop': (230,), + 'NpyIter_EnableExternalLoop': (231,), + 'NpyIter_GetInnerStrideArray': (232,), + 'NpyIter_GetInnerLoopSizePtr': (233,), + 'NpyIter_Reset': (234,), + 'NpyIter_ResetBasePointers': (235,), + 'NpyIter_ResetToIterIndexRange': (236,), + 'NpyIter_GetNDim': (237,), + 'NpyIter_GetNOp': (238,), + 'NpyIter_GetIterNext': (239,), + 'NpyIter_GetIterSize': (240,), + 'NpyIter_GetIterIndexRange': (241,), + 'NpyIter_GetIterIndex': (242,), + 'NpyIter_GotoIterIndex': (243,), + 'NpyIter_HasMultiIndex': (244,), + 'NpyIter_GetShape': (245,), + 'NpyIter_GetGetMultiIndex': (246,), + 'NpyIter_GotoMultiIndex': (247,), + 'NpyIter_RemoveMultiIndex': (248,), + 'NpyIter_HasIndex': (249,), + 'NpyIter_IsBuffered': (250,), + 'NpyIter_IsGrowInner': (251,), + 'NpyIter_GetBufferSize': (252,), + 'NpyIter_GetIndexPtr': (253,), + 'NpyIter_GotoIndex': (254,), + 'NpyIter_GetDataPtrArray': (255,), + 'NpyIter_GetDescrArray': (256,), + 'NpyIter_GetOperandArray': (257,), + 'NpyIter_GetIterView': (258,), + 'NpyIter_GetReadFlags': (259,), + 'NpyIter_GetWriteFlags': (260,), + 'NpyIter_DebugPrint': (261,), + 'NpyIter_IterationNeedsAPI': (262,), + 'NpyIter_GetInnerFixedStrideArray': (263,), + 'NpyIter_RemoveAxis': (264,), + 'NpyIter_GetAxisStrideArray': (265,), + 'NpyIter_RequiresBuffering': (266,), + 'NpyIter_GetInitialDataPtrArray': (267,), + 'NpyIter_CreateCompatibleStrides': (268,), + # + 'PyArray_CastingConverter': (269,), + 'PyArray_CountNonzero': (270,), + 'PyArray_PromoteTypes': (271,), + 'PyArray_MinScalarType': (272,), + 'PyArray_ResultType': (273,), + 'PyArray_CanCastArrayTo': (274,), + 'PyArray_CanCastTypeTo': (275,), + 'PyArray_EinsteinSum': (276,), + 'PyArray_NewLikeArray': (277, StealRef(3), NonNull(1)), + 'PyArray_GetArrayParamsFromObject': (278,), + 'PyArray_ConvertClipmodeSequence': (279,), + 'PyArray_MatrixProduct2': (280,), + # End 1.6 API + 'NpyIter_IsFirstVisit': (281,), + 'PyArray_SetBaseObject': (282, StealRef(2)), + 'PyArray_CreateSortedStridePerm': (283,), + 'PyArray_RemoveAxesInPlace': (284,), + 'PyArray_DebugPrint': (285,), + 'PyArray_FailUnlessWriteable': (286,), + 'PyArray_SetUpdateIfCopyBase': (287, StealRef(2)), + 
'PyDataMem_NEW': (288,), + 'PyDataMem_FREE': (289,), + 'PyDataMem_RENEW': (290,), + 'PyDataMem_SetEventHook': (291,), + 'PyArray_MapIterSwapAxes': (293,), + 'PyArray_MapIterArray': (294,), + 'PyArray_MapIterNext': (295,), + # End 1.7 API + 'PyArray_Partition': (296,), + 'PyArray_ArgPartition': (297,), + 'PyArray_SelectkindConverter': (298,), + 'PyDataMem_NEW_ZEROED': (299,), + # End 1.8 API + # End 1.9 API + 'PyArray_CheckAnyScalarExact': (300, NonNull(1)), + # End 1.10 API + 'PyArray_MapIterArrayCopyIfOverlap': (301,), + # End 1.13 API + 'PyArray_ResolveWritebackIfCopy': (302,), + 'PyArray_SetWritebackIfCopyBase': (303,), + # End 1.14 API +} + +ufunc_types_api = { + 'PyUFunc_Type': (0,) +} + +ufunc_funcs_api = { + 'PyUFunc_FromFuncAndData': (1,), + 'PyUFunc_RegisterLoopForType': (2,), + 'PyUFunc_GenericFunction': (3,), + 'PyUFunc_f_f_As_d_d': (4,), + 'PyUFunc_d_d': (5,), + 'PyUFunc_f_f': (6,), + 'PyUFunc_g_g': (7,), + 'PyUFunc_F_F_As_D_D': (8,), + 'PyUFunc_F_F': (9,), + 'PyUFunc_D_D': (10,), + 'PyUFunc_G_G': (11,), + 'PyUFunc_O_O': (12,), + 'PyUFunc_ff_f_As_dd_d': (13,), + 'PyUFunc_ff_f': (14,), + 'PyUFunc_dd_d': (15,), + 'PyUFunc_gg_g': (16,), + 'PyUFunc_FF_F_As_DD_D': (17,), + 'PyUFunc_DD_D': (18,), + 'PyUFunc_FF_F': (19,), + 'PyUFunc_GG_G': (20,), + 'PyUFunc_OO_O': (21,), + 'PyUFunc_O_O_method': (22,), + 'PyUFunc_OO_O_method': (23,), + 'PyUFunc_On_Om': (24,), + 'PyUFunc_GetPyValues': (25,), + 'PyUFunc_checkfperr': (26,), + 'PyUFunc_clearfperr': (27,), + 'PyUFunc_getfperr': (28,), + 'PyUFunc_handlefperr': (29,), + 'PyUFunc_ReplaceLoopBySignature': (30,), + 'PyUFunc_FromFuncAndDataAndSignature': (31,), + 'PyUFunc_SetUsesArraysAsData': (32,), + # End 1.5 API + 'PyUFunc_e_e': (33,), + 'PyUFunc_e_e_As_f_f': (34,), + 'PyUFunc_e_e_As_d_d': (35,), + 'PyUFunc_ee_e': (36,), + 'PyUFunc_ee_e_As_ff_f': (37,), + 'PyUFunc_ee_e_As_dd_d': (38,), + # End 1.6 API + 'PyUFunc_DefaultTypeResolver': (39,), + 'PyUFunc_ValidateCasting': (40,), + # End 1.7 API + 'PyUFunc_RegisterLoopForDescr': (41,), + # End 1.8 API + 'PyUFunc_FromFuncAndDataAndSignatureAndIdentity': (42,), + # End 1.16 API +} + +# List of all the dicts which define the C API +# XXX: DO NOT CHANGE THE ORDER OF TUPLES BELOW ! +multiarray_api = ( + multiarray_global_vars, + multiarray_scalar_bool_values, + multiarray_types_api, + multiarray_funcs_api, +) + +ufunc_api = ( + ufunc_funcs_api, + ufunc_types_api +) + +full_api = multiarray_api + ufunc_api diff --git a/contrib/python/numpy/py2/numpy/core/code_generators/ufunc_docstrings.py b/contrib/python/numpy/py2/numpy/core/code_generators/ufunc_docstrings.py new file mode 100644 index 0000000000..6c0555e231 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/code_generators/ufunc_docstrings.py @@ -0,0 +1,3930 @@ +""" +Docstrings for generated ufuncs + +The syntax is designed to look like the function add_newdoc is being +called from numpy.lib, but in this file add_newdoc puts the docstrings +in a dictionary. This dictionary is used in +numpy/core/code_generators/generate_umath.py to generate the docstrings +for the ufuncs in numpy.core at the C level when the ufuncs are created +at compile time. + +""" +from __future__ import division, absolute_import, print_function +import textwrap + +docdict = {} + +def get(name): + return docdict.get(name) + +# common parameter text to all ufuncs +subst = { + 'PARAMS': textwrap.dedent(""" + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. 
If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + where : array_like, optional + Values of True indicate to calculate the ufunc at that position, values + of False indicate to leave the value in the output alone. + **kwargs + For other keyword-only arguments, see the + :ref:`ufunc docs <ufuncs.kwargs>`. + """).strip(), + 'OUT_SCALAR_1': "This is a scalar if `x` is a scalar.", + 'OUT_SCALAR_2': "This is a scalar if both `x1` and `x2` are scalars.", +} + +def add_newdoc(place, name, doc): + doc = textwrap.dedent(doc).strip() + + if name[0] != '_' and name != 'matmul': + # matmul is special, it does not use the OUT_SCALAR replacement strings + if '\nx :' in doc: + assert '$OUT_SCALAR_1' in doc, "in {}".format(name) + elif '\nx2 :' in doc or '\nx1, x2 :' in doc: + assert '$OUT_SCALAR_2' in doc, "in {}".format(name) + else: + assert False, "Could not detect number of inputs in {}".format(name) + for k, v in subst.items(): + doc = doc.replace('$' + k, v) + + docdict['.'.join((place, name))] = doc + + +add_newdoc('numpy.core.umath', 'absolute', + """ + Calculate the absolute value element-wise. + + ``np.abs`` is a shorthand for this function. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + absolute : ndarray + An ndarray containing the absolute value of + each element in `x`. For complex input, ``a + ib``, the + absolute value is :math:`\\sqrt{ a^2 + b^2 }`. + $OUT_SCALAR_1 + + Examples + -------- + >>> x = np.array([-1.2, 1.2]) + >>> np.absolute(x) + array([ 1.2, 1.2]) + >>> np.absolute(1.2 + 1j) + 1.5620499351813308 + + Plot the function over ``[-10, 10]``: + + >>> import matplotlib.pyplot as plt + + >>> x = np.linspace(start=-10, stop=10, num=101) + >>> plt.plot(x, np.absolute(x)) + >>> plt.show() + + Plot the function over the complex plane: + + >>> xx = x + 1j * x[:, np.newaxis] + >>> plt.imshow(np.abs(xx), extent=[-10, 10, -10, 10], cmap='gray') + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'add', + """ + Add arguments element-wise. + + Parameters + ---------- + x1, x2 : array_like + The arrays to be added. If ``x1.shape != x2.shape``, they must be + broadcastable to a common shape (which may be the shape of one or + the other). + $PARAMS + + Returns + ------- + add : ndarray or scalar + The sum of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + Notes + ----- + Equivalent to `x1` + `x2` in terms of array broadcasting. + + Examples + -------- + >>> np.add(1.0, 4.0) + 5.0 + >>> x1 = np.arange(9.0).reshape((3, 3)) + >>> x2 = np.arange(3.0) + >>> np.add(x1, x2) + array([[ 0., 2., 4.], + [ 3., 5., 7.], + [ 6., 8., 10.]]) + + """) + +add_newdoc('numpy.core.umath', 'arccos', + """ + Trigonometric inverse cosine, element-wise. + + The inverse of `cos` so that, if ``y = cos(x)``, then ``x = arccos(y)``. + + Parameters + ---------- + x : array_like + `x`-coordinate on the unit circle. + For real arguments, the domain is [-1, 1]. + $PARAMS + + Returns + ------- + angle : ndarray + The angle of the ray intersecting the unit circle at the given + `x`-coordinate in radians [0, pi]. + $OUT_SCALAR_1 + + See Also + -------- + cos, arctan, arcsin, emath.arccos + + Notes + ----- + `arccos` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that `cos(z) = x`. The convention is to return + the angle `z` whose real part lies in `[0, pi]`. 
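For illustration, a minimal standalone sketch of the placeholder expansion that ``add_newdoc`` performs: raw docstrings carry markers such as ``$PARAMS`` and ``$OUT_SCALAR_1``, and the shared boilerplate from ``subst`` is spliced in with plain string replacement before the text lands in ``docdict``. The names ``subst_demo`` and ``render`` below are illustrative only and are not part of the generator.

    import textwrap

    # Shared boilerplate, analogous to `subst` above (content abbreviated).
    subst_demo = {
        'PARAMS': "out : ndarray, optional\n    Where the result is stored.",
        'OUT_SCALAR_1': "This is a scalar if `x` is a scalar.",
    }

    def render(doc):
        # Same dedent/strip plus replace-based expansion used by add_newdoc.
        doc = textwrap.dedent(doc).strip()
        for key, value in subst_demo.items():
            doc = doc.replace('$' + key, value)
        return doc

    raw = """
        Parameters
        ----------
        x : array_like
            Input array.
        $PARAMS

        Returns
        -------
        y : ndarray
            $OUT_SCALAR_1
        """
    print(render(raw))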
+ + For real-valued input data types, `arccos` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arccos` is a complex analytic function that + has branch cuts `[-inf, -1]` and `[1, inf]` and is continuous from + above on the former and from below on the latter. + + The inverse `cos` is also known as `acos` or cos^-1. + + References + ---------- + M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 79. http://www.math.sfu.ca/~cbm/aands/ + + Examples + -------- + We expect the arccos of 1 to be 0, and of -1 to be pi: + + >>> np.arccos([1, -1]) + array([ 0. , 3.14159265]) + + Plot arccos: + + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(-1, 1, num=100) + >>> plt.plot(x, np.arccos(x)) + >>> plt.axis('tight') + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'arccosh', + """ + Inverse hyperbolic cosine, element-wise. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + arccosh : ndarray + Array of the same shape as `x`. + $OUT_SCALAR_1 + + See Also + -------- + + cosh, arcsinh, sinh, arctanh, tanh + + Notes + ----- + `arccosh` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that `cosh(z) = x`. The convention is to return the + `z` whose imaginary part lies in `[-pi, pi]` and the real part in + ``[0, inf]``. + + For real-valued input data types, `arccosh` always returns real output. + For each value that cannot be expressed as a real number or infinity, it + yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arccosh` is a complex analytical function that + has a branch cut `[-inf, 1]` and is continuous from above on it. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ + .. [2] Wikipedia, "Inverse hyperbolic function", + https://en.wikipedia.org/wiki/Arccosh + + Examples + -------- + >>> np.arccosh([np.e, 10.0]) + array([ 1.65745445, 2.99322285]) + >>> np.arccosh(1) + 0.0 + + """) + +add_newdoc('numpy.core.umath', 'arcsin', + """ + Inverse sine, element-wise. + + Parameters + ---------- + x : array_like + `y`-coordinate on the unit circle. + $PARAMS + + Returns + ------- + angle : ndarray + The inverse sine of each element in `x`, in radians and in the + closed interval ``[-pi/2, pi/2]``. + $OUT_SCALAR_1 + + See Also + -------- + sin, cos, arccos, tan, arctan, arctan2, emath.arcsin + + Notes + ----- + `arcsin` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that :math:`sin(z) = x`. The convention is to + return the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, *arcsin* always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arcsin` is a complex analytic function that + has, by convention, the branch cuts [-inf, -1] and [1, inf] and is + continuous from above on the former and from below on the latter. + + The inverse sine is also known as `asin` or sin^{-1}. + + References + ---------- + Abramowitz, M. and Stegun, I. A., *Handbook of Mathematical Functions*, + 10th printing, New York: Dover, 1964, pp. 79ff. 
+ http://www.math.sfu.ca/~cbm/aands/ + + Examples + -------- + >>> np.arcsin(1) # pi/2 + 1.5707963267948966 + >>> np.arcsin(-1) # -pi/2 + -1.5707963267948966 + >>> np.arcsin(0) + 0.0 + + """) + +add_newdoc('numpy.core.umath', 'arcsinh', + """ + Inverse hyperbolic sine element-wise. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Array of the same shape as `x`. + $OUT_SCALAR_1 + + Notes + ----- + `arcsinh` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that `sinh(z) = x`. The convention is to return the + `z` whose imaginary part lies in `[-pi/2, pi/2]`. + + For real-valued input data types, `arcsinh` always returns real output. + For each value that cannot be expressed as a real number or infinity, it + returns ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arccos` is a complex analytical function that + has branch cuts `[1j, infj]` and `[-1j, -infj]` and is continuous from + the right on the former and from the left on the latter. + + The inverse hyperbolic sine is also known as `asinh` or ``sinh^-1``. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ + .. [2] Wikipedia, "Inverse hyperbolic function", + https://en.wikipedia.org/wiki/Arcsinh + + Examples + -------- + >>> np.arcsinh(np.array([np.e, 10.0])) + array([ 1.72538256, 2.99822295]) + + """) + +add_newdoc('numpy.core.umath', 'arctan', + """ + Trigonometric inverse tangent, element-wise. + + The inverse of tan, so that if ``y = tan(x)`` then ``x = arctan(y)``. + + Parameters + ---------- + x : array_like + $PARAMS + + Returns + ------- + out : ndarray or scalar + Out has the same shape as `x`. Its real part is in + ``[-pi/2, pi/2]`` (``arctan(+/-inf)`` returns ``+/-pi/2``). + $OUT_SCALAR_1 + + See Also + -------- + arctan2 : The "four quadrant" arctan of the angle formed by (`x`, `y`) + and the positive `x`-axis. + angle : Argument of complex values. + + Notes + ----- + `arctan` is a multi-valued function: for each `x` there are infinitely + many numbers `z` such that tan(`z`) = `x`. The convention is to return + the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, `arctan` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arctan` is a complex analytic function that + has [`1j, infj`] and [`-1j, -infj`] as branch cuts, and is continuous + from the left on the former and from the right on the latter. + + The inverse tangent is also known as `atan` or tan^{-1}. + + References + ---------- + Abramowitz, M. and Stegun, I. A., *Handbook of Mathematical Functions*, + 10th printing, New York: Dover, 1964, pp. 79. + http://www.math.sfu.ca/~cbm/aands/ + + Examples + -------- + We expect the arctan of 0 to be 0, and of 1 to be pi/4: + + >>> np.arctan([0, 1]) + array([ 0. , 0.78539816]) + + >>> np.pi/4 + 0.78539816339744828 + + Plot arctan: + + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(-10, 10) + >>> plt.plot(x, np.arctan(x)) + >>> plt.axis('tight') + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'arctan2', + """ + Element-wise arc tangent of ``x1/x2`` choosing the quadrant correctly. 
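The quadrant handling described for ``arctan2`` below is exactly what plain ``arctan`` cannot provide, because ``arctan`` only ever sees the ratio ``x1/x2``. A small illustrative comparison (printed values are approximate):

    import numpy as np

    # arctan of a ratio cannot tell opposite quadrants apart:
    print(np.arctan(1 / -1))    # -0.7853..., identical to arctan(-1 / 1)
    print(np.arctan(-1 / 1))    # -0.7853...

    # arctan2 receives y and x separately and recovers the quadrant:
    print(np.arctan2(1, -1))    #  2.3561... (3*pi/4, second quadrant)
    print(np.arctan2(-1, 1))    # -0.7853... (fourth quadrant)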
+ + The quadrant (i.e., branch) is chosen so that ``arctan2(x1, x2)`` is + the signed angle in radians between the ray ending at the origin and + passing through the point (1,0), and the ray ending at the origin and + passing through the point (`x2`, `x1`). (Note the role reversal: the + "`y`-coordinate" is the first function parameter, the "`x`-coordinate" + is the second.) By IEEE convention, this function is defined for + `x2` = +/-0 and for either or both of `x1` and `x2` = +/-inf (see + Notes for specific values). + + This function is not defined for complex-valued arguments; for the + so-called argument of complex values, use `angle`. + + Parameters + ---------- + x1 : array_like, real-valued + `y`-coordinates. + x2 : array_like, real-valued + `x`-coordinates. `x2` must be broadcastable to match the shape of + `x1` or vice versa. + $PARAMS + + Returns + ------- + angle : ndarray + Array of angles in radians, in the range ``[-pi, pi]``. + $OUT_SCALAR_2 + + See Also + -------- + arctan, tan, angle + + Notes + ----- + *arctan2* is identical to the `atan2` function of the underlying + C library. The following special values are defined in the C + standard: [1]_ + + ====== ====== ================ + `x1` `x2` `arctan2(x1,x2)` + ====== ====== ================ + +/- 0 +0 +/- 0 + +/- 0 -0 +/- pi + > 0 +/-inf +0 / +pi + < 0 +/-inf -0 / -pi + +/-inf +inf +/- (pi/4) + +/-inf -inf +/- (3*pi/4) + ====== ====== ================ + + Note that +0 and -0 are distinct floating point numbers, as are +inf + and -inf. + + References + ---------- + .. [1] ISO/IEC standard 9899:1999, "Programming language C." + + Examples + -------- + Consider four points in different quadrants: + + >>> x = np.array([-1, +1, +1, -1]) + >>> y = np.array([-1, -1, +1, +1]) + >>> np.arctan2(y, x) * 180 / np.pi + array([-135., -45., 45., 135.]) + + Note the order of the parameters. `arctan2` is defined also when `x2` = 0 + and at several other special points, obtaining values in + the range ``[-pi, pi]``: + + >>> np.arctan2([1., -1.], [0., 0.]) + array([ 1.57079633, -1.57079633]) + >>> np.arctan2([0., 0., np.inf], [+0., -0., np.inf]) + array([ 0. , 3.14159265, 0.78539816]) + + """) + +add_newdoc('numpy.core.umath', '_arg', + """ + DO NOT USE, ONLY FOR TESTING + """) + +add_newdoc('numpy.core.umath', 'arctanh', + """ + Inverse hyperbolic tangent element-wise. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Array of the same shape as `x`. + $OUT_SCALAR_1 + + See Also + -------- + emath.arctanh + + Notes + ----- + `arctanh` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that `tanh(z) = x`. The convention is to return + the `z` whose imaginary part lies in `[-pi/2, pi/2]`. + + For real-valued input data types, `arctanh` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `arctanh` is a complex analytical function + that has branch cuts `[-1, -inf]` and `[1, inf]` and is continuous from + above on the former and from below on the latter. + + The inverse hyperbolic tangent is also known as `atanh` or ``tanh^-1``. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 86. http://www.math.sfu.ca/~cbm/aands/ + .. 
[2] Wikipedia, "Inverse hyperbolic function", + https://en.wikipedia.org/wiki/Arctanh + + Examples + -------- + >>> np.arctanh([0, -0.5]) + array([ 0. , -0.54930614]) + + """) + +add_newdoc('numpy.core.umath', 'bitwise_and', + """ + Compute the bit-wise AND of two arrays element-wise. + + Computes the bit-wise AND of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``&``. + + Parameters + ---------- + x1, x2 : array_like + Only integer and boolean types are handled. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Result. + $OUT_SCALAR_2 + + See Also + -------- + logical_and + bitwise_or + bitwise_xor + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + The number 13 is represented by ``00001101``. Likewise, 17 is + represented by ``00010001``. The bit-wise AND of 13 and 17 is + therefore ``000000001``, or 1: + + >>> np.bitwise_and(13, 17) + 1 + + >>> np.bitwise_and(14, 13) + 12 + >>> np.binary_repr(12) + '1100' + >>> np.bitwise_and([14,3], 13) + array([12, 1]) + + >>> np.bitwise_and([11,7], [4,25]) + array([0, 1]) + >>> np.bitwise_and(np.array([2,5,255]), np.array([3,14,16])) + array([ 2, 4, 16]) + >>> np.bitwise_and([True, True], [False, True]) + array([False, True]) + + """) + +add_newdoc('numpy.core.umath', 'bitwise_or', + """ + Compute the bit-wise OR of two arrays element-wise. + + Computes the bit-wise OR of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``|``. + + Parameters + ---------- + x1, x2 : array_like + Only integer and boolean types are handled. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Result. + $OUT_SCALAR_2 + + See Also + -------- + logical_or + bitwise_and + bitwise_xor + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + The number 13 has the binaray representation ``00001101``. Likewise, + 16 is represented by ``00010000``. The bit-wise OR of 13 and 16 is + then ``000111011``, or 29: + + >>> np.bitwise_or(13, 16) + 29 + >>> np.binary_repr(29) + '11101' + + >>> np.bitwise_or(32, 2) + 34 + >>> np.bitwise_or([33, 4], 1) + array([33, 5]) + >>> np.bitwise_or([33, 4], [1, 2]) + array([33, 6]) + + >>> np.bitwise_or(np.array([2, 5, 255]), np.array([4, 4, 4])) + array([ 6, 5, 255]) + >>> np.array([2, 5, 255]) | np.array([4, 4, 4]) + array([ 6, 5, 255]) + >>> np.bitwise_or(np.array([2, 5, 255, 2147483647L], dtype=np.int32), + ... np.array([4, 4, 4, 2147483647L], dtype=np.int32)) + array([ 6, 5, 255, 2147483647]) + >>> np.bitwise_or([True, True], [False, True]) + array([ True, True]) + + """) + +add_newdoc('numpy.core.umath', 'bitwise_xor', + """ + Compute the bit-wise XOR of two arrays element-wise. + + Computes the bit-wise XOR of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``^``. + + Parameters + ---------- + x1, x2 : array_like + Only integer and boolean types are handled. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Result. + $OUT_SCALAR_2 + + See Also + -------- + logical_xor + bitwise_and + bitwise_or + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + The number 13 is represented by ``00001101``. Likewise, 17 is + represented by ``00010001``. 
The bit-wise XOR of 13 and 17 is + therefore ``00011100``, or 28: + + >>> np.bitwise_xor(13, 17) + 28 + >>> np.binary_repr(28) + '11100' + + >>> np.bitwise_xor(31, 5) + 26 + >>> np.bitwise_xor([31,3], 5) + array([26, 6]) + + >>> np.bitwise_xor([31,3], [5,6]) + array([26, 5]) + >>> np.bitwise_xor([True, True], [False, True]) + array([ True, False]) + + """) + +add_newdoc('numpy.core.umath', 'ceil', + """ + Return the ceiling of the input, element-wise. + + The ceil of the scalar `x` is the smallest integer `i`, such that + `i >= x`. It is often denoted as :math:`\\lceil x \\rceil`. + + Parameters + ---------- + x : array_like + Input data. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The ceiling of each element in `x`, with `float` dtype. + $OUT_SCALAR_1 + + See Also + -------- + floor, trunc, rint + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.ceil(a) + array([-1., -1., -0., 1., 2., 2., 2.]) + + """) + +add_newdoc('numpy.core.umath', 'trunc', + """ + Return the truncated value of the input, element-wise. + + The truncated value of the scalar `x` is the nearest integer `i` which + is closer to zero than `x` is. In short, the fractional part of the + signed number `x` is discarded. + + Parameters + ---------- + x : array_like + Input data. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The truncated value of each element in `x`. + $OUT_SCALAR_1 + + See Also + -------- + ceil, floor, rint + + Notes + ----- + .. versionadded:: 1.3.0 + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.trunc(a) + array([-1., -1., -0., 0., 1., 1., 2.]) + + """) + +add_newdoc('numpy.core.umath', 'conjugate', + """ + Return the complex conjugate, element-wise. + + The complex conjugate of a complex number is obtained by changing the + sign of its imaginary part. + + Parameters + ---------- + x : array_like + Input value. + $PARAMS + + Returns + ------- + y : ndarray + The complex conjugate of `x`, with same dtype as `y`. + $OUT_SCALAR_1 + + Examples + -------- + >>> np.conjugate(1+2j) + (1-2j) + + >>> x = np.eye(2) + 1j * np.eye(2) + >>> np.conjugate(x) + array([[ 1.-1.j, 0.-0.j], + [ 0.-0.j, 1.-1.j]]) + + """) + +add_newdoc('numpy.core.umath', 'cos', + """ + Cosine element-wise. + + Parameters + ---------- + x : array_like + Input array in radians. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding cosine values. + $OUT_SCALAR_1 + + Notes + ----- + If `out` is provided, the function writes the result into it, + and returns a reference to `out`. (See Examples) + + References + ---------- + M. Abramowitz and I. A. Stegun, Handbook of Mathematical Functions. + New York, NY: Dover, 1972. + + Examples + -------- + >>> np.cos(np.array([0, np.pi/2, np.pi])) + array([ 1.00000000e+00, 6.12303177e-17, -1.00000000e+00]) + >>> + >>> # Example of providing the optional output parameter + >>> out2 = np.cos([0.1], out1) + >>> out2 is out1 + True + >>> + >>> # Example of ValueError due to provision of shape mis-matched `out` + >>> np.cos(np.zeros((3,3)),np.zeros((2,2))) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ValueError: operands could not be broadcast together with shapes (3,3) (2,2) + + """) + +add_newdoc('numpy.core.umath', 'cosh', + """ + Hyperbolic cosine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) + np.exp(-x))`` and ``np.cos(1j*x)``. + + Parameters + ---------- + x : array_like + Input array. 
+ $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array of same shape as `x`. + $OUT_SCALAR_1 + + Examples + -------- + >>> np.cosh(0) + 1.0 + + The hyperbolic cosine describes the shape of a hanging cable: + + >>> import matplotlib.pyplot as plt + >>> x = np.linspace(-4, 4, 1000) + >>> plt.plot(x, np.cosh(x)) + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'degrees', + """ + Convert angles from radians to degrees. + + Parameters + ---------- + x : array_like + Input array in radians. + $PARAMS + + Returns + ------- + y : ndarray of floats + The corresponding degree values; if `out` was supplied this is a + reference to it. + $OUT_SCALAR_1 + + See Also + -------- + rad2deg : equivalent function + + Examples + -------- + Convert a radian array to degrees + + >>> rad = np.arange(12.)*np.pi/6 + >>> np.degrees(rad) + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., + 270., 300., 330.]) + + >>> out = np.zeros((rad.shape)) + >>> r = degrees(rad, out) + >>> np.all(r == out) + True + + """) + +add_newdoc('numpy.core.umath', 'rad2deg', + """ + Convert angles from radians to degrees. + + Parameters + ---------- + x : array_like + Angle in radians. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding angle in degrees. + $OUT_SCALAR_1 + + See Also + -------- + deg2rad : Convert angles from degrees to radians. + unwrap : Remove large jumps in angle by wrapping. + + Notes + ----- + .. versionadded:: 1.3.0 + + rad2deg(x) is ``180 * x / pi``. + + Examples + -------- + >>> np.rad2deg(np.pi/2) + 90.0 + + """) + +add_newdoc('numpy.core.umath', 'heaviside', + """ + Compute the Heaviside step function. + + The Heaviside step function is defined as:: + + 0 if x1 < 0 + heaviside(x1, x2) = x2 if x1 == 0 + 1 if x1 > 0 + + where `x2` is often taken to be 0.5, but 0 and 1 are also sometimes used. + + Parameters + ---------- + x1 : array_like + Input values. + x2 : array_like + The value of the function when x1 is 0. + $PARAMS + + Returns + ------- + out : ndarray or scalar + The output array, element-wise Heaviside step function of `x1`. + $OUT_SCALAR_2 + + Notes + ----- + .. versionadded:: 1.13.0 + + References + ---------- + .. Wikipedia, "Heaviside step function", + https://en.wikipedia.org/wiki/Heaviside_step_function + + Examples + -------- + >>> np.heaviside([-1.5, 0, 2.0], 0.5) + array([ 0. , 0.5, 1. ]) + >>> np.heaviside([-1.5, 0, 2.0], 1) + array([ 0., 1., 1.]) + """) + +add_newdoc('numpy.core.umath', 'divide', + """ + Divide arguments element-wise. + + Parameters + ---------- + x1 : array_like + Dividend array. + x2 : array_like + Divisor array. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The quotient ``x1/x2``, element-wise. + $OUT_SCALAR_2 + + See Also + -------- + seterr : Set whether to raise or warn on overflow, underflow and + division by zero. + + Notes + ----- + Equivalent to ``x1`` / ``x2`` in terms of array-broadcasting. + + Behavior on division by zero can be changed using ``seterr``. + + In Python 2, when both ``x1`` and ``x2`` are of an integer type, + ``divide`` will behave like ``floor_divide``. In Python 3, it behaves + like ``true_divide``. + + Examples + -------- + >>> np.divide(2.0, 4.0) + 0.5 + >>> x1 = np.arange(9.0).reshape((3, 3)) + >>> x2 = np.arange(3.0) + >>> np.divide(x1, x2) + array([[ NaN, 1. , 1. ], + [ Inf, 4. , 2.5], + [ Inf, 7. , 4. ]]) + + Note the behavior with integer types (Python 2 only): + + >>> np.divide(2, 4) + 0 + >>> np.divide(2, 4.) 
+ 0.5 + + Division by zero always yields zero in integer arithmetic (again, + Python 2 only), and does not raise an exception or a warning: + + >>> np.divide(np.array([0, 1], dtype=int), np.array([0, 0], dtype=int)) + array([0, 0]) + + Division by zero can, however, be caught using ``seterr``: + + >>> old_err_state = np.seterr(divide='raise') + >>> np.divide(1, 0) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + FloatingPointError: divide by zero encountered in divide + + >>> ignored_states = np.seterr(**old_err_state) + >>> np.divide(1, 0) + 0 + + """) + +add_newdoc('numpy.core.umath', 'equal', + """ + Return (x1 == x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays of the same shape. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. + $OUT_SCALAR_2 + + See Also + -------- + not_equal, greater_equal, less_equal, greater, less + + Examples + -------- + >>> np.equal([0, 1, 3], np.arange(3)) + array([ True, True, False]) + + What is compared are values, not types. So an int (1) and an array of + length one can evaluate as True: + + >>> np.equal(1, np.ones(1)) + array([ True]) + + """) + +add_newdoc('numpy.core.umath', 'exp', + """ + Calculate the exponential of all elements in the input array. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise exponential of `x`. + $OUT_SCALAR_1 + + See Also + -------- + expm1 : Calculate ``exp(x) - 1`` for all elements in the array. + exp2 : Calculate ``2**x`` for all elements in the array. + + Notes + ----- + The irrational number ``e`` is also known as Euler's number. It is + approximately 2.718281, and is the base of the natural logarithm, + ``ln`` (this means that, if :math:`x = \\ln y = \\log_e y`, + then :math:`e^x = y`. For real input, ``exp(x)`` is always positive. + + For complex arguments, ``x = a + ib``, we can write + :math:`e^x = e^a e^{ib}`. The first term, :math:`e^a`, is already + known (it is the real argument, described above). The second term, + :math:`e^{ib}`, is :math:`\\cos b + i \\sin b`, a function with + magnitude 1 and a periodic phase. + + References + ---------- + .. [1] Wikipedia, "Exponential function", + https://en.wikipedia.org/wiki/Exponential_function + .. [2] M. Abramovitz and I. A. Stegun, "Handbook of Mathematical Functions + with Formulas, Graphs, and Mathematical Tables," Dover, 1964, p. 69, + http://www.math.sfu.ca/~cbm/aands/page_69.htm + + Examples + -------- + Plot the magnitude and phase of ``exp(x)`` in the complex plane: + + >>> import matplotlib.pyplot as plt + + >>> x = np.linspace(-2*np.pi, 2*np.pi, 100) + >>> xx = x + 1j * x[:, np.newaxis] # a + ib over complex plane + >>> out = np.exp(xx) + + >>> plt.subplot(121) + >>> plt.imshow(np.abs(out), + ... extent=[-2*np.pi, 2*np.pi, -2*np.pi, 2*np.pi], cmap='gray') + >>> plt.title('Magnitude of exp(x)') + + >>> plt.subplot(122) + >>> plt.imshow(np.angle(out), + ... extent=[-2*np.pi, 2*np.pi, -2*np.pi, 2*np.pi], cmap='hsv') + >>> plt.title('Phase (angle) of exp(x)') + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'exp2', + """ + Calculate `2**p` for all `p` in the input array. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Element-wise 2 to the power `x`. 
+ $OUT_SCALAR_1 + + See Also + -------- + power + + Notes + ----- + .. versionadded:: 1.3.0 + + + + Examples + -------- + >>> np.exp2([2, 3]) + array([ 4., 8.]) + + """) + +add_newdoc('numpy.core.umath', 'expm1', + """ + Calculate ``exp(x) - 1`` for all elements in the array. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Element-wise exponential minus one: ``out = exp(x) - 1``. + $OUT_SCALAR_1 + + See Also + -------- + log1p : ``log(1 + x)``, the inverse of expm1. + + + Notes + ----- + This function provides greater precision than ``exp(x) - 1`` + for small values of ``x``. + + Examples + -------- + The true value of ``exp(1e-10) - 1`` is ``1.00000000005e-10`` to + about 32 significant digits. This example shows the superiority of + expm1 in this case. + + >>> np.expm1(1e-10) + 1.00000000005e-10 + >>> np.exp(1e-10) - 1 + 1.000000082740371e-10 + + """) + +add_newdoc('numpy.core.umath', 'fabs', + """ + Compute the absolute values element-wise. + + This function returns the absolute values (positive magnitude) of the + data in `x`. Complex values are not handled, use `absolute` to find the + absolute values of complex data. + + Parameters + ---------- + x : array_like + The array of numbers for which the absolute values are required. If + `x` is a scalar, the result `y` will also be a scalar. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The absolute values of `x`, the returned values are always floats. + $OUT_SCALAR_1 + + See Also + -------- + absolute : Absolute values including `complex` types. + + Examples + -------- + >>> np.fabs(-1) + 1.0 + >>> np.fabs([-1.2, 1.2]) + array([ 1.2, 1.2]) + + """) + +add_newdoc('numpy.core.umath', 'floor', + """ + Return the floor of the input, element-wise. + + The floor of the scalar `x` is the largest integer `i`, such that + `i <= x`. It is often denoted as :math:`\\lfloor x \\rfloor`. + + Parameters + ---------- + x : array_like + Input data. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The floor of each element in `x`. + $OUT_SCALAR_1 + + See Also + -------- + ceil, trunc, rint + + Notes + ----- + Some spreadsheet programs calculate the "floor-towards-zero", in other + words ``floor(-2.5) == -2``. NumPy instead uses the definition of + `floor` where `floor(-2.5) == -3`. + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.floor(a) + array([-2., -2., -1., 0., 1., 1., 2.]) + + """) + +add_newdoc('numpy.core.umath', 'floor_divide', + """ + Return the largest integer smaller or equal to the division of the inputs. + It is equivalent to the Python ``//`` operator and pairs with the + Python ``%`` (`remainder`), function so that ``b = a % b + b * (a // b)`` + up to roundoff. + + Parameters + ---------- + x1 : array_like + Numerator. + x2 : array_like + Denominator. + $PARAMS + + Returns + ------- + y : ndarray + y = floor(`x1`/`x2`) + $OUT_SCALAR_2 + + See Also + -------- + remainder : Remainder complementary to floor_divide. + divmod : Simultaneous floor division and remainder. + divide : Standard division. + floor : Round a number to the nearest integer toward minus infinity. + ceil : Round a number to the nearest integer toward infinity. + + Examples + -------- + >>> np.floor_divide(7,3) + 2 + >>> np.floor_divide([1., 2., 3., 4.], 2.5) + array([ 0., 0., 1., 1.]) + + """) + +add_newdoc('numpy.core.umath', 'fmod', + """ + Return the element-wise remainder of division. 
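Before the details of ``fmod`` below, the divmod-style pairing of ``floor_divide`` and ``remainder`` noted above can be checked directly: up to roundoff, ``a == b * (a // b) + a % b`` holds element-wise, while ``fmod`` instead takes its sign from the dividend. The sample values below are arbitrary and purely illustrative.

    import numpy as np

    a = np.array([ 7., -7.,  7., -7.])
    b = np.array([ 3.,  3., -3., -3.])

    # floor_divide (//) and remainder (%) recombine to the dividend:
    recombined = b * np.floor_divide(a, b) + np.remainder(a, b)
    print(np.allclose(recombined, a))   # True

    # remainder follows the sign of the divisor, fmod the sign of the dividend:
    print(np.remainder(a, b))   # [ 1.  2. -2. -1.]
    print(np.fmod(a, b))        # [ 1. -1.  1. -1.]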
+ + This is the NumPy implementation of the C library function fmod, the + remainder has the same sign as the dividend `x1`. It is equivalent to + the Matlab(TM) ``rem`` function and should not be confused with the + Python modulus operator ``x1 % x2``. + + Parameters + ---------- + x1 : array_like + Dividend. + x2 : array_like + Divisor. + $PARAMS + + Returns + ------- + y : array_like + The remainder of the division of `x1` by `x2`. + $OUT_SCALAR_2 + + See Also + -------- + remainder : Equivalent to the Python ``%`` operator. + divide + + Notes + ----- + The result of the modulo operation for negative dividend and divisors + is bound by conventions. For `fmod`, the sign of result is the sign of + the dividend, while for `remainder` the sign of the result is the sign + of the divisor. The `fmod` function is equivalent to the Matlab(TM) + ``rem`` function. + + Examples + -------- + >>> np.fmod([-3, -2, -1, 1, 2, 3], 2) + array([-1, 0, -1, 1, 0, 1]) + >>> np.remainder([-3, -2, -1, 1, 2, 3], 2) + array([1, 0, 1, 1, 0, 1]) + + >>> np.fmod([5, 3], [2, 2.]) + array([ 1., 1.]) + >>> a = np.arange(-3, 3).reshape(3, 2) + >>> a + array([[-3, -2], + [-1, 0], + [ 1, 2]]) + >>> np.fmod(a, [2,2]) + array([[-1, 0], + [-1, 0], + [ 1, 0]]) + + """) + +add_newdoc('numpy.core.umath', 'greater', + """ + Return the truth value of (x1 > x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. If ``x1.shape != x2.shape``, they must be + broadcastable to a common shape (which may be the shape of one or + the other). + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. + $OUT_SCALAR_2 + + + See Also + -------- + greater_equal, less, less_equal, equal, not_equal + + Examples + -------- + >>> np.greater([4,2],[2,2]) + array([ True, False]) + + If the inputs are ndarrays, then np.greater is equivalent to '>'. + + >>> a = np.array([4,2]) + >>> b = np.array([2,2]) + >>> a > b + array([ True, False]) + + """) + +add_newdoc('numpy.core.umath', 'greater_equal', + """ + Return the truth value of (x1 >= x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. If ``x1.shape != x2.shape``, they must be + broadcastable to a common shape (which may be the shape of one or + the other). + $PARAMS + + Returns + ------- + out : bool or ndarray of bool + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. + $OUT_SCALAR_2 + + See Also + -------- + greater, less, less_equal, equal, not_equal + + Examples + -------- + >>> np.greater_equal([4, 2, 1], [2, 2, 2]) + array([ True, True, False]) + + """) + +add_newdoc('numpy.core.umath', 'hypot', + """ + Given the "legs" of a right triangle, return its hypotenuse. + + Equivalent to ``sqrt(x1**2 + x2**2)``, element-wise. If `x1` or + `x2` is scalar_like (i.e., unambiguously cast-able to a scalar type), + it is broadcast for use with each element of the other argument. + (See Examples) + + Parameters + ---------- + x1, x2 : array_like + Leg of the triangle(s). + $PARAMS + + Returns + ------- + z : ndarray + The hypotenuse of the triangle(s). 
+ $OUT_SCALAR_2 + + Examples + -------- + >>> np.hypot(3*np.ones((3, 3)), 4*np.ones((3, 3))) + array([[ 5., 5., 5.], + [ 5., 5., 5.], + [ 5., 5., 5.]]) + + Example showing broadcast of scalar_like argument: + + >>> np.hypot(3*np.ones((3, 3)), [4]) + array([[ 5., 5., 5.], + [ 5., 5., 5.], + [ 5., 5., 5.]]) + + """) + +add_newdoc('numpy.core.umath', 'invert', + """ + Compute bit-wise inversion, or bit-wise NOT, element-wise. + + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + + For signed integer inputs, the two's complement is returned. In a + two's-complement system negative numbers are represented by the two's + complement of the absolute value. This is the most common method of + representing signed integers on computers [1]_. A N-bit + two's-complement system can represent every integer in the range + :math:`-2^{N-1}` to :math:`+2^{N-1}-1`. + + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Result. + $OUT_SCALAR_1 + + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + + Notes + ----- + `bitwise_not` is an alias for `invert`: + + >>> np.bitwise_not is np.invert + True + + References + ---------- + .. [1] Wikipedia, "Two's complement", + https://en.wikipedia.org/wiki/Two's_complement + + Examples + -------- + We've seen that 13 is represented by ``00001101``. + The invert or bit-wise NOT of 13 is then: + + >>> np.invert(np.array([13], dtype=uint8)) + array([242], dtype=uint8) + >>> np.binary_repr(x, width=8) + '00001101' + >>> np.binary_repr(242, width=8) + '11110010' + + The result depends on the bit-width: + + >>> np.invert(np.array([13], dtype=uint16)) + array([65522], dtype=uint16) + >>> np.binary_repr(x, width=16) + '0000000000001101' + >>> np.binary_repr(65522, width=16) + '1111111111110010' + + When using signed integer types the result is the two's complement of + the result for the unsigned type: + + >>> np.invert(np.array([13], dtype=int8)) + array([-14], dtype=int8) + >>> np.binary_repr(-14, width=8) + '11110010' + + Booleans are accepted as well: + + >>> np.invert(array([True, False])) + array([False, True]) + + """) + +add_newdoc('numpy.core.umath', 'isfinite', + """ + Test element-wise for finiteness (not infinity or not Not a Number). + + The result is returned as a boolean array. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + y : ndarray, bool + True where ``x`` is not positive infinity, negative infinity, + or NaN; false otherwise. + $OUT_SCALAR_1 + + See Also + -------- + isinf, isneginf, isposinf, isnan + + Notes + ----- + Not a Number, positive infinity and negative infinity are considered + to be non-finite. + + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Also that positive infinity is not equivalent to negative infinity. But + infinity is equivalent to positive infinity. Errors result if the + second argument is also supplied when `x` is a scalar input, or if + first and second arguments have different shapes. 
+ + Examples + -------- + >>> np.isfinite(1) + True + >>> np.isfinite(0) + True + >>> np.isfinite(np.nan) + False + >>> np.isfinite(np.inf) + False + >>> np.isfinite(np.NINF) + False + >>> np.isfinite([np.log(-1.),1.,np.log(0)]) + array([False, True, False]) + + >>> x = np.array([-np.inf, 0., np.inf]) + >>> y = np.array([2, 2, 2]) + >>> np.isfinite(x, y) + array([0, 1, 0]) + >>> y + array([0, 1, 0]) + + """) + +add_newdoc('numpy.core.umath', 'isinf', + """ + Test element-wise for positive or negative infinity. + + Returns a boolean array of the same shape as `x`, True where ``x == + +/-inf``, otherwise False. + + Parameters + ---------- + x : array_like + Input values + $PARAMS + + Returns + ------- + y : bool (scalar) or boolean ndarray + True where ``x`` is positive or negative infinity, false otherwise. + $OUT_SCALAR_1 + + See Also + -------- + isneginf, isposinf, isnan, isfinite + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). + + Errors result if the second argument is supplied when the first + argument is a scalar, or if the first and second arguments have + different shapes. + + Examples + -------- + >>> np.isinf(np.inf) + True + >>> np.isinf(np.nan) + False + >>> np.isinf(np.NINF) + True + >>> np.isinf([np.inf, -np.inf, 1.0, np.nan]) + array([ True, True, False, False]) + + >>> x = np.array([-np.inf, 0., np.inf]) + >>> y = np.array([2, 2, 2]) + >>> np.isinf(x, y) + array([1, 0, 1]) + >>> y + array([1, 0, 1]) + + """) + +add_newdoc('numpy.core.umath', 'isnan', + """ + Test element-wise for NaN and return result as a boolean array. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + y : ndarray or bool + True where ``x`` is NaN, false otherwise. + $OUT_SCALAR_1 + + See Also + -------- + isinf, isneginf, isposinf, isfinite, isnat + + Notes + ----- + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + + Examples + -------- + >>> np.isnan(np.nan) + True + >>> np.isnan(np.inf) + False + >>> np.isnan([np.log(-1.),1.,np.log(0)]) + array([ True, False, False]) + + """) + +add_newdoc('numpy.core.umath', 'isnat', + """ + Test element-wise for NaT (not a time) and return result as a boolean array. + + .. versionadded:: 1.13.0 + + Parameters + ---------- + x : array_like + Input array with datetime or timedelta data type. + $PARAMS + + Returns + ------- + y : ndarray or bool + True where ``x`` is NaT, false otherwise. + $OUT_SCALAR_1 + + See Also + -------- + isnan, isinf, isneginf, isposinf, isfinite + + Examples + -------- + >>> np.isnat(np.datetime64("NaT")) + True + >>> np.isnat(np.datetime64("2016-01-01")) + False + >>> np.isnat(np.array(["NaT", "2016-01-01"], dtype="datetime64[ns]")) + array([ True, False]) + + """) + +add_newdoc('numpy.core.umath', 'left_shift', + """ + Shift the bits of an integer to the left. + + Bits are shifted to the left by appending `x2` 0s at the right of `x1`. + Since the internal representation of numbers is in binary format, this + operation is equivalent to multiplying `x1` by ``2**x2``. + + Parameters + ---------- + x1 : array_like of integer type + Input values. + x2 : array_like of integer type + Number of zeros to append to `x1`. Has to be non-negative. + $PARAMS + + Returns + ------- + out : array of integer type + Return `x1` with bits shifted `x2` times to the left. + $OUT_SCALAR_2 + + See Also + -------- + right_shift : Shift the bits of an integer to the right. 
+ binary_repr : Return the binary representation of the input number + as a string. + + Examples + -------- + >>> np.binary_repr(5) + '101' + >>> np.left_shift(5, 2) + 20 + >>> np.binary_repr(20) + '10100' + + >>> np.left_shift(5, [1,2,3]) + array([10, 20, 40]) + + """) + +add_newdoc('numpy.core.umath', 'less', + """ + Return the truth value of (x1 < x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. If ``x1.shape != x2.shape``, they must be + broadcastable to a common shape (which may be the shape of one or + the other). + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. + $OUT_SCALAR_2 + + See Also + -------- + greater, less_equal, greater_equal, equal, not_equal + + Examples + -------- + >>> np.less([1, 2], [2, 2]) + array([ True, False]) + + """) + +add_newdoc('numpy.core.umath', 'less_equal', + """ + Return the truth value of (x1 =< x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. If ``x1.shape != x2.shape``, they must be + broadcastable to a common shape (which may be the shape of one or + the other). + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. + $OUT_SCALAR_2 + + See Also + -------- + greater, less, greater_equal, equal, not_equal + + Examples + -------- + >>> np.less_equal([4, 2, 1], [2, 2, 2]) + array([False, True, True]) + + """) + +add_newdoc('numpy.core.umath', 'log', + """ + Natural logarithm, element-wise. + + The natural logarithm `log` is the inverse of the exponential function, + so that `log(exp(x)) = x`. The natural logarithm is logarithm in base + `e`. + + Parameters + ---------- + x : array_like + Input value. + $PARAMS + + Returns + ------- + y : ndarray + The natural logarithm of `x`, element-wise. + $OUT_SCALAR_1 + + See Also + -------- + log10, log2, log1p, emath.log + + Notes + ----- + Logarithm is a multivalued function: for each `x` there is an infinite + number of `z` such that `exp(z) = x`. The convention is to return the + `z` whose imaginary part lies in `[-pi, pi]`. + + For real-valued input data types, `log` always returns real output. For + each value that cannot be expressed as a real number or infinity, it + yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `log` is a complex analytical function that + has a branch cut `[-inf, 0]` and is continuous from above on it. `log` + handles the floating-point negative zero as an infinitesimal negative + number, conforming to the C99 standard. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm + + Examples + -------- + >>> np.log([1, np.e, np.e**2, 0]) + array([ 0., 1., 2., -Inf]) + + """) + +add_newdoc('numpy.core.umath', 'log10', + """ + Return the base 10 logarithm of the input array, element-wise. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + y : ndarray + The logarithm to the base 10 of `x`, element-wise. NaNs are + returned where x is negative. 
+ $OUT_SCALAR_1 + + See Also + -------- + emath.log10 + + Notes + ----- + Logarithm is a multivalued function: for each `x` there is an infinite + number of `z` such that `10**z = x`. The convention is to return the + `z` whose imaginary part lies in `[-pi, pi]`. + + For real-valued input data types, `log10` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `log10` is a complex analytical function that + has a branch cut `[-inf, 0]` and is continuous from above on it. + `log10` handles the floating-point negative zero as an infinitesimal + negative number, conforming to the C99 standard. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm + + Examples + -------- + >>> np.log10([1e-15, -3.]) + array([-15., NaN]) + + """) + +add_newdoc('numpy.core.umath', 'log2', + """ + Base-2 logarithm of `x`. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + y : ndarray + Base-2 logarithm of `x`. + $OUT_SCALAR_1 + + See Also + -------- + log, log10, log1p, emath.log2 + + Notes + ----- + .. versionadded:: 1.3.0 + + Logarithm is a multivalued function: for each `x` there is an infinite + number of `z` such that `2**z = x`. The convention is to return the `z` + whose imaginary part lies in `[-pi, pi]`. + + For real-valued input data types, `log2` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `log2` is a complex analytical function that + has a branch cut `[-inf, 0]` and is continuous from above on it. `log2` + handles the floating-point negative zero as an infinitesimal negative + number, conforming to the C99 standard. + + Examples + -------- + >>> x = np.array([0, 1, 2, 2**4]) + >>> np.log2(x) + array([-Inf, 0., 1., 4.]) + + >>> xi = np.array([0+1.j, 1, 2+0.j, 4.j]) + >>> np.log2(xi) + array([ 0.+2.26618007j, 0.+0.j , 1.+0.j , 2.+2.26618007j]) + + """) + +add_newdoc('numpy.core.umath', 'logaddexp', + """ + Logarithm of the sum of exponentiations of the inputs. + + Calculates ``log(exp(x1) + exp(x2))``. This function is useful in + statistics where the calculated probabilities of events may be so small + as to exceed the range of normal floating point numbers. In such cases + the logarithm of the calculated probability is stored. This function + allows adding probabilities stored in such a fashion. + + Parameters + ---------- + x1, x2 : array_like + Input values. + $PARAMS + + Returns + ------- + result : ndarray + Logarithm of ``exp(x1) + exp(x2)``. + $OUT_SCALAR_2 + + See Also + -------- + logaddexp2: Logarithm of the sum of exponentiations of inputs in base 2. + + Notes + ----- + .. versionadded:: 1.3.0 + + Examples + -------- + >>> prob1 = np.log(1e-50) + >>> prob2 = np.log(2.5e-50) + >>> prob12 = np.logaddexp(prob1, prob2) + >>> prob12 + -113.87649168120691 + >>> np.exp(prob12) + 3.5000000000000057e-50 + + """) + +add_newdoc('numpy.core.umath', 'logaddexp2', + """ + Logarithm of the sum of exponentiations of the inputs in base-2. + + Calculates ``log2(2**x1 + 2**x2)``. 
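The motivation given for ``logaddexp`` (and for ``logaddexp2`` below) is easy to see directly: once the probabilities underflow to zero, the naive formula loses all information, while the log-space computation does not. The values below are illustrative.

    import numpy as np

    a = b = -1000.0   # log-probabilities far below the underflow threshold

    # Naive evaluation: exp(-1000) underflows to 0.0, so the sum collapses.
    naive = np.log(np.exp(a) + np.exp(b))
    print(naive)                 # -inf (and a divide-by-zero RuntimeWarning)

    # logaddexp stays in log space and keeps the answer.
    print(np.logaddexp(a, b))    # about -999.3069, i.e. -1000 + log(2)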
This function is useful in machine + learning when the calculated probabilities of events may be so small as + to exceed the range of normal floating point numbers. In such cases + the base-2 logarithm of the calculated probability can be used instead. + This function allows adding probabilities stored in such a fashion. + + Parameters + ---------- + x1, x2 : array_like + Input values. + $PARAMS + + Returns + ------- + result : ndarray + Base-2 logarithm of ``2**x1 + 2**x2``. + $OUT_SCALAR_2 + + See Also + -------- + logaddexp: Logarithm of the sum of exponentiations of the inputs. + + Notes + ----- + .. versionadded:: 1.3.0 + + Examples + -------- + >>> prob1 = np.log2(1e-50) + >>> prob2 = np.log2(2.5e-50) + >>> prob12 = np.logaddexp2(prob1, prob2) + >>> prob1, prob2, prob12 + (-166.09640474436813, -164.77447664948076, -164.28904982231052) + >>> 2**prob12 + 3.4999999999999914e-50 + + """) + +add_newdoc('numpy.core.umath', 'log1p', + """ + Return the natural logarithm of one plus the input array, element-wise. + + Calculates ``log(1 + x)``. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + y : ndarray + Natural logarithm of `1 + x`, element-wise. + $OUT_SCALAR_1 + + See Also + -------- + expm1 : ``exp(x) - 1``, the inverse of `log1p`. + + Notes + ----- + For real-valued input, `log1p` is accurate also for `x` so small + that `1 + x == 1` in floating-point accuracy. + + Logarithm is a multivalued function: for each `x` there is an infinite + number of `z` such that `exp(z) = 1 + x`. The convention is to return + the `z` whose imaginary part lies in `[-pi, pi]`. + + For real-valued input data types, `log1p` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, `log1p` is a complex analytical function that + has a branch cut `[-inf, -1]` and is continuous from above on it. + `log1p` handles the floating-point negative zero as an infinitesimal + negative number, conforming to the C99 standard. + + References + ---------- + .. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions", + 10th printing, 1964, pp. 67. http://www.math.sfu.ca/~cbm/aands/ + .. [2] Wikipedia, "Logarithm". https://en.wikipedia.org/wiki/Logarithm + + Examples + -------- + >>> np.log1p(1e-99) + 1e-99 + >>> np.log(1 + 1e-99) + 0.0 + + """) + +add_newdoc('numpy.core.umath', 'logical_and', + """ + Compute the truth value of x1 AND x2 element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. `x1` and `x2` must be of the same shape. + $PARAMS + + Returns + ------- + y : ndarray or bool + Boolean result with the same shape as `x1` and `x2` of the logical + AND operation on corresponding elements of `x1` and `x2`. + $OUT_SCALAR_2 + + See Also + -------- + logical_or, logical_not, logical_xor + bitwise_and + + Examples + -------- + >>> np.logical_and(True, False) + False + >>> np.logical_and([True, False], [False, False]) + array([False, False]) + + >>> x = np.arange(5) + >>> np.logical_and(x>1, x<4) + array([False, False, True, True, False]) + + """) + +add_newdoc('numpy.core.umath', 'logical_not', + """ + Compute the truth value of NOT x element-wise. + + Parameters + ---------- + x : array_like + Logical NOT is applied to the elements of `x`. + $PARAMS + + Returns + ------- + y : bool or ndarray of bool + Boolean result with the same shape as `x` of the NOT operation + on elements of `x`. 
+ $OUT_SCALAR_1 + + See Also + -------- + logical_and, logical_or, logical_xor + + Examples + -------- + >>> np.logical_not(3) + False + >>> np.logical_not([True, False, 0, 1]) + array([False, True, True, False]) + + >>> x = np.arange(5) + >>> np.logical_not(x<3) + array([False, False, False, True, True]) + + """) + +add_newdoc('numpy.core.umath', 'logical_or', + """ + Compute the truth value of x1 OR x2 element-wise. + + Parameters + ---------- + x1, x2 : array_like + Logical OR is applied to the elements of `x1` and `x2`. + They have to be of the same shape. + $PARAMS + + Returns + ------- + y : ndarray or bool + Boolean result with the same shape as `x1` and `x2` of the logical + OR operation on elements of `x1` and `x2`. + $OUT_SCALAR_2 + + See Also + -------- + logical_and, logical_not, logical_xor + bitwise_or + + Examples + -------- + >>> np.logical_or(True, False) + True + >>> np.logical_or([True, False], [False, False]) + array([ True, False]) + + >>> x = np.arange(5) + >>> np.logical_or(x < 1, x > 3) + array([ True, False, False, False, True]) + + """) + +add_newdoc('numpy.core.umath', 'logical_xor', + """ + Compute the truth value of x1 XOR x2, element-wise. + + Parameters + ---------- + x1, x2 : array_like + Logical XOR is applied to the elements of `x1` and `x2`. They must + be broadcastable to the same shape. + $PARAMS + + Returns + ------- + y : bool or ndarray of bool + Boolean result of the logical XOR operation applied to the elements + of `x1` and `x2`; the shape is determined by whether or not + broadcasting of one or both arrays was required. + $OUT_SCALAR_2 + + See Also + -------- + logical_and, logical_or, logical_not, bitwise_xor + + Examples + -------- + >>> np.logical_xor(True, False) + True + >>> np.logical_xor([True, True, False, False], [True, False, True, False]) + array([False, True, True, False]) + + >>> x = np.arange(5) + >>> np.logical_xor(x < 1, x > 3) + array([ True, False, False, False, True]) + + Simple example showing support of broadcasting + + >>> np.logical_xor(0, np.eye(2)) + array([[ True, False], + [False, True]]) + + """) + +add_newdoc('numpy.core.umath', 'maximum', + """ + Element-wise maximum of array elements. + + Compare two arrays and returns a new array containing the element-wise + maxima. If one of the elements being compared is a NaN, then that + element is returned. If both elements are NaNs then the first is + returned. The latter distinction is important for complex NaNs, which + are defined as at least one of the real or imaginary parts being a NaN. + The net effect is that NaNs are propagated. + + Parameters + ---------- + x1, x2 : array_like + The arrays holding the elements to be compared. They must have + the same shape, or shapes that can be broadcast to a single shape. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The maximum of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + See Also + -------- + minimum : + Element-wise minimum of two arrays, propagates NaNs. + fmax : + Element-wise maximum of two arrays, ignores NaNs. + amax : + The maximum value of an array along a given axis, propagates NaNs. + nanmax : + The maximum value of an array along a given axis, ignores NaNs. + + fmin, amin, nanmin + + Notes + ----- + The maximum is equivalent to ``np.where(x1 >= x2, x1, x2)`` when + neither x1 nor x2 are nans, but it is faster and does proper + broadcasting. + + Examples + -------- + >>> np.maximum([2, 3, 4], [1, 5, 2]) + array([2, 5, 4]) + + >>> np.maximum(np.eye(2), [0.5, 2]) # broadcasting + array([[ 1. , 2. 
], + [ 0.5, 2. ]]) + + >>> np.maximum([np.nan, 0, np.nan], [0, np.nan, np.nan]) + array([ NaN, NaN, NaN]) + >>> np.maximum(np.Inf, 1) + inf + + """) + +add_newdoc('numpy.core.umath', 'minimum', + """ + Element-wise minimum of array elements. + + Compare two arrays and returns a new array containing the element-wise + minima. If one of the elements being compared is a NaN, then that + element is returned. If both elements are NaNs then the first is + returned. The latter distinction is important for complex NaNs, which + are defined as at least one of the real or imaginary parts being a NaN. + The net effect is that NaNs are propagated. + + Parameters + ---------- + x1, x2 : array_like + The arrays holding the elements to be compared. They must have + the same shape, or shapes that can be broadcast to a single shape. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The minimum of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + See Also + -------- + maximum : + Element-wise maximum of two arrays, propagates NaNs. + fmin : + Element-wise minimum of two arrays, ignores NaNs. + amin : + The minimum value of an array along a given axis, propagates NaNs. + nanmin : + The minimum value of an array along a given axis, ignores NaNs. + + fmax, amax, nanmax + + Notes + ----- + The minimum is equivalent to ``np.where(x1 <= x2, x1, x2)`` when + neither x1 nor x2 are NaNs, but it is faster and does proper + broadcasting. + + Examples + -------- + >>> np.minimum([2, 3, 4], [1, 5, 2]) + array([1, 3, 2]) + + >>> np.minimum(np.eye(2), [0.5, 2]) # broadcasting + array([[ 0.5, 0. ], + [ 0. , 1. ]]) + + >>> np.minimum([np.nan, 0, np.nan],[0, np.nan, np.nan]) + array([ NaN, NaN, NaN]) + >>> np.minimum(-np.Inf, 1) + -inf + + """) + +add_newdoc('numpy.core.umath', 'fmax', + """ + Element-wise maximum of array elements. + + Compare two arrays and returns a new array containing the element-wise + maxima. If one of the elements being compared is a NaN, then the + non-nan element is returned. If both elements are NaNs then the first + is returned. The latter distinction is important for complex NaNs, + which are defined as at least one of the real or imaginary parts being + a NaN. The net effect is that NaNs are ignored when possible. + + Parameters + ---------- + x1, x2 : array_like + The arrays holding the elements to be compared. They must have + the same shape. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The maximum of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + See Also + -------- + fmin : + Element-wise minimum of two arrays, ignores NaNs. + maximum : + Element-wise maximum of two arrays, propagates NaNs. + amax : + The maximum value of an array along a given axis, propagates NaNs. + nanmax : + The maximum value of an array along a given axis, ignores NaNs. + + minimum, amin, nanmin + + Notes + ----- + .. versionadded:: 1.3.0 + + The fmax is equivalent to ``np.where(x1 >= x2, x1, x2)`` when neither + x1 nor x2 are NaNs, but it is faster and does proper broadcasting. + + Examples + -------- + >>> np.fmax([2, 3, 4], [1, 5, 2]) + array([ 2., 5., 4.]) + + >>> np.fmax(np.eye(2), [0.5, 2]) + array([[ 1. , 2. ], + [ 0.5, 2. ]]) + + >>> np.fmax([np.nan, 0, np.nan],[0, np.nan, np.nan]) + array([ 0., 0., NaN]) + + """) + +add_newdoc('numpy.core.umath', 'fmin', + """ + Element-wise minimum of array elements. + + Compare two arrays and returns a new array containing the element-wise + minima. If one of the elements being compared is a NaN, then the + non-nan element is returned. 
If both elements are NaNs then the first + is returned. The latter distinction is important for complex NaNs, + which are defined as at least one of the real or imaginary parts being + a NaN. The net effect is that NaNs are ignored when possible. + + Parameters + ---------- + x1, x2 : array_like + The arrays holding the elements to be compared. They must have + the same shape. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The minimum of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + See Also + -------- + fmax : + Element-wise maximum of two arrays, ignores NaNs. + minimum : + Element-wise minimum of two arrays, propagates NaNs. + amin : + The minimum value of an array along a given axis, propagates NaNs. + nanmin : + The minimum value of an array along a given axis, ignores NaNs. + + maximum, amax, nanmax + + Notes + ----- + .. versionadded:: 1.3.0 + + The fmin is equivalent to ``np.where(x1 <= x2, x1, x2)`` when neither + x1 nor x2 are NaNs, but it is faster and does proper broadcasting. + + Examples + -------- + >>> np.fmin([2, 3, 4], [1, 5, 2]) + array([1, 3, 2]) + + >>> np.fmin(np.eye(2), [0.5, 2]) + array([[ 0.5, 0. ], + [ 0. , 1. ]]) + + >>> np.fmin([np.nan, 0, np.nan],[0, np.nan, np.nan]) + array([ 0., 0., NaN]) + + """) + +add_newdoc('numpy.core.umath', 'matmul', + """ + Matrix product of two arrays. + + Parameters + ---------- + x1, x2 : array_like + Input arrays, scalars not allowed. + out : ndarray, optional + A location into which the result is stored. If provided, it must have + a shape that matches the signature `(n,k),(k,m)->(n,m)`. If not + provided or `None`, a freshly-allocated array is returned. + **kwargs + For other keyword-only arguments, see the + :ref:`ufunc docs <ufuncs.kwargs>`. + + ..versionadded:: 1.16 + Now handles ufunc kwargs + + Returns + ------- + y : ndarray + The matrix product of the inputs. + This is a scalar only when both x1, x2 are 1-d vectors. + + Raises + ------ + ValueError + If the last dimension of `a` is not the same size as + the second-to-last dimension of `b`. + + If a scalar value is passed in. + + See Also + -------- + vdot : Complex-conjugating dot product. + tensordot : Sum products over arbitrary axes. + einsum : Einstein summation convention. + dot : alternative matrix product with different broadcasting rules. + + Notes + ----- + + The behavior depends on the arguments in the following way. + + - If both arguments are 2-D they are multiplied like conventional + matrices. + - If either argument is N-D, N > 2, it is treated as a stack of + matrices residing in the last two indexes and broadcast accordingly. + - If the first argument is 1-D, it is promoted to a matrix by + prepending a 1 to its dimensions. After matrix multiplication + the prepended 1 is removed. + - If the second argument is 1-D, it is promoted to a matrix by + appending a 1 to its dimensions. After matrix multiplication + the appended 1 is removed. + + ``matmul`` differs from ``dot`` in two important ways: + + - Multiplication by scalars is not allowed, use ``*`` instead. + - Stacks of matrices are broadcast together as if the matrices + were elements, respecting the signature ``(n,k),(k,m)->(n,m)``: + + >>> a = np.ones([9, 5, 7, 4]) + >>> c = np.ones([9, 5, 4, 3]) + >>> np.dot(a, c).shape + (9, 5, 7, 9, 5, 3) + >>> np.matmul(a, c).shape + (9, 5, 7, 3) + >>> # n is 7, k is 4, m is 3 + + The matmul function implements the semantics of the `@` operator introduced + in Python 3.5 following PEP465. 
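+
+ For example, on a Python 3.5+ interpreter the two spellings agree for
+ 2-D inputs (a minimal illustration with arbitrarily chosen arrays):
+
+ >>> a = np.arange(4).reshape(2, 2)
+ >>> b = np.arange(4, 8).reshape(2, 2)
+ >>> np.array_equal(np.matmul(a, b), a @ b)
+ True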
+ + Examples + -------- + For 2-D arrays it is the matrix product: + + >>> a = np.array([[1, 0], + ... [0, 1]]) + >>> b = np.array([[4, 1], + ... [2, 2]] + >>> np.matmul(a, b) + array([[4, 1], + [2, 2]]) + + For 2-D mixed with 1-D, the result is the usual. + + >>> a = np.array([[1, 0], + ... [0, 1]] + >>> b = np.array([1, 2]) + >>> np.matmul(a, b) + array([1, 2]) + >>> np.matmul(b, a) + array([1, 2]) + + + Broadcasting is conventional for stacks of arrays + + >>> a = np.arange(2 * 2 * 4).reshape((2, 2, 4)) + >>> b = np.arange(2 * 2 * 4).reshape((2, 4, 2)) + >>> np.matmul(a,b).shape + (2, 2, 2) + >>> np.matmul(a, b)[0, 1, 1] + 98 + >>> sum(a[0, 1, :] * b[0 , :, 1]) + 98 + + Vector, vector returns the scalar inner product, but neither argument + is complex-conjugated: + + >>> np.matmul([2j, 3j], [2j, 3j]) + (-13+0j) + + Scalar multiplication raises an error. + + >>> np.matmul([1,2], 3) + Traceback (most recent call last): + ... + ValueError: matmul: Input operand 1 does not have enough dimensions ... + + .. versionadded:: 1.10.0 + """) + +add_newdoc('numpy.core.umath', 'modf', + """ + Return the fractional and integral parts of an array, element-wise. + + The fractional and integral parts are negative if the given number is + negative. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + y1 : ndarray + Fractional part of `x`. + $OUT_SCALAR_1 + y2 : ndarray + Integral part of `x`. + $OUT_SCALAR_1 + + Notes + ----- + For integer input the return values are floats. + + See Also + -------- + divmod : ``divmod(x, 1)`` is equivalent to ``modf`` with the return values + switched, except it always has a positive remainder. + + Examples + -------- + >>> np.modf([0, 3.5]) + (array([ 0. , 0.5]), array([ 0., 3.])) + >>> np.modf(-0.5) + (-0.5, -0) + + """) + +add_newdoc('numpy.core.umath', 'multiply', + """ + Multiply arguments element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays to be multiplied. + $PARAMS + + Returns + ------- + y : ndarray + The product of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + Notes + ----- + Equivalent to `x1` * `x2` in terms of array broadcasting. + + Examples + -------- + >>> np.multiply(2.0, 4.0) + 8.0 + + >>> x1 = np.arange(9.0).reshape((3, 3)) + >>> x2 = np.arange(3.0) + >>> np.multiply(x1, x2) + array([[ 0., 1., 4.], + [ 0., 4., 10.], + [ 0., 7., 16.]]) + + """) + +add_newdoc('numpy.core.umath', 'negative', + """ + Numerical negative, element-wise. + + Parameters + ---------- + x : array_like or scalar + Input array. + $PARAMS + + Returns + ------- + y : ndarray or scalar + Returned array or scalar: `y = -x`. + $OUT_SCALAR_1 + + Examples + -------- + >>> np.negative([1.,-1.]) + array([-1., 1.]) + + """) + +add_newdoc('numpy.core.umath', 'positive', + """ + Numerical positive, element-wise. + + .. versionadded:: 1.13.0 + + Parameters + ---------- + x : array_like or scalar + Input array. + + Returns + ------- + y : ndarray or scalar + Returned array or scalar: `y = +x`. + $OUT_SCALAR_1 + + Notes + ----- + Equivalent to `x.copy()`, but only defined for types that support + arithmetic. + + """) + +add_newdoc('numpy.core.umath', 'not_equal', + """ + Return (x1 != x2) element-wise. + + Parameters + ---------- + x1, x2 : array_like + Input arrays. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array, element-wise comparison of `x1` and `x2`. + Typically of type bool, unless ``dtype=object`` is passed. 
+ $OUT_SCALAR_2 + + See Also + -------- + equal, greater, greater_equal, less, less_equal + + Examples + -------- + >>> np.not_equal([1.,2.], [1., 3.]) + array([False, True]) + >>> np.not_equal([1, 2], [[1, 3],[1, 4]]) + array([[False, True], + [False, True]]) + + """) + +add_newdoc('numpy.core.umath', '_ones_like', + """ + This function used to be the numpy.ones_like, but now a specific + function for that has been written for consistency with the other + *_like functions. It is only used internally in a limited fashion now. + + See Also + -------- + ones_like + + """) + +add_newdoc('numpy.core.umath', 'power', + """ + First array elements raised to powers from second array, element-wise. + + Raise each base in `x1` to the positionally-corresponding power in + `x2`. `x1` and `x2` must be broadcastable to the same shape. Note that an + integer type raised to a negative integer power will raise a ValueError. + + Parameters + ---------- + x1 : array_like + The bases. + x2 : array_like + The exponents. + $PARAMS + + Returns + ------- + y : ndarray + The bases in `x1` raised to the exponents in `x2`. + $OUT_SCALAR_2 + + See Also + -------- + float_power : power function that promotes integers to float + + Examples + -------- + Cube each element in a list. + + >>> x1 = range(6) + >>> x1 + [0, 1, 2, 3, 4, 5] + >>> np.power(x1, 3) + array([ 0, 1, 8, 27, 64, 125]) + + Raise the bases to different exponents. + + >>> x2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0] + >>> np.power(x1, x2) + array([ 0., 1., 8., 27., 16., 5.]) + + The effect of broadcasting. + + >>> x2 = np.array([[1, 2, 3, 3, 2, 1], [1, 2, 3, 3, 2, 1]]) + >>> x2 + array([[1, 2, 3, 3, 2, 1], + [1, 2, 3, 3, 2, 1]]) + >>> np.power(x1, x2) + array([[ 0, 1, 8, 27, 16, 5], + [ 0, 1, 8, 27, 16, 5]]) + + """) + +add_newdoc('numpy.core.umath', 'float_power', + """ + First array elements raised to powers from second array, element-wise. + + Raise each base in `x1` to the positionally-corresponding power in `x2`. + `x1` and `x2` must be broadcastable to the same shape. This differs from + the power function in that integers, float16, and float32 are promoted to + floats with a minimum precision of float64 so that the result is always + inexact. The intent is that the function will return a usable result for + negative powers and seldom overflow for positive powers. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + x1 : array_like + The bases. + x2 : array_like + The exponents. + $PARAMS + + Returns + ------- + y : ndarray + The bases in `x1` raised to the exponents in `x2`. + $OUT_SCALAR_2 + + See Also + -------- + power : power function that preserves type + + Examples + -------- + Cube each element in a list. + + >>> x1 = range(6) + >>> x1 + [0, 1, 2, 3, 4, 5] + >>> np.float_power(x1, 3) + array([ 0., 1., 8., 27., 64., 125.]) + + Raise the bases to different exponents. + + >>> x2 = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0] + >>> np.float_power(x1, x2) + array([ 0., 1., 8., 27., 16., 5.]) + + The effect of broadcasting. + + >>> x2 = np.array([[1, 2, 3, 3, 2, 1], [1, 2, 3, 3, 2, 1]]) + >>> x2 + array([[1, 2, 3, 3, 2, 1], + [1, 2, 3, 3, 2, 1]]) + >>> np.float_power(x1, x2) + array([[ 0., 1., 8., 27., 16., 5.], + [ 0., 1., 8., 27., 16., 5.]]) + + """) + +add_newdoc('numpy.core.umath', 'radians', + """ + Convert angles from degrees to radians. + + Parameters + ---------- + x : array_like + Input array in degrees. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding radian values. 
+ $OUT_SCALAR_1 + + See Also + -------- + deg2rad : equivalent function + + Examples + -------- + Convert a degree array to radians + + >>> deg = np.arange(12.) * 30. + >>> np.radians(deg) + array([ 0. , 0.52359878, 1.04719755, 1.57079633, 2.0943951 , + 2.61799388, 3.14159265, 3.66519143, 4.1887902 , 4.71238898, + 5.23598776, 5.75958653]) + + >>> out = np.zeros((deg.shape)) + >>> ret = np.radians(deg, out) + >>> ret is out + True + + """) + +add_newdoc('numpy.core.umath', 'deg2rad', + """ + Convert angles from degrees to radians. + + Parameters + ---------- + x : array_like + Angles in degrees. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding angle in radians. + $OUT_SCALAR_1 + + See Also + -------- + rad2deg : Convert angles from radians to degrees. + unwrap : Remove large jumps in angle by wrapping. + + Notes + ----- + .. versionadded:: 1.3.0 + + ``deg2rad(x)`` is ``x * pi / 180``. + + Examples + -------- + >>> np.deg2rad(180) + 3.1415926535897931 + + """) + +add_newdoc('numpy.core.umath', 'reciprocal', + """ + Return the reciprocal of the argument, element-wise. + + Calculates ``1/x``. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + y : ndarray + Return array. + $OUT_SCALAR_1 + + Notes + ----- + .. note:: + This function is not designed to work with integers. + + For integer arguments with absolute value larger than 1 the result is + always zero because of the way Python handles integer division. For + integer zero the result is an overflow. + + Examples + -------- + >>> np.reciprocal(2.) + 0.5 + >>> np.reciprocal([1, 2., 3.33]) + array([ 1. , 0.5 , 0.3003003]) + + """) + +add_newdoc('numpy.core.umath', 'remainder', + """ + Return element-wise remainder of division. + + Computes the remainder complementary to the `floor_divide` function. It is + equivalent to the Python modulus operator``x1 % x2`` and has the same sign + as the divisor `x2`. The MATLAB function equivalent to ``np.remainder`` + is ``mod``. + + .. warning:: + + This should not be confused with: + + * Python 3.7's `math.remainder` and C's ``remainder``, which + computes the IEEE remainder, which are the complement to + ``round(x1 / x2)``. + * The MATLAB ``rem`` function and or the C ``%`` operator which is the + complement to ``int(x1 / x2)``. + + Parameters + ---------- + x1 : array_like + Dividend array. + x2 : array_like + Divisor array. + $PARAMS + + Returns + ------- + y : ndarray + The element-wise remainder of the quotient ``floor_divide(x1, x2)``. + $OUT_SCALAR_2 + + See Also + -------- + floor_divide : Equivalent of Python ``//`` operator. + divmod : Simultaneous floor division and remainder. + fmod : Equivalent of the MATLAB ``rem`` function. + divide, floor + + Notes + ----- + Returns 0 when `x2` is 0 and both `x1` and `x2` are (arrays of) + integers. + + Examples + -------- + >>> np.remainder([4, 7], [2, 3]) + array([0, 1]) + >>> np.remainder(np.arange(7), 5) + array([0, 1, 2, 3, 4, 0, 1]) + + """) + +add_newdoc('numpy.core.umath', 'divmod', + """ + Return element-wise quotient and remainder simultaneously. + + .. versionadded:: 1.13.0 + + ``np.divmod(x, y)`` is equivalent to ``(x // y, x % y)``, but faster + because it avoids redundant work. It is used to implement the Python + built-in function ``divmod`` on NumPy arrays. + + Parameters + ---------- + x1 : array_like + Dividend array. + x2 : array_like + Divisor array. + $PARAMS + + Returns + ------- + out1 : ndarray + Element-wise quotient resulting from floor division. 
+ $OUT_SCALAR_2 + out2 : ndarray + Element-wise remainder from floor division. + $OUT_SCALAR_2 + + See Also + -------- + floor_divide : Equivalent to Python's ``//`` operator. + remainder : Equivalent to Python's ``%`` operator. + modf : Equivalent to ``divmod(x, 1)`` for positive ``x`` with the return + values switched. + + Examples + -------- + >>> np.divmod(np.arange(5), 3) + (array([0, 0, 0, 1, 1]), array([0, 1, 2, 0, 1])) + + """) + +add_newdoc('numpy.core.umath', 'right_shift', + """ + Shift the bits of an integer to the right. + + Bits are shifted to the right `x2`. Because the internal + representation of numbers is in binary format, this operation is + equivalent to dividing `x1` by ``2**x2``. + + Parameters + ---------- + x1 : array_like, int + Input values. + x2 : array_like, int + Number of bits to remove at the right of `x1`. + $PARAMS + + Returns + ------- + out : ndarray, int + Return `x1` with bits shifted `x2` times to the right. + $OUT_SCALAR_2 + + See Also + -------- + left_shift : Shift the bits of an integer to the left. + binary_repr : Return the binary representation of the input number + as a string. + + Examples + -------- + >>> np.binary_repr(10) + '1010' + >>> np.right_shift(10, 1) + 5 + >>> np.binary_repr(5) + '101' + + >>> np.right_shift(10, [1,2,3]) + array([5, 2, 1]) + + """) + +add_newdoc('numpy.core.umath', 'rint', + """ + Round elements of the array to the nearest integer. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Output array is same shape and type as `x`. + $OUT_SCALAR_1 + + See Also + -------- + ceil, floor, trunc + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.rint(a) + array([-2., -2., -0., 0., 2., 2., 2.]) + + """) + +add_newdoc('numpy.core.umath', 'sign', + """ + Returns an element-wise indication of the sign of a number. + + The `sign` function returns ``-1 if x < 0, 0 if x==0, 1 if x > 0``. nan + is returned for nan inputs. + + For complex inputs, the `sign` function returns + ``sign(x.real) + 0j if x.real != 0 else sign(x.imag) + 0j``. + + complex(nan, 0) is returned for complex nan inputs. + + Parameters + ---------- + x : array_like + Input values. + $PARAMS + + Returns + ------- + y : ndarray + The sign of `x`. + $OUT_SCALAR_1 + + Notes + ----- + There is more than one definition of sign in common use for complex + numbers. The definition used here is equivalent to :math:`x/\\sqrt{x*x}` + which is different from a common alternative, :math:`x/|x|`. + + Examples + -------- + >>> np.sign([-5., 4.5]) + array([-1., 1.]) + >>> np.sign(0) + 0 + >>> np.sign(5-2j) + (1+0j) + + """) + +add_newdoc('numpy.core.umath', 'signbit', + """ + Returns element-wise True where signbit is set (less than zero). + + Parameters + ---------- + x : array_like + The input value(s). + $PARAMS + + Returns + ------- + result : ndarray of bool + Output array, or reference to `out` if that was supplied. + $OUT_SCALAR_1 + + Examples + -------- + >>> np.signbit(-1.2) + True + >>> np.signbit(np.array([1, -2.3, 2.1])) + array([False, True, False]) + + """) + +add_newdoc('numpy.core.umath', 'copysign', + """ + Change the sign of x1 to that of x2, element-wise. + + If both arguments are arrays or sequences, they have to be of the same + length. If `x2` is a scalar, its sign will be copied to all elements of + `x1`. + + Parameters + ---------- + x1 : array_like + Values to change the sign of. + x2 : array_like + The sign of `x2` is copied to `x1`. 
+ $PARAMS + + Returns + ------- + out : ndarray or scalar + The values of `x1` with the sign of `x2`. + $OUT_SCALAR_2 + + Examples + -------- + >>> np.copysign(1.3, -1) + -1.3 + >>> 1/np.copysign(0, 1) + inf + >>> 1/np.copysign(0, -1) + -inf + + >>> np.copysign([-1, 0, 1], -1.1) + array([-1., -0., -1.]) + >>> np.copysign([-1, 0, 1], np.arange(3)-1) + array([-1., 0., 1.]) + + """) + +add_newdoc('numpy.core.umath', 'nextafter', + """ + Return the next floating-point value after x1 towards x2, element-wise. + + Parameters + ---------- + x1 : array_like + Values to find the next representable value of. + x2 : array_like + The direction where to look for the next representable value of `x1`. + $PARAMS + + Returns + ------- + out : ndarray or scalar + The next representable values of `x1` in the direction of `x2`. + $OUT_SCALAR_2 + + Examples + -------- + >>> eps = np.finfo(np.float64).eps + >>> np.nextafter(1, 2) == eps + 1 + True + >>> np.nextafter([1, 2], [2, 1]) == [eps + 1, 2 - eps] + array([ True, True]) + + """) + +add_newdoc('numpy.core.umath', 'spacing', + """ + Return the distance between x and the nearest adjacent number. + + Parameters + ---------- + x : array_like + Values to find the spacing of. + $PARAMS + + Returns + ------- + out : ndarray or scalar + The spacing of values of `x`. + $OUT_SCALAR_1 + + Notes + ----- + It can be considered as a generalization of EPS: + ``spacing(np.float64(1)) == np.finfo(np.float64).eps``, and there + should not be any representable number between ``x + spacing(x)`` and + x for any finite x. + + Spacing of +- inf and NaN is NaN. + + Examples + -------- + >>> np.spacing(1) == np.finfo(np.float64).eps + True + + """) + +add_newdoc('numpy.core.umath', 'sin', + """ + Trigonometric sine, element-wise. + + Parameters + ---------- + x : array_like + Angle, in radians (:math:`2 \\pi` rad equals 360 degrees). + $PARAMS + + Returns + ------- + y : array_like + The sine of each element of x. + $OUT_SCALAR_1 + + See Also + -------- + arcsin, sinh, cos + + Notes + ----- + The sine is one of the fundamental functions of trigonometry (the + mathematical study of triangles). Consider a circle of radius 1 + centered on the origin. A ray comes in from the :math:`+x` axis, makes + an angle at the origin (measured counter-clockwise from that axis), and + departs from the origin. The :math:`y` coordinate of the outgoing + ray's intersection with the unit circle is the sine of that angle. It + ranges from -1 for :math:`x=3\\pi / 2` to +1 for :math:`\\pi / 2.` The + function has zeroes where the angle is a multiple of :math:`\\pi`. + Sines of angles between :math:`\\pi` and :math:`2\\pi` are negative. + The numerous properties of the sine and related functions are included + in any standard trigonometry text. + + Examples + -------- + Print sine of one angle: + + >>> np.sin(np.pi/2.) + 1.0 + + Print sines of an array of angles given in degrees: + + >>> np.sin(np.array((0., 30., 45., 60., 90.)) * np.pi / 180. ) + array([ 0. , 0.5 , 0.70710678, 0.8660254 , 1. ]) + + Plot the sine function: + + >>> import matplotlib.pylab as plt + >>> x = np.linspace(-np.pi, np.pi, 201) + >>> plt.plot(x, np.sin(x)) + >>> plt.xlabel('Angle [rad]') + >>> plt.ylabel('sin(x)') + >>> plt.axis('tight') + >>> plt.show() + + """) + +add_newdoc('numpy.core.umath', 'sinh', + """ + Hyperbolic sine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) - np.exp(-x))`` or + ``-1j * np.sin(1j*x)``. + + Parameters + ---------- + x : array_like + Input array. 
+ $PARAMS + + Returns + ------- + y : ndarray + The corresponding hyperbolic sine values. + $OUT_SCALAR_1 + + Notes + ----- + If `out` is provided, the function writes the result into it, + and returns a reference to `out`. (See Examples) + + References + ---------- + M. Abramowitz and I. A. Stegun, Handbook of Mathematical Functions. + New York, NY: Dover, 1972, pg. 83. + + Examples + -------- + >>> np.sinh(0) + 0.0 + >>> np.sinh(np.pi*1j/2) + 1j + >>> np.sinh(np.pi*1j) # (exact value is 0) + 1.2246063538223773e-016j + >>> # Discrepancy due to vagaries of floating point arithmetic. + + >>> # Example of providing the optional output parameter + >>> out2 = np.sinh([0.1], out1) + >>> out2 is out1 + True + + >>> # Example of ValueError due to provision of shape mis-matched `out` + >>> np.sinh(np.zeros((3,3)),np.zeros((2,2))) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ValueError: operands could not be broadcast together with shapes (3,3) (2,2) + + """) + +add_newdoc('numpy.core.umath', 'sqrt', + """ + Return the non-negative square-root of an array, element-wise. + + Parameters + ---------- + x : array_like + The values whose square-roots are required. + $PARAMS + + Returns + ------- + y : ndarray + An array of the same shape as `x`, containing the positive + square-root of each element in `x`. If any element in `x` is + complex, a complex array is returned (and the square-roots of + negative reals are calculated). If all of the elements in `x` + are real, so is `y`, with negative elements returning ``nan``. + If `out` was provided, `y` is a reference to it. + $OUT_SCALAR_1 + + See Also + -------- + lib.scimath.sqrt + A version which returns complex numbers when given negative reals. + + Notes + ----- + *sqrt* has--consistent with common convention--as its branch cut the + real "interval" [`-inf`, 0), and is continuous from above on it. + A branch cut is a curve in the complex plane across which a given + complex function fails to be continuous. + + Examples + -------- + >>> np.sqrt([1,4,9]) + array([ 1., 2., 3.]) + + >>> np.sqrt([4, -1, -3+4J]) + array([ 2.+0.j, 0.+1.j, 1.+2.j]) + + >>> np.sqrt([4, -1, numpy.inf]) + array([ 2., NaN, Inf]) + + """) + +add_newdoc('numpy.core.umath', 'cbrt', + """ + Return the cube-root of an array, element-wise. + + .. versionadded:: 1.10.0 + + Parameters + ---------- + x : array_like + The values whose cube-roots are required. + $PARAMS + + Returns + ------- + y : ndarray + An array of the same shape as `x`, containing the cube + cube-root of each element in `x`. + If `out` was provided, `y` is a reference to it. + $OUT_SCALAR_1 + + + Examples + -------- + >>> np.cbrt([1,8,27]) + array([ 1., 2., 3.]) + + """) + +add_newdoc('numpy.core.umath', 'square', + """ + Return the element-wise square of the input. + + Parameters + ---------- + x : array_like + Input data. + $PARAMS + + Returns + ------- + out : ndarray or scalar + Element-wise `x*x`, of the same shape and dtype as `x`. + $OUT_SCALAR_1 + + See Also + -------- + numpy.linalg.matrix_power + sqrt + power + + Examples + -------- + >>> np.square([-1j, 1]) + array([-1.-0.j, 1.+0.j]) + + """) + +add_newdoc('numpy.core.umath', 'subtract', + """ + Subtract arguments, element-wise. + + Parameters + ---------- + x1, x2 : array_like + The arrays to be subtracted from each other. + $PARAMS + + Returns + ------- + y : ndarray + The difference of `x1` and `x2`, element-wise. + $OUT_SCALAR_2 + + Notes + ----- + Equivalent to ``x1 - x2`` in terms of array broadcasting. 
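+
+ A minimal sketch of that equivalence, with small arrays chosen purely
+ for illustration:
+
+ >>> x1 = np.arange(9.0).reshape((3, 3))
+ >>> x2 = np.arange(3.0)
+ >>> np.array_equal(np.subtract(x1, x2), x1 - x2)
+ True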
+ + Examples + -------- + >>> np.subtract(1.0, 4.0) + -3.0 + + >>> x1 = np.arange(9.0).reshape((3, 3)) + >>> x2 = np.arange(3.0) + >>> np.subtract(x1, x2) + array([[ 0., 0., 0.], + [ 3., 3., 3.], + [ 6., 6., 6.]]) + + """) + +add_newdoc('numpy.core.umath', 'tan', + """ + Compute tangent element-wise. + + Equivalent to ``np.sin(x)/np.cos(x)`` element-wise. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding tangent values. + $OUT_SCALAR_1 + + Notes + ----- + If `out` is provided, the function writes the result into it, + and returns a reference to `out`. (See Examples) + + References + ---------- + M. Abramowitz and I. A. Stegun, Handbook of Mathematical Functions. + New York, NY: Dover, 1972. + + Examples + -------- + >>> from math import pi + >>> np.tan(np.array([-pi,pi/2,pi])) + array([ 1.22460635e-16, 1.63317787e+16, -1.22460635e-16]) + >>> + >>> # Example of providing the optional output parameter illustrating + >>> # that what is returned is a reference to said parameter + >>> out2 = np.cos([0.1], out1) + >>> out2 is out1 + True + >>> + >>> # Example of ValueError due to provision of shape mis-matched `out` + >>> np.cos(np.zeros((3,3)),np.zeros((2,2))) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ValueError: operands could not be broadcast together with shapes (3,3) (2,2) + + """) + +add_newdoc('numpy.core.umath', 'tanh', + """ + Compute hyperbolic tangent element-wise. + + Equivalent to ``np.sinh(x)/np.cosh(x)`` or ``-1j * np.tan(1j*x)``. + + Parameters + ---------- + x : array_like + Input array. + $PARAMS + + Returns + ------- + y : ndarray + The corresponding hyperbolic tangent values. + $OUT_SCALAR_1 + + Notes + ----- + If `out` is provided, the function writes the result into it, + and returns a reference to `out`. (See Examples) + + References + ---------- + .. [1] M. Abramowitz and I. A. Stegun, Handbook of Mathematical Functions. + New York, NY: Dover, 1972, pg. 83. + http://www.math.sfu.ca/~cbm/aands/ + + .. [2] Wikipedia, "Hyperbolic function", + https://en.wikipedia.org/wiki/Hyperbolic_function + + Examples + -------- + >>> np.tanh((0, np.pi*1j, np.pi*1j/2)) + array([ 0. +0.00000000e+00j, 0. -1.22460635e-16j, 0. +1.63317787e+16j]) + + >>> # Example of providing the optional output parameter illustrating + >>> # that what is returned is a reference to said parameter + >>> out2 = np.tanh([0.1], out1) + >>> out2 is out1 + True + + >>> # Example of ValueError due to provision of shape mis-matched `out` + >>> np.tanh(np.zeros((3,3)),np.zeros((2,2))) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ValueError: operands could not be broadcast together with shapes (3,3) (2,2) + + """) + +add_newdoc('numpy.core.umath', 'true_divide', + """ + Returns a true division of the inputs, element-wise. + + Instead of the Python traditional 'floor division', this returns a true + division. True division adjusts the output type to present the best + answer, regardless of input types. + + Parameters + ---------- + x1 : array_like + Dividend array. + x2 : array_like + Divisor array. + $PARAMS + + Returns + ------- + out : ndarray or scalar + $OUT_SCALAR_2 + + Notes + ----- + The floor division operator ``//`` was added in Python 2.2 making + ``//`` and ``/`` equivalent operators. The default floor division + operation of ``/`` can be replaced by true division with ``from + __future__ import division``. 
+ + In Python 3.0, ``//`` is the floor division operator and ``/`` the + true division operator. The ``true_divide(x1, x2)`` function is + equivalent to true division in Python. + + Examples + -------- + >>> x = np.arange(5) + >>> np.true_divide(x, 4) + array([ 0. , 0.25, 0.5 , 0.75, 1. ]) + + >>> x/4 + array([0, 0, 0, 0, 1]) + >>> x//4 + array([0, 0, 0, 0, 1]) + + >>> from __future__ import division + >>> x/4 + array([ 0. , 0.25, 0.5 , 0.75, 1. ]) + >>> x//4 + array([0, 0, 0, 0, 1]) + + """) + +add_newdoc('numpy.core.umath', 'frexp', + """ + Decompose the elements of x into mantissa and twos exponent. + + Returns (`mantissa`, `exponent`), where `x = mantissa * 2**exponent``. + The mantissa is lies in the open interval(-1, 1), while the twos + exponent is a signed integer. + + Parameters + ---------- + x : array_like + Array of numbers to be decomposed. + out1 : ndarray, optional + Output array for the mantissa. Must have the same shape as `x`. + out2 : ndarray, optional + Output array for the exponent. Must have the same shape as `x`. + $PARAMS + + Returns + ------- + mantissa : ndarray + Floating values between -1 and 1. + $OUT_SCALAR_1 + exponent : ndarray + Integer exponents of 2. + $OUT_SCALAR_1 + + See Also + -------- + ldexp : Compute ``y = x1 * 2**x2``, the inverse of `frexp`. + + Notes + ----- + Complex dtypes are not supported, they will raise a TypeError. + + Examples + -------- + >>> x = np.arange(9) + >>> y1, y2 = np.frexp(x) + >>> y1 + array([ 0. , 0.5 , 0.5 , 0.75 , 0.5 , 0.625, 0.75 , 0.875, + 0.5 ]) + >>> y2 + array([0, 1, 2, 2, 3, 3, 3, 3, 4]) + >>> y1 * 2**y2 + array([ 0., 1., 2., 3., 4., 5., 6., 7., 8.]) + + """) + +add_newdoc('numpy.core.umath', 'ldexp', + """ + Returns x1 * 2**x2, element-wise. + + The mantissas `x1` and twos exponents `x2` are used to construct + floating point numbers ``x1 * 2**x2``. + + Parameters + ---------- + x1 : array_like + Array of multipliers. + x2 : array_like, int + Array of twos exponents. + $PARAMS + + Returns + ------- + y : ndarray or scalar + The result of ``x1 * 2**x2``. + $OUT_SCALAR_2 + + See Also + -------- + frexp : Return (y1, y2) from ``x = y1 * 2**y2``, inverse to `ldexp`. + + Notes + ----- + Complex dtypes are not supported, they will raise a TypeError. + + `ldexp` is useful as the inverse of `frexp`, if used by itself it is + more clear to simply use the expression ``x1 * 2**x2``. 
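+
+ A small illustrative check of that equivalence (the values here are
+ arbitrary):
+
+ >>> np.array_equal(np.ldexp(5., np.arange(4)), 5. * 2. ** np.arange(4))
+ True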
+ + Examples + -------- + >>> np.ldexp(5, np.arange(4)) + array([ 5., 10., 20., 40.], dtype=float32) + + >>> x = np.arange(6) + >>> np.ldexp(*np.frexp(x)) + array([ 0., 1., 2., 3., 4., 5.]) + + """) + +add_newdoc('numpy.core.umath', 'gcd', + """ + Returns the greatest common divisor of ``|x1|`` and ``|x2|`` + + Parameters + ---------- + x1, x2 : array_like, int + Arrays of values + + Returns + ------- + y : ndarray or scalar + The greatest common divisor of the absolute value of the inputs + $OUT_SCALAR_2 + + See Also + -------- + lcm : The lowest common multiple + + Examples + -------- + >>> np.gcd(12, 20) + 4 + >>> np.gcd.reduce([15, 25, 35]) + 5 + >>> np.gcd(np.arange(6), 20) + array([20, 1, 2, 1, 4, 5]) + + """) + +add_newdoc('numpy.core.umath', 'lcm', + """ + Returns the lowest common multiple of ``|x1|`` and ``|x2|`` + + Parameters + ---------- + x1, x2 : array_like, int + Arrays of values + + Returns + ------- + y : ndarray or scalar + The lowest common multiple of the absolute value of the inputs + $OUT_SCALAR_2 + + See Also + -------- + gcd : The greatest common divisor + + Examples + -------- + >>> np.lcm(12, 20) + 60 + >>> np.lcm.reduce([3, 12, 20]) + 60 + >>> np.lcm.reduce([40, 12, 20]) + 120 + >>> np.lcm(np.arange(6), 20) + array([ 0, 20, 20, 60, 20, 20]) + + """) diff --git a/contrib/python/numpy/py2/numpy/core/include/numpy/oldnumeric.h b/contrib/python/numpy/py2/numpy/core/include/numpy/oldnumeric.h new file mode 100644 index 0000000000..38530faf04 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/include/numpy/oldnumeric.h @@ -0,0 +1,25 @@ +#include "arrayobject.h" + +#ifndef PYPY_VERSION +#ifndef REFCOUNT +# define REFCOUNT NPY_REFCOUNT +# define MAX_ELSIZE 16 +#endif +#endif + +#define PyArray_UNSIGNED_TYPES +#define PyArray_SBYTE NPY_BYTE +#define PyArray_CopyArray PyArray_CopyInto +#define _PyArray_multiply_list PyArray_MultiplyIntList +#define PyArray_ISSPACESAVER(m) NPY_FALSE +#define PyScalarArray_Check PyArray_CheckScalar + +#define CONTIGUOUS NPY_CONTIGUOUS +#define OWN_DIMENSIONS 0 +#define OWN_STRIDES 0 +#define OWN_DATA NPY_OWNDATA +#define SAVESPACE 0 +#define SAVESPACEBIT 0 + +#undef import_array +#define import_array() { if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); } } diff --git a/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/mlib.ini b/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/mlib.ini new file mode 100644 index 0000000000..5840f5e1bc --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/mlib.ini @@ -0,0 +1,12 @@ +[meta] +Name = mlib +Description = Math library used with this version of numpy +Version = 1.0 + +[default] +Libs=-lm +Cflags= + +[msvc] +Libs=m.lib +Cflags= diff --git a/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/npymath.ini b/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/npymath.ini new file mode 100644 index 0000000000..3e465ad2ac --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/lib/npy-pkg-config/npymath.ini @@ -0,0 +1,20 @@ +[meta] +Name=npymath +Description=Portable, core math library implementing C99 standard +Version=0.1 + +[variables] +pkgname=numpy.core +prefix=${pkgdir} +libdir=${prefix}/lib +includedir=${prefix}/include + +[default] +Libs=-L${libdir} -lnpymath +Cflags=-I${includedir} +Requires=mlib + +[msvc] +Libs=/LIBPATH:${libdir} npymath.lib +Cflags=/INCLUDE:${includedir} +Requires=mlib diff --git a/contrib/python/numpy/py2/numpy/core/mlib.ini.in 
b/contrib/python/numpy/py2/numpy/core/mlib.ini.in new file mode 100644 index 0000000000..badaa2ae9d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/mlib.ini.in @@ -0,0 +1,12 @@ +[meta] +Name = mlib +Description = Math library used with this version of numpy +Version = 1.0 + +[default] +Libs=@posix_mathlib@ +Cflags= + +[msvc] +Libs=@msvc_mathlib@ +Cflags= diff --git a/contrib/python/numpy/py2/numpy/core/npymath.ini.in b/contrib/python/numpy/py2/numpy/core/npymath.ini.in new file mode 100644 index 0000000000..a233b8f3bf --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/npymath.ini.in @@ -0,0 +1,20 @@ +[meta] +Name=npymath +Description=Portable, core math library implementing C99 standard +Version=0.1 + +[variables] +pkgname=@pkgname@ +prefix=${pkgdir} +libdir=${prefix}@sep@lib +includedir=${prefix}@sep@include + +[default] +Libs=-L${libdir} -lnpymath +Cflags=-I${includedir} +Requires=mlib + +[msvc] +Libs=/LIBPATH:${libdir} npymath.lib +Cflags=/INCLUDE:${includedir} +Requires=mlib diff --git a/contrib/python/numpy/py2/numpy/core/src/common/npy_binsearch.h.src b/contrib/python/numpy/py2/numpy/core/src/common/npy_binsearch.h.src new file mode 100644 index 0000000000..ce3b34b0ef --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/common/npy_binsearch.h.src @@ -0,0 +1,144 @@ +#ifndef __NPY_BINSEARCH_H__ +#define __NPY_BINSEARCH_H__ + +#include "npy_sort.h" +#include <numpy/npy_common.h> +#include <numpy/ndarraytypes.h> + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +typedef void (PyArray_BinSearchFunc)(const char*, const char*, char*, + npy_intp, npy_intp, + npy_intp, npy_intp, npy_intp, + PyArrayObject*); + +typedef int (PyArray_ArgBinSearchFunc)(const char*, const char*, + const char*, char*, + npy_intp, npy_intp, npy_intp, + npy_intp, npy_intp, npy_intp, + PyArrayObject*); + +typedef struct { + int typenum; + PyArray_BinSearchFunc *binsearch[NPY_NSEARCHSIDES]; +} binsearch_map; + +typedef struct { + int typenum; + PyArray_ArgBinSearchFunc *argbinsearch[NPY_NSEARCHSIDES]; +} argbinsearch_map; + +/**begin repeat + * + * #side = left, right# + */ + +/**begin repeat1 + * + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + */ + +NPY_VISIBILITY_HIDDEN void +binsearch_@side@_@suff@(const char *arr, const char *key, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, npy_intp ret_str, + PyArrayObject *unused); +NPY_VISIBILITY_HIDDEN int +argbinsearch_@side@_@suff@(const char *arr, const char *key, + const char *sort, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, + npy_intp sort_str, npy_intp ret_str, + PyArrayObject *unused); +/**end repeat1**/ + +NPY_VISIBILITY_HIDDEN void +npy_binsearch_@side@(const char *arr, const char *key, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, + npy_intp ret_str, PyArrayObject *cmp); +NPY_VISIBILITY_HIDDEN int +npy_argbinsearch_@side@(const char *arr, const char *key, + const char *sort, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, + npy_intp sort_str, npy_intp ret_str, + PyArrayObject *cmp); +/**end repeat**/ + +/**begin repeat + * + * #arg = , arg# + * #Arg = , Arg# + */ + +static @arg@binsearch_map _@arg@binsearch_map[] = { + /* If adding new types, make sure to keep them ordered by type num */ + /**begin repeat1 + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, 
USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA, HALF# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta, half# + */ + {NPY_@TYPE@, + { + &@arg@binsearch_left_@suff@, + &@arg@binsearch_right_@suff@, + }, + }, + /**end repeat1**/ +}; + +static PyArray_@Arg@BinSearchFunc *gen@arg@binsearch_map[] = { + &npy_@arg@binsearch_left, + &npy_@arg@binsearch_right, +}; + +static NPY_INLINE PyArray_@Arg@BinSearchFunc* +get_@arg@binsearch_func(PyArray_Descr *dtype, NPY_SEARCHSIDE side) +{ + npy_intp nfuncs = ARRAY_SIZE(_@arg@binsearch_map); + npy_intp min_idx = 0; + npy_intp max_idx = nfuncs; + int type = dtype->type_num; + + if (side >= NPY_NSEARCHSIDES) { + return NULL; + } + + /* + * It seems only fair that a binary search function be searched for + * using a binary search... + */ + while (min_idx < max_idx) { + npy_intp mid_idx = min_idx + ((max_idx - min_idx) >> 1); + + if (_@arg@binsearch_map[mid_idx].typenum < type) { + min_idx = mid_idx + 1; + } + else { + max_idx = mid_idx; + } + } + + if (min_idx < nfuncs && + _@arg@binsearch_map[min_idx].typenum == type) { + return _@arg@binsearch_map[min_idx].@arg@binsearch[side]; + } + + if (dtype->f->compare) { + return gen@arg@binsearch_map[side]; + } + + return NULL; +} +/**end repeat**/ + +#undef ARRAY_SIZE + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/common/npy_partition.h.src b/contrib/python/numpy/py2/numpy/core/src/common/npy_partition.h.src new file mode 100644 index 0000000000..a22cf911c7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/common/npy_partition.h.src @@ -0,0 +1,129 @@ +/* + ***************************************************************************** + ** IMPORTANT NOTE for npy_partition.h.src -> npy_partition.h ** + ***************************************************************************** + * The template file loops.h.src is not automatically converted into + * loops.h by the build system. 
If you edit this file, you must manually + * do the conversion using numpy/distutils/conv_template.py from the + * command line as follows: + * + * $ cd <NumPy source root directory> + * $ python numpy/distutils/conv_template.py numpy/core/src/private/npy_partition.h.src + * $ + */ + + +#ifndef __NPY_PARTITION_H__ +#define __NPY_PARTITION_H__ + + +#include "npy_sort.h" + +/* Python include is for future object sorts */ +#include <Python.h> +#include <numpy/npy_common.h> +#include <numpy/ndarraytypes.h> + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#define NPY_MAX_PIVOT_STACK 50 + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble# + */ + +NPY_VISIBILITY_HIDDEN int introselect_@suff@(@type@ *v, npy_intp num, + npy_intp kth, + npy_intp * pivots, + npy_intp * npiv, + void *NOT_USED); +NPY_VISIBILITY_HIDDEN int aintroselect_@suff@(@type@ *v, npy_intp* tosort, npy_intp num, + npy_intp kth, + npy_intp * pivots, + npy_intp * npiv, + void *NOT_USED); + + +/**end repeat**/ + +typedef struct { + int typenum; + PyArray_PartitionFunc * part[NPY_NSELECTS]; + PyArray_ArgPartitionFunc * argpart[NPY_NSELECTS]; +} part_map; + +static part_map _part_map[] = { +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble# + */ + { + NPY_@TYPE@, + { + (PyArray_PartitionFunc *)&introselect_@suff@, + }, + { + (PyArray_ArgPartitionFunc *)&aintroselect_@suff@, + } + }, +/**end repeat**/ +}; + + +static NPY_INLINE PyArray_PartitionFunc * +get_partition_func(int type, NPY_SELECTKIND which) +{ + npy_intp i; + npy_intp ntypes = ARRAY_SIZE(_part_map); + + if (which >= NPY_NSELECTS) { + return NULL; + } + for (i = 0; i < ntypes; i++) { + if (type == _part_map[i].typenum) { + return _part_map[i].part[which]; + } + } + return NULL; +} + + +static NPY_INLINE PyArray_ArgPartitionFunc * +get_argpartition_func(int type, NPY_SELECTKIND which) +{ + npy_intp i; + npy_intp ntypes = ARRAY_SIZE(_part_map); + + if (which >= NPY_NSELECTS) { + return NULL; + } + for (i = 0; i < ntypes; i++) { + if (type == _part_map[i].typenum) { + return _part_map[i].argpart[which]; + } + } + return NULL; +} + +#undef ARRAY_SIZE + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/common/npy_sort.h.src b/contrib/python/numpy/py2/numpy/core/src/common/npy_sort.h.src new file mode 100644 index 0000000000..c31a827645 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/common/npy_sort.h.src @@ -0,0 +1,83 @@ +#ifndef __NPY_SORT_H__ +#define __NPY_SORT_H__ + +/* Python include is for future 
object sorts */ +#include <Python.h> +#include <numpy/npy_common.h> +#include <numpy/ndarraytypes.h> + +#define NPY_ENOMEM 1 +#define NPY_ECOMP 2 + +static NPY_INLINE int npy_get_msb(npy_uintp unum) +{ + int depth_limit = 0; + while (unum >>= 1) { + depth_limit++; + } + return depth_limit; +} + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + */ + +int quicksort_@suff@(void *vec, npy_intp cnt, void *null); +int heapsort_@suff@(void *vec, npy_intp cnt, void *null); +int mergesort_@suff@(void *vec, npy_intp cnt, void *null); +int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); +int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); +int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); + +/**end repeat**/ + + + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #suff = string, unicode# + */ + +int quicksort_@suff@(void *vec, npy_intp cnt, void *arr); +int heapsort_@suff@(void *vec, npy_intp cnt, void *arr); +int mergesort_@suff@(void *vec, npy_intp cnt, void *arr); +int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); + +/**end repeat**/ + + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + + +int npy_quicksort(void *vec, npy_intp cnt, void *arr); +int npy_heapsort(void *vec, npy_intp cnt, void *arr); +int npy_mergesort(void *vec, npy_intp cnt, void *arr); +int npy_aquicksort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int npy_aheapsort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int npy_amergesort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/common/python_xerbla.c b/contrib/python/numpy/py2/numpy/core/src/common/python_xerbla.c new file mode 100644 index 0000000000..bdf0b9058f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/common/python_xerbla.c @@ -0,0 +1,51 @@ +#include "Python.h" + +/* + * From f2c.h, this should be safe unless fortran is set to use 64 + * bit integers. We don't seem to have any good way to detect that. + */ +typedef int integer; + +/* + From the original manpage: + -------------------------- + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an invalid value. + A message is printed and execution stops. + + Instead of printing a message and stopping the execution, a + ValueError is raised with the message. + + Parameters: + ----------- + srname: Subroutine name to use in error message, maximum six characters. + Spaces at the end are skipped. + info: Number of the invalid parameter. 
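+
+  Illustration (hypothetical call, based on the format string used below):
+  invoking xerbla_("DGEMM ", &info) with *info == 3 raises
+  ValueError: On entry to DGEMM parameter number 3 had an illegal value
+  instead of printing the message and halting execution.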
+*/ + +int xerbla_(char *srname, integer *info) +{ + static const char format[] = "On entry to %.*s" \ + " parameter number %d had an illegal value"; + char buf[sizeof(format) + 6 + 4]; /* 6 for name, 4 for param. num. */ + + int len = 0; /* length of subroutine name*/ +#ifdef WITH_THREAD + PyGILState_STATE save; +#endif + + while( len<6 && srname[len]!='\0' ) + len++; + while( len && srname[len-1]==' ' ) + len--; +#ifdef WITH_THREAD + save = PyGILState_Ensure(); +#endif + PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info); + PyErr_SetString(PyExc_ValueError, buf); +#ifdef WITH_THREAD + PyGILState_Release(save); +#endif + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/common/templ_common.h.src b/contrib/python/numpy/py2/numpy/core/src/common/templ_common.h.src new file mode 100644 index 0000000000..a65a007586 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/common/templ_common.h.src @@ -0,0 +1,46 @@ +#ifndef __NPY_TYPED_COMMON_INC +#define __NPY_TYPED_COMMON_INC + +/* utility functions that profit from templates */ + +#include "numpy/npy_common.h" + +/**begin repeat + * #name = int, uint, long, ulong, + * longlong, ulonglong, intp# + * #type = npy_int, npy_uint, npy_long, npy_ulong, + * npy_longlong, npy_ulonglong, npy_intp# + * #MAX = NPY_MAX_INT, NPY_MAX_UINT, NPY_MAX_LONG, NPY_MAX_ULONG, + * NPY_MAX_LONGLONG, NPY_MAX_ULONGLONG, NPY_MAX_INTP# + */ + +/* + * writes result of a * b into r + * returns 1 if a * b overflowed else returns 0 + * + * These functions are not designed to work if either a or b is negative, but + * that is not checked. Could use absolute values and adjust the sign if that + * functionality was desired. + */ +static NPY_INLINE int +npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b) +{ +#ifdef HAVE___BUILTIN_MUL_OVERFLOW + return __builtin_mul_overflow(a, b, r); +#else + const @type@ half_sz = ((@type@)1 << ((sizeof(a) * 8 - 1 ) / 2)); + + *r = a * b; + /* + * avoid expensive division on common no overflow case + */ + if (NPY_UNLIKELY((a | b) >= half_sz) && + a != 0 && b > @MAX@ / a) { + return 1; + } + return 0; +#endif +} +/**end repeat**/ + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/_multiarray_tests.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/_multiarray_tests.c.src new file mode 100644 index 0000000000..9061c05184 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -0,0 +1,2103 @@ +/* -*-c-*- */ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include <Python.h> +#define _NPY_NO_DEPRECATIONS /* for NPY_CHAR */ +#include "numpy/arrayobject.h" +#include "numpy/arrayscalars.h" +#include "numpy/npy_math.h" +#include "numpy/halffloat.h" +#include "common.h" +#include "mem_overlap.h" +#include "npy_extint128.h" +#include "common.h" + + +#if defined(MS_WIN32) || defined(__CYGWIN__) +#define EXPORT(x) __declspec(dllexport) x +#else +#define EXPORT(x) x +#endif + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +/* test PyArray_IsPythonScalar, before including private py3 compat header */ +static PyObject * +IsPythonScalar(PyObject * dummy, PyObject *args) +{ + PyObject *arg = NULL; + if (!PyArg_ParseTuple(args, "O", &arg)) { + return NULL; + } + if (PyArray_IsPythonScalar(arg)) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} + +#include "npy_pycompat.h" + +/** Function to test calling via ctypes */ +EXPORT(void*) forward_pointer(void *x) +{ + return x; +} + +/* + * TODO: + * - Handle mode + */ + +/**begin repeat + * 
#name = double, int# + * #type = npy_double, npy_int# + * #typenum = NPY_DOUBLE, NPY_INT# + */ +static int copy_@name@(PyArrayIterObject *itx, PyArrayNeighborhoodIterObject *niterx, + npy_intp *bounds, + PyObject **out) +{ + npy_intp i, j; + @type@ *ptr; + npy_intp odims[NPY_MAXDIMS]; + PyArrayObject *aout; + + /* + * For each point in itx, copy the current neighborhood into an array which + * is appended at the output list + */ + for (i = 0; i < itx->size; ++i) { + PyArrayNeighborhoodIter_Reset(niterx); + + for (j = 0; j < PyArray_NDIM(itx->ao); ++j) { + odims[j] = bounds[2 * j + 1] - bounds[2 * j] + 1; + } + aout = (PyArrayObject*)PyArray_SimpleNew( + PyArray_NDIM(itx->ao), odims, @typenum@); + if (aout == NULL) { + return -1; + } + + ptr = (@type@*)PyArray_DATA(aout); + + for (j = 0; j < niterx->size; ++j) { + *ptr = *((@type@*)niterx->dataptr); + PyArrayNeighborhoodIter_Next(niterx); + ptr += 1; + } + + PyList_Append(*out, (PyObject*)aout); + Py_DECREF(aout); + PyArray_ITER_NEXT(itx); + } + + return 0; +} +/**end repeat**/ + +static int copy_object(PyArrayIterObject *itx, PyArrayNeighborhoodIterObject *niterx, + npy_intp *bounds, + PyObject **out) +{ + npy_intp i, j; + npy_intp odims[NPY_MAXDIMS]; + PyArrayObject *aout; + PyArray_CopySwapFunc *copyswap = PyArray_DESCR(itx->ao)->f->copyswap; + npy_int itemsize = PyArray_ITEMSIZE(itx->ao); + + /* + * For each point in itx, copy the current neighborhood into an array which + * is appended at the output list + */ + for (i = 0; i < itx->size; ++i) { + PyArrayNeighborhoodIter_Reset(niterx); + + for (j = 0; j < PyArray_NDIM(itx->ao); ++j) { + odims[j] = bounds[2 * j + 1] - bounds[2 * j] + 1; + } + aout = (PyArrayObject*)PyArray_SimpleNew(PyArray_NDIM(itx->ao), odims, NPY_OBJECT); + if (aout == NULL) { + return -1; + } + + for (j = 0; j < niterx->size; ++j) { + copyswap(PyArray_BYTES(aout) + j * itemsize, niterx->dataptr, 0, NULL); + PyArrayNeighborhoodIter_Next(niterx); + } + + PyList_Append(*out, (PyObject*)aout); + Py_DECREF(aout); + PyArray_ITER_NEXT(itx); + } + + return 0; +} + +static PyObject* +test_neighborhood_iterator(PyObject* NPY_UNUSED(self), PyObject* args) +{ + PyObject *x, *fill, *out, *b; + PyArrayObject *ax, *afill; + PyArrayIterObject *itx; + int i, typenum, mode, st; + npy_intp bounds[NPY_MAXDIMS*2]; + PyArrayNeighborhoodIterObject *niterx; + + if (!PyArg_ParseTuple(args, "OOOi", &x, &b, &fill, &mode)) { + return NULL; + } + + if (!PySequence_Check(b)) { + return NULL; + } + + typenum = PyArray_ObjectType(x, 0); + typenum = PyArray_ObjectType(fill, typenum); + + ax = (PyArrayObject*)PyArray_FromObject(x, typenum, 1, 10); + if (ax == NULL) { + return NULL; + } + if (PySequence_Size(b) != 2 * PyArray_NDIM(ax)) { + PyErr_SetString(PyExc_ValueError, + "bounds sequence size not compatible with x input"); + goto clean_ax; + } + + out = PyList_New(0); + if (out == NULL) { + goto clean_ax; + } + + itx = (PyArrayIterObject*)PyArray_IterNew(x); + if (itx == NULL) { + goto clean_out; + } + + /* Compute boundaries for the neighborhood iterator */ + for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { + PyObject* bound; + bound = PySequence_GetItem(b, i); + if (bound == NULL) { + goto clean_itx; + } + if (!PyInt_Check(bound)) { + PyErr_SetString(PyExc_ValueError, + "bound not long"); + Py_DECREF(bound); + goto clean_itx; + } + bounds[i] = PyInt_AsLong(bound); + Py_DECREF(bound); + } + + /* Create the neighborhood iterator */ + afill = NULL; + if (mode == NPY_NEIGHBORHOOD_ITER_CONSTANT_PADDING) { + afill = (PyArrayObject 
*)PyArray_FromObject(fill, typenum, 0, 0); + if (afill == NULL) { + goto clean_itx; + } + } + + niterx = (PyArrayNeighborhoodIterObject*)PyArray_NeighborhoodIterNew( + (PyArrayIterObject*)itx, bounds, mode, afill); + if (niterx == NULL) { + goto clean_afill; + } + + switch (typenum) { + case NPY_OBJECT: + st = copy_object(itx, niterx, bounds, &out); + break; + case NPY_INT: + st = copy_int(itx, niterx, bounds, &out); + break; + case NPY_DOUBLE: + st = copy_double(itx, niterx, bounds, &out); + break; + default: + PyErr_SetString(PyExc_ValueError, + "Type not supported"); + goto clean_niterx; + } + + if (st) { + goto clean_niterx; + } + + Py_DECREF(niterx); + Py_XDECREF(afill); + Py_DECREF(itx); + + Py_DECREF(ax); + + return out; + +clean_niterx: + Py_DECREF(niterx); +clean_afill: + Py_XDECREF(afill); +clean_itx: + Py_DECREF(itx); +clean_out: + Py_DECREF(out); +clean_ax: + Py_DECREF(ax); + return NULL; +} + +static int +copy_double_double(PyArrayNeighborhoodIterObject *itx, + PyArrayNeighborhoodIterObject *niterx, + npy_intp *bounds, + PyObject **out) +{ + npy_intp i, j; + double *ptr; + npy_intp odims[NPY_MAXDIMS]; + PyArrayObject *aout; + + /* + * For each point in itx, copy the current neighborhood into an array which + * is appended at the output list + */ + PyArrayNeighborhoodIter_Reset(itx); + for (i = 0; i < itx->size; ++i) { + for (j = 0; j < PyArray_NDIM(itx->ao); ++j) { + odims[j] = bounds[2 * j + 1] - bounds[2 * j] + 1; + } + aout = (PyArrayObject*)PyArray_SimpleNew( + PyArray_NDIM(itx->ao), odims, NPY_DOUBLE); + if (aout == NULL) { + return -1; + } + + ptr = (double*)PyArray_DATA(aout); + + PyArrayNeighborhoodIter_Reset(niterx); + for (j = 0; j < niterx->size; ++j) { + *ptr = *((double*)niterx->dataptr); + ptr += 1; + PyArrayNeighborhoodIter_Next(niterx); + } + PyList_Append(*out, (PyObject*)aout); + Py_DECREF(aout); + PyArrayNeighborhoodIter_Next(itx); + } + return 0; +} + +static PyObject* +test_neighborhood_iterator_oob(PyObject* NPY_UNUSED(self), PyObject* args) +{ + PyObject *x, *out, *b1, *b2; + PyArrayObject *ax; + PyArrayIterObject *itx; + int i, typenum, mode1, mode2, st; + npy_intp bounds[NPY_MAXDIMS*2]; + PyArrayNeighborhoodIterObject *niterx1, *niterx2; + + if (!PyArg_ParseTuple(args, "OOiOi", &x, &b1, &mode1, &b2, &mode2)) { + return NULL; + } + + if (!PySequence_Check(b1) || !PySequence_Check(b2)) { + return NULL; + } + + typenum = PyArray_ObjectType(x, 0); + + ax = (PyArrayObject*)PyArray_FromObject(x, typenum, 1, 10); + if (ax == NULL) { + return NULL; + } + if (PySequence_Size(b1) != 2 * PyArray_NDIM(ax)) { + PyErr_SetString(PyExc_ValueError, + "bounds sequence 1 size not compatible with x input"); + goto clean_ax; + } + if (PySequence_Size(b2) != 2 * PyArray_NDIM(ax)) { + PyErr_SetString(PyExc_ValueError, + "bounds sequence 2 size not compatible with x input"); + goto clean_ax; + } + + out = PyList_New(0); + if (out == NULL) { + goto clean_ax; + } + + itx = (PyArrayIterObject*)PyArray_IterNew(x); + if (itx == NULL) { + goto clean_out; + } + + /* Compute boundaries for the neighborhood iterator */ + for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { + PyObject* bound; + bound = PySequence_GetItem(b1, i); + if (bound == NULL) { + goto clean_itx; + } + if (!PyInt_Check(bound)) { + PyErr_SetString(PyExc_ValueError, + "bound not long"); + Py_DECREF(bound); + goto clean_itx; + } + bounds[i] = PyInt_AsLong(bound); + Py_DECREF(bound); + } + + /* Create the neighborhood iterator */ + niterx1 = (PyArrayNeighborhoodIterObject*)PyArray_NeighborhoodIterNew( + 
(PyArrayIterObject*)itx, bounds, + mode1, NULL); + if (niterx1 == NULL) { + goto clean_out; + } + + for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { + PyObject* bound; + bound = PySequence_GetItem(b2, i); + if (bound == NULL) { + goto clean_itx; + } + if (!PyInt_Check(bound)) { + PyErr_SetString(PyExc_ValueError, + "bound not long"); + Py_DECREF(bound); + goto clean_itx; + } + bounds[i] = PyInt_AsLong(bound); + Py_DECREF(bound); + } + + niterx2 = (PyArrayNeighborhoodIterObject*)PyArray_NeighborhoodIterNew( + (PyArrayIterObject*)niterx1, bounds, + mode2, NULL); + if (niterx1 == NULL) { + goto clean_niterx1; + } + + switch (typenum) { + case NPY_DOUBLE: + st = copy_double_double(niterx1, niterx2, bounds, &out); + break; + default: + PyErr_SetString(PyExc_ValueError, + "Type not supported"); + goto clean_niterx2; + } + + if (st) { + goto clean_niterx2; + } + + Py_DECREF(niterx2); + Py_DECREF(niterx1); + Py_DECREF(itx); + Py_DECREF(ax); + return out; + +clean_niterx2: + Py_DECREF(niterx2); +clean_niterx1: + Py_DECREF(niterx1); +clean_itx: + Py_DECREF(itx); +clean_out: + Py_DECREF(out); +clean_ax: + Py_DECREF(ax); + return NULL; +} + +/* PyDataMem_SetHook tests */ +static int malloc_free_counts[2]; +static PyDataMem_EventHookFunc *old_hook = NULL; +static void *old_data; + +static void test_hook(void *old, void *new, size_t size, void *user_data) +{ + int* counters = (int *) user_data; + if (old == NULL) { + counters[0]++; /* malloc counter */ + } + if (size == 0) { + counters[1]++; /* free counter */ + } +} + +static PyObject* +test_pydatamem_seteventhook_start(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ + malloc_free_counts[0] = malloc_free_counts[1] = 0; + old_hook = PyDataMem_SetEventHook(test_hook, (void *) malloc_free_counts, &old_data); + Py_RETURN_NONE; +} + +static PyObject* +test_pydatamem_seteventhook_end(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ + PyDataMem_EventHookFunc *my_hook; + void *my_data; + + my_hook = PyDataMem_SetEventHook(old_hook, old_data, &my_data); + if ((my_hook != test_hook) || (my_data != (void *) malloc_free_counts)) { + PyErr_SetString(PyExc_ValueError, + "hook/data was not the expected test hook"); + return NULL; + } + + if (malloc_free_counts[0] == 0) { + PyErr_SetString(PyExc_ValueError, + "malloc count is zero after test"); + return NULL; + } + if (malloc_free_counts[1] == 0) { + PyErr_SetString(PyExc_ValueError, + "free count is zero after test"); + return NULL; + } + + Py_RETURN_NONE; +} + + +typedef void (*inplace_map_binop)(PyArrayMapIterObject *, PyArrayIterObject *); + +static void npy_float64_inplace_add(PyArrayMapIterObject *mit, PyArrayIterObject *it) +{ + int index = mit->size; + while (index--) { + ((npy_float64*)mit->dataptr)[0] = ((npy_float64*)mit->dataptr)[0] + ((npy_float64*)it->dataptr)[0]; + + PyArray_MapIterNext(mit); + PyArray_ITER_NEXT(it); + } +} + +inplace_map_binop addition_funcs[] = { +npy_float64_inplace_add, +NULL}; + +int type_numbers[] = { +NPY_FLOAT64, +-1000}; + + + +static int +map_increment(PyArrayMapIterObject *mit, PyObject *op, inplace_map_binop add_inplace) +{ + PyArrayObject *arr = NULL; + PyArrayIterObject *it; + PyArray_Descr *descr; + + if (mit->ait == NULL) { + return -1; + } + descr = PyArray_DESCR(mit->ait->ao); + Py_INCREF(descr); + arr = (PyArrayObject *)PyArray_FromAny(op, descr, + 0, 0, NPY_ARRAY_FORCECAST, NULL); + if (arr == NULL) { + return -1; + } + + if ((mit->subspace != NULL) && (mit->consec)) { + PyArray_MapIterSwapAxes(mit, (PyArrayObject **)&arr, 0); + if (arr == NULL) { + 
return -1; + } + } + + if ((it = (PyArrayIterObject *)\ + PyArray_BroadcastToShape((PyObject *)arr, mit->dimensions, + mit->nd)) == NULL) { + Py_DECREF(arr); + + return -1; + } + + (*add_inplace)(mit, it); + + Py_DECREF(arr); + Py_DECREF(it); + return 0; +} + + +static PyObject * +inplace_increment(PyObject *dummy, PyObject *args) +{ + PyObject *arg_a = NULL, *index=NULL, *inc=NULL; + PyArrayObject *a; + inplace_map_binop add_inplace = NULL; + int type_number = -1; + int i =0; + PyArrayMapIterObject * mit; + + if (!PyArg_ParseTuple(args, "OOO", &arg_a, &index, + &inc)) { + return NULL; + } + if (!PyArray_Check(arg_a)) { + PyErr_SetString(PyExc_ValueError, "needs an ndarray as first argument"); + return NULL; + } + a = (PyArrayObject *) arg_a; + + if (PyArray_FailUnlessWriteable(a, "input/output array") < 0) { + return NULL; + } + + if (PyArray_NDIM(a) == 0) { + PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed."); + return NULL; + } + type_number = PyArray_TYPE(a); + + while (type_numbers[i] >= 0 && addition_funcs[i] != NULL){ + if (type_number == type_numbers[i]) { + add_inplace = addition_funcs[i]; + break; + } + i++ ; + } + + if (add_inplace == NULL) { + PyErr_SetString(PyExc_TypeError, "unsupported type for a"); + return NULL; + } + + mit = (PyArrayMapIterObject *) PyArray_MapIterArray(a, index); + if (mit == NULL) { + goto fail; + } + + if (map_increment(mit, inc, add_inplace) != 0) { + goto fail; + } + + Py_DECREF(mit); + + Py_RETURN_NONE; + +fail: + Py_XDECREF(mit); + + return NULL; +} + +/* check no elison for avoided increfs */ +static PyObject * +incref_elide(PyObject *dummy, PyObject *args) +{ + PyObject *arg = NULL, *res, *tup; + if (!PyArg_ParseTuple(args, "O", &arg)) { + return NULL; + } + + /* refcount 1 array but should not be elided */ + arg = PyArray_NewCopy((PyArrayObject*)arg, NPY_KEEPORDER); + res = PyNumber_Add(arg, arg); + + /* return original copy, should be equal to input */ + tup = PyTuple_Pack(2, arg, res); + Py_DECREF(arg); + Py_DECREF(res); + return tup; +} + +/* check no elison for get from list without incref */ +static PyObject * +incref_elide_l(PyObject *dummy, PyObject *args) +{ + PyObject *arg = NULL, *r, *res; + if (!PyArg_ParseTuple(args, "O", &arg)) { + return NULL; + } + /* get item without increasing refcount, item may still be on the python + * stack but above the inaccessible top */ + r = PyList_GetItem(arg, 4); + res = PyNumber_Add(r, r); + + return res; +} + +/* used to test NPY_CHAR usage emits deprecation warning */ +static PyObject* +npy_char_deprecation(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ + PyArray_Descr * descr = PyArray_DescrFromType(NPY_CHAR); + return (PyObject *)descr; +} + +/* used to test UPDATEIFCOPY usage emits deprecation warning */ +static PyObject* +npy_updateifcopy_deprecation(PyObject* NPY_UNUSED(self), PyObject* args) +{ + int flags; + PyObject* array; + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + flags = NPY_ARRAY_CARRAY | NPY_ARRAY_UPDATEIFCOPY; + array = PyArray_FromArray((PyArrayObject*)args, NULL, flags); + if (array == NULL) + return NULL; + PyArray_ResolveWritebackIfCopy((PyArrayObject*)array); + Py_DECREF(array); + Py_RETURN_NONE; +} + +/* used to create array with WRITEBACKIFCOPY flag */ +static PyObject* +npy_create_writebackifcopy(PyObject* NPY_UNUSED(self), PyObject* args) +{ + int flags; + PyObject* array; + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return 
NULL; + } + flags = NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEBACKIFCOPY; + array = PyArray_FromArray((PyArrayObject*)args, NULL, flags); + if (array == NULL) + return NULL; + return array; +} + +/* used to test WRITEBACKIFCOPY without resolution emits runtime warning */ +static PyObject* +npy_abuse_writebackifcopy(PyObject* NPY_UNUSED(self), PyObject* args) +{ + int flags; + PyObject* array; + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + flags = NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEBACKIFCOPY; + array = PyArray_FromArray((PyArrayObject*)args, NULL, flags); + if (array == NULL) + return NULL; + Py_DECREF(array); /* calls array_dealloc even on PyPy */ + Py_RETURN_NONE; +} + +/* resolve WRITEBACKIFCOPY */ +static PyObject* +npy_resolve(PyObject* NPY_UNUSED(self), PyObject* args) +{ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + PyArray_ResolveWritebackIfCopy((PyArrayObject*)args); + Py_RETURN_NONE; +} + +/* resolve WRITEBACKIFCOPY */ +static PyObject* +npy_discard(PyObject* NPY_UNUSED(self), PyObject* args) +{ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + PyArray_DiscardWritebackIfCopy((PyArrayObject*)args); + Py_RETURN_NONE; +} + +#if !defined(NPY_PY3K) +static PyObject * +int_subclass(PyObject *dummy, PyObject *args) +{ + + PyObject *result = NULL; + PyObject *scalar_object = NULL; + + if (!PyArg_UnpackTuple(args, "test_int_subclass", 1, 1, &scalar_object)) + return NULL; + + if (PyInt_Check(scalar_object)) + result = Py_True; + else + result = Py_False; + + Py_INCREF(result); + + return result; + +} +#endif + + +/* + * Create python string from a FLAG and or the corresponding PyBuf flag + * for the use in get_buffer_info. + */ +#define GET_PYBUF_FLAG(FLAG) \ + buf_flag = PyUnicode_FromString(#FLAG); \ + flag_matches = PyObject_RichCompareBool(buf_flag, tmp, Py_EQ); \ + Py_DECREF(buf_flag); \ + if (flag_matches == 1) { \ + Py_DECREF(tmp); \ + flags |= PyBUF_##FLAG; \ + continue; \ + } \ + else if (flag_matches == -1) { \ + Py_DECREF(tmp); \ + return NULL; \ + } + + +/* + * Get information for a buffer through PyBuf_GetBuffer with the + * corresponding flags or'ed. Note that the python caller has to + * make sure that or'ing those flags actually makes sense. + * More information should probably be returned for future tests. 
+ */ +static PyObject * +get_buffer_info(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *buffer_obj, *pyflags; + PyObject *tmp, *buf_flag; + Py_buffer buffer; + PyObject *shape, *strides; + Py_ssize_t i, n; + int flag_matches; + int flags = 0; + + if (!PyArg_ParseTuple(args, "OO", &buffer_obj, &pyflags)) { + return NULL; + } + + n = PySequence_Length(pyflags); + if (n < 0) { + return NULL; + } + + for (i=0; i < n; i++) { + tmp = PySequence_GetItem(pyflags, i); + if (tmp == NULL) { + return NULL; + } + + GET_PYBUF_FLAG(SIMPLE); + GET_PYBUF_FLAG(WRITABLE); + GET_PYBUF_FLAG(STRIDES); + GET_PYBUF_FLAG(ND); + GET_PYBUF_FLAG(C_CONTIGUOUS); + GET_PYBUF_FLAG(F_CONTIGUOUS); + GET_PYBUF_FLAG(ANY_CONTIGUOUS); + GET_PYBUF_FLAG(INDIRECT); + GET_PYBUF_FLAG(FORMAT); + GET_PYBUF_FLAG(STRIDED); + GET_PYBUF_FLAG(STRIDED_RO); + GET_PYBUF_FLAG(RECORDS); + GET_PYBUF_FLAG(RECORDS_RO); + GET_PYBUF_FLAG(FULL); + GET_PYBUF_FLAG(FULL_RO); + GET_PYBUF_FLAG(CONTIG); + GET_PYBUF_FLAG(CONTIG_RO); + + Py_DECREF(tmp); + + /* One of the flags must match */ + PyErr_SetString(PyExc_ValueError, "invalid flag used."); + return NULL; + } + + if (PyObject_GetBuffer(buffer_obj, &buffer, flags) < 0) { + return NULL; + } + + if (buffer.shape == NULL) { + Py_INCREF(Py_None); + shape = Py_None; + } + else { + shape = PyTuple_New(buffer.ndim); + for (i=0; i < buffer.ndim; i++) { + PyTuple_SET_ITEM(shape, i, PyLong_FromSsize_t(buffer.shape[i])); + } + } + + if (buffer.strides == NULL) { + Py_INCREF(Py_None); + strides = Py_None; + } + else { + strides = PyTuple_New(buffer.ndim); + for (i=0; i < buffer.ndim; i++) { + PyTuple_SET_ITEM(strides, i, PyLong_FromSsize_t(buffer.strides[i])); + } + } + + PyBuffer_Release(&buffer); + return Py_BuildValue("(NN)", shape, strides); +} + +#undef GET_PYBUF_FLAG + +/* + * Return a new array object wrapping existing C-allocated (dummy) data. + * Such an array does not own its data (must not free it), but because it + * wraps C data, it also has no base object. Used to test arr.flags.writeable + * setting behaviour. + */ +static PyObject* +get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg) +{ + int writeable, flags; + npy_intp zero = 0; + + writeable = PyObject_IsTrue(arg); + if (error_converting(writeable)) { + return NULL; + } + + flags = writeable ? NPY_ARRAY_WRITEABLE : 0; + /* Create an empty array (which points to a random place) */ + return PyArray_NewFromDescr(&PyArray_Type, PyArray_DescrFromType(NPY_INTP), + 1, &zero, NULL, &zero, flags, NULL); +} + + +/* + * Test C-api level item getting. + */ +static PyObject * +array_indexing(PyObject *NPY_UNUSED(self), PyObject *args) +{ + int mode; + Py_ssize_t i; + PyObject *arr, *op = NULL; + + if (!PyArg_ParseTuple(args, "iOn|O", &mode, &arr, &i, &op)) { + return NULL; + } + + if (mode == 0) { + return PySequence_GetItem(arr, i); + } + if (mode == 1) { + if (PySequence_SetItem(arr, i, op) < 0) { + return NULL; + } + Py_RETURN_NONE; + } + + PyErr_SetString(PyExc_ValueError, + "invalid mode. 
0: item 1: assign"); + return NULL; +} + +/* + * Test C-api PyArray_AsCArray item getter + */ +static PyObject * +test_as_c_array(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyArrayObject *array_obj; + npy_intp dims[3]; /* max 3-dim */ + npy_intp i=0, j=0, k=0; + npy_intp num_dims = 0; + PyArray_Descr *descr = NULL; + double *array1 = NULL; + double **array2 = NULL; + double ***array3 = NULL; + double temp = 9999; + + if (!PyArg_ParseTuple(args, "O!l|ll", + &PyArray_Type, &array_obj, + &i, &j, &k)) { + return NULL; + } + + if (NULL == array_obj) { + return NULL; + } + + num_dims = PyArray_NDIM(array_obj); + descr = PyArray_DESCR(array_obj); + + switch (num_dims) { + case 1: + if (PyArray_AsCArray( + (PyObject **) &array_obj, + (void *) &array1, + dims, + 1, + descr) < 0) { + PyErr_SetString(PyExc_RuntimeError, "error converting 1D array"); + return NULL; + } + temp = array1[i]; + PyArray_Free((PyObject *) array_obj, (void *) array1); + break; + case 2: + if (PyArray_AsCArray( + (PyObject **) &array_obj, + (void **) &array2, + dims, + 2, + descr) < 0) { + PyErr_SetString(PyExc_RuntimeError, "error converting 2D array"); + return NULL; + } + temp = array2[i][j]; + PyArray_Free((PyObject *) array_obj, (void *) array2); + break; + case 3: + if (PyArray_AsCArray( + (PyObject **) &array_obj, + (void ***) &array3, + dims, + 3, + descr) < 0) { + PyErr_SetString(PyExc_RuntimeError, "error converting 3D array"); + return NULL; + } + temp = array3[i][j][k]; + PyArray_Free((PyObject *) array_obj, (void *) array3); + break; + default: + PyErr_SetString(PyExc_ValueError, "array.ndim not in [1, 3]"); + return NULL; + } + return Py_BuildValue("f", temp); +} + +/* + * Test nditer of too large arrays using remove axis, etc. + */ +static PyObject * +test_nditer_too_large(PyObject *NPY_UNUSED(self), PyObject *args) { + NpyIter *iter; + PyObject *array_tuple, *arr; + PyArrayObject *arrays[NPY_MAXARGS]; + npy_uint32 op_flags[NPY_MAXARGS]; + Py_ssize_t nop; + int i, axis, mode; + + npy_intp index[NPY_MAXARGS] = {0}; + char *msg; + + if (!PyArg_ParseTuple(args, "Oii", &array_tuple, &axis, &mode)) { + return NULL; + } + + if (!PyTuple_CheckExact(array_tuple)) { + PyErr_SetString(PyExc_ValueError, "tuple required as first argument"); + return NULL; + } + nop = PyTuple_Size(array_tuple); + if (nop > NPY_MAXARGS) { + PyErr_SetString(PyExc_ValueError, "tuple must be smaller then maxargs"); + return NULL; + } + + for (i=0; i < nop; i++) { + arr = PyTuple_GET_ITEM(array_tuple, i); + if (!PyArray_CheckExact(arr)) { + PyErr_SetString(PyExc_ValueError, "require base class ndarray"); + return NULL; + } + arrays[i] = (PyArrayObject *)arr; + op_flags[i] = NPY_ITER_READONLY; + } + + iter = NpyIter_MultiNew(nop, arrays, NPY_ITER_MULTI_INDEX | NPY_ITER_RANGED, + NPY_KEEPORDER, NPY_NO_CASTING, op_flags, NULL); + + if (iter == NULL) { + return NULL; + } + + /* Remove an axis (negative, do not remove any) */ + if (axis >= 0) { + if (!NpyIter_RemoveAxis(iter, axis)) { + goto fail; + } + } + + switch (mode) { + /* Test IterNext getting */ + case 0: + if (NpyIter_GetIterNext(iter, NULL) == NULL) { + goto fail; + } + break; + case 1: + if (NpyIter_GetIterNext(iter, &msg) == NULL) { + PyErr_SetString(PyExc_ValueError, msg); + goto fail; + } + break; + /* Test Multi Index removal */ + case 2: + if (!NpyIter_RemoveMultiIndex(iter)) { + goto fail; + } + break; + /* Test GotoMultiIndex (just 0 hardcoded) */ + case 3: + if (!NpyIter_GotoMultiIndex(iter, index)) { + goto fail; + } + break; + /* Test setting iterrange (hardcoded range of 
0, 1) */ + case 4: + if (!NpyIter_ResetToIterIndexRange(iter, 0, 1, NULL)) { + goto fail; + } + break; + case 5: + if (!NpyIter_ResetToIterIndexRange(iter, 0, 1, &msg)) { + PyErr_SetString(PyExc_ValueError, msg); + goto fail; + } + break; + /* Do nothing */ + default: + break; + } + + NpyIter_Deallocate(iter); + Py_RETURN_NONE; + fail: + NpyIter_Deallocate(iter); + return NULL; +} + +static PyObject * +array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) +{ + PyObject *A = NULL; + PyObject *U = NULL; + Py_ssize_t b_input = 0; + Py_ssize_t max_work = -1; + int simplify = 0; + int require_ub_nontrivial = 0; + static char *kwlist[] = {"A", "U", "b", "max_work", "simplify", + "require_ub_nontrivial", NULL}; + + diophantine_term_t terms[2*NPY_MAXDIMS+2]; + npy_int64 x[2*NPY_MAXDIMS+2]; + npy_int64 b; + unsigned int nterms, j; + mem_overlap_t result = MEM_OVERLAP_YES; + PyObject *retval = NULL; + NPY_BEGIN_THREADS_DEF; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!n|nii", kwlist, + &PyTuple_Type, &A, + &PyTuple_Type, &U, + &b_input, &max_work, &simplify, + &require_ub_nontrivial)) { + return NULL; + } + + if (PyTuple_GET_SIZE(A) > (Py_ssize_t)ARRAY_SIZE(terms)) { + PyErr_SetString(PyExc_ValueError, "too many terms in equation"); + goto fail; + } + + nterms = PyTuple_GET_SIZE(A); + + if (PyTuple_GET_SIZE(U) != nterms) { + PyErr_SetString(PyExc_ValueError, "A, U must be tuples of equal length"); + goto fail; + } + + for (j = 0; j < nterms; ++j) { + terms[j].a = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(A, j)); + if (error_converting(terms[j].a)) { + goto fail; + } + terms[j].ub = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(U, j)); + if (error_converting(terms[j].ub)) { + goto fail; + } + } + + b = b_input; + + NPY_BEGIN_THREADS; + if (simplify && !require_ub_nontrivial) { + if (diophantine_simplify(&nterms, terms, b)) { + result = MEM_OVERLAP_OVERFLOW; + } + } + if (result == MEM_OVERLAP_YES) { + result = solve_diophantine(nterms, terms, b, max_work, require_ub_nontrivial, x); + } + NPY_END_THREADS; + + if (result == MEM_OVERLAP_YES) { + retval = PyTuple_New(nterms); + if (retval == NULL) { + goto fail; + } + + for (j = 0; j < nterms; ++j) { + PyObject *obj; +#if defined(NPY_PY3K) + obj = PyLong_FromSsize_t(x[j]); +#else + obj = PyInt_FromSsize_t(x[j]); +#endif + if (obj == NULL) { + goto fail; + } + PyTuple_SET_ITEM(retval, j, obj); + } + } + else if (result == MEM_OVERLAP_NO) { + retval = Py_None; + Py_INCREF(retval); + } + else if (result == MEM_OVERLAP_ERROR) { + PyErr_SetString(PyExc_ValueError, "Invalid arguments"); + } + else if (result == MEM_OVERLAP_OVERFLOW) { + PyErr_SetString(PyExc_OverflowError, "Integer overflow"); + } + else if (result == MEM_OVERLAP_TOO_HARD) { + PyErr_SetString(PyExc_RuntimeError, "Too much work done"); + } + else { + PyErr_SetString(PyExc_RuntimeError, "Unknown error"); + } + + return retval; + +fail: + Py_XDECREF(retval); + return NULL; +} + + +static PyObject * +array_internal_overlap(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) +{ + PyArrayObject * self = NULL; + static char *kwlist[] = {"self", "max_work", NULL}; + + mem_overlap_t result; + Py_ssize_t max_work = NPY_MAY_SHARE_EXACT; + NPY_BEGIN_THREADS_DEF; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|n", kwlist, + PyArray_Converter, &self, + &max_work)) { + return NULL; + } + + if (max_work < -2) { + PyErr_SetString(PyExc_ValueError, "Invalid value for max_work"); + goto fail; + } + + NPY_BEGIN_THREADS; + result = 
solve_may_have_internal_overlap(self, max_work); + NPY_END_THREADS; + + Py_XDECREF(self); + + if (result == MEM_OVERLAP_NO) { + Py_RETURN_FALSE; + } + else if (result == MEM_OVERLAP_YES) { + Py_RETURN_TRUE; + } + else if (result == MEM_OVERLAP_OVERFLOW) { + PyErr_SetString(PyExc_OverflowError, + "Integer overflow in computing overlap"); + return NULL; + } + else if (result == MEM_OVERLAP_TOO_HARD) { + PyErr_SetString(PyExc_ValueError, + "Exceeded max_work"); + return NULL; + } + else { + /* Doesn't happen usually */ + PyErr_SetString(PyExc_RuntimeError, + "Error in computing overlap"); + return NULL; + } + +fail: + Py_XDECREF(self); + return NULL; +} + + +static PyObject * +pylong_from_int128(npy_extint128_t value) +{ + PyObject *val_64 = NULL, *val = NULL, *tmp = NULL, *tmp2 = NULL; + + val_64 = PyLong_FromLong(64); + if (val_64 == NULL) { + goto fail; + } + + val = PyLong_FromUnsignedLongLong(value.hi); + if (val == NULL) { + goto fail; + } + + tmp = PyNumber_Lshift(val, val_64); + if (tmp == NULL) { + goto fail; + } + + Py_DECREF(val); + val = tmp; + + tmp = PyLong_FromUnsignedLongLong(value.lo); + if (tmp == NULL) { + goto fail; + } + + tmp2 = PyNumber_Or(val, tmp); + if (tmp2 == NULL) { + goto fail; + } + + Py_DECREF(val); + Py_DECREF(tmp); + + val = NULL; + tmp = NULL; + + if (value.sign < 0) { + val = PyNumber_Negative(tmp2); + if (val == NULL) { + goto fail; + } + Py_DECREF(tmp2); + return val; + } + else { + val = tmp2; + } + return val; + +fail: + Py_XDECREF(val_64); + Py_XDECREF(tmp); + Py_XDECREF(tmp2); + Py_XDECREF(val); + return NULL; +} + + +static int +int128_from_pylong(PyObject *obj, npy_extint128_t *result) +{ + PyObject *long_obj = NULL, *val_64 = NULL, *val_0 = NULL, + *mask_64 = NULL, *max_128 = NULL, *hi_bits = NULL, + *lo_bits = NULL, *tmp = NULL; + int cmp; + int negative_zero = 0; + + if (PyBool_Check(obj)) { + /* False means negative zero */ + negative_zero = 1; + } + + long_obj = PyObject_CallFunction((PyObject*)&PyLong_Type, "O", obj); + if (long_obj == NULL) { + goto fail; + } + + val_0 = PyLong_FromLong(0); + if (val_0 == NULL) { + goto fail; + } + + val_64 = PyLong_FromLong(64); + if (val_64 == NULL) { + goto fail; + } + + mask_64 = PyLong_FromUnsignedLongLong(0xffffffffffffffffULL); + if (mask_64 == NULL) { + goto fail; + } + + tmp = PyNumber_Lshift(mask_64, val_64); + if (tmp == NULL) { + goto fail; + } + max_128 = PyNumber_Or(tmp, mask_64); + if (max_128 == NULL) { + goto fail; + } + Py_DECREF(tmp); + tmp = NULL; + + cmp = PyObject_RichCompareBool(long_obj, val_0, Py_LT); + if (cmp == -1) { + goto fail; + } + else if (cmp == 1) { + tmp = PyNumber_Negative(long_obj); + if (tmp == NULL) { + goto fail; + } + Py_DECREF(long_obj); + long_obj = tmp; + tmp = NULL; + result->sign = -1; + } + else { + result->sign = 1; + } + + cmp = PyObject_RichCompareBool(long_obj, max_128, Py_GT); + if (cmp == 1) { + PyErr_SetString(PyExc_OverflowError, ""); + goto fail; + } + else if (cmp == -1) { + goto fail; + } + + hi_bits = PyNumber_Rshift(long_obj, val_64); + if (hi_bits == NULL) { + goto fail; + } + + lo_bits = PyNumber_And(long_obj, mask_64); + if (lo_bits == NULL) { + goto fail; + } + + result->hi = PyLong_AsUnsignedLongLong(hi_bits); + if (result->hi == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred()) { + goto fail; + } + + result->lo = PyLong_AsUnsignedLongLong(lo_bits); + if (result->lo == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred()) { + goto fail; + } + + if (negative_zero && result->hi == 0 && result->lo == 0) { + result->sign = -1; + } + + Py_XDECREF(long_obj); + 
Py_XDECREF(val_64); + Py_XDECREF(val_0); + Py_XDECREF(mask_64); + Py_XDECREF(max_128); + Py_XDECREF(hi_bits); + Py_XDECREF(lo_bits); + Py_XDECREF(tmp); + return 0; + +fail: + Py_XDECREF(long_obj); + Py_XDECREF(val_64); + Py_XDECREF(val_0); + Py_XDECREF(mask_64); + Py_XDECREF(max_128); + Py_XDECREF(hi_bits); + Py_XDECREF(lo_bits); + Py_XDECREF(tmp); + return -1; +} + + +static PyObject * +extint_safe_binop(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a, b, c; + int op; + char overflow = 0; + if (!PyArg_ParseTuple(args, "LLi", &a, &b, &op)) { + return NULL; + } + if (op == 1) { + c = safe_add(a, b, &overflow); + } + else if (op == 2) { + c = safe_sub(a, b, &overflow); + } + else if (op == 3) { + c = safe_mul(a, b, &overflow); + } + else { + PyErr_SetString(PyExc_ValueError, "invalid op"); + return NULL; + } + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return PyLong_FromLongLong(c); +} + + +static PyObject * +extint_to_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a; + if (!PyArg_ParseTuple(args, "L", &a)) { + return NULL; + } + return pylong_from_int128(to_128(a)); +} + + +static PyObject * +extint_to_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a; + PY_LONG_LONG r; + char overflow = 0; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + r = to_64(a, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return PyLong_FromLongLong(r); +} + + +static PyObject * +extint_mul_64_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a, b; + npy_extint128_t c; + if (!PyArg_ParseTuple(args, "LL", &a, &b)) { + return NULL; + } + c = mul_64_64(a, b); + return pylong_from_int128(c); +} + + +static PyObject * +extint_add_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b, c; + char overflow = 0; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + c = add_128(a, b, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return pylong_from_int128(c); +} + + +static PyObject * +extint_sub_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b, c; + char overflow = 0; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + c = sub_128(a, b, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return pylong_from_int128(c); +} + + +static PyObject * +extint_neg_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = neg_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_shl_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = shl_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_shr_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", 
&a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = shr_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_gt_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + if (gt_128(a, b)) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} + + +static PyObject * +extint_divmod_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *ret = NULL, *tmp = NULL; + npy_extint128_t a, c; + PY_LONG_LONG b; + npy_int64 mod; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + goto fail; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + goto fail; + } + if (int128_from_pylong(a_obj, &a)) { + goto fail; + } + + c = divmod_128_64(a, b, &mod); + + ret = PyTuple_New(2); + + tmp = pylong_from_int128(c); + if (tmp == NULL) { + goto fail; + } + PyTuple_SET_ITEM(ret, 0, tmp); + + tmp = PyLong_FromLongLong(mod); + if (tmp == NULL) { + goto fail; + } + PyTuple_SET_ITEM(ret, 1, tmp); + return ret; + +fail: + Py_XDECREF(ret); + Py_XDECREF(tmp); + return NULL; +} + + +static PyObject * +extint_floordiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, c; + PY_LONG_LONG b; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + return NULL; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + c = floordiv_128_64(a, b); + return pylong_from_int128(c); +} + + +static PyObject * +extint_ceildiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, c; + PY_LONG_LONG b; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + return NULL; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + c = ceildiv_128_64(a, b); + return pylong_from_int128(c); +} + +struct TestStruct1 { + npy_uint8 a; + npy_complex64 b; +}; + +struct TestStruct2 { + npy_uint32 a; + npy_complex64 b; +}; + +struct TestStruct3 { + npy_uint8 a; + struct TestStruct1 b; +}; + +static PyObject * +get_struct_alignments(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *ret = PyTuple_New(3); + PyObject *alignment, *size, *val; + +/**begin repeat + * #N = 1,2,3# + */ + alignment = PyInt_FromLong(_ALIGN(struct TestStruct@N@)); + size = PyInt_FromLong(sizeof(struct TestStruct@N@)); + val = PyTuple_Pack(2, alignment, size); + Py_DECREF(alignment); + Py_DECREF(size); + if (val == NULL) { + return NULL; + } + PyTuple_SET_ITEM(ret, @N@-1, val); +/**end repeat**/ + return ret; +} + + +static char get_fpu_mode_doc[] = ( + "get_fpu_mode()\n" + "\n" + "Get the current FPU control word, in a platform-dependent format.\n" + "Returns None if not implemented on current platform."); + +static PyObject * +get_fpu_mode(PyObject *NPY_UNUSED(self), PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) { + return NULL; + } + +#if defined(_MSC_VER) + { + unsigned int result = 0; + result = _controlfp(0, 0); + return PyLong_FromLongLong(result); + } +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + { + unsigned short cw = 0; + __asm__("fstcw %w0" : "=m" (cw)); + return PyLong_FromLongLong(cw); + } +#else + Py_RETURN_NONE; +#endif +} + +/* + * npymath wrappers + */ + +/**begin repeat + * #name = cabs, 
carg# + */ + +/**begin repeat1 + * #itype = npy_cfloat, npy_cdouble, npy_clongdouble# + * #ITYPE = NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE# + * #otype = npy_float, npy_double, npy_longdouble# + * #OTYPE = NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE# + * #suffix= f, , l# + */ + +static PyObject * +call_npy_@name@@suffix@(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *z_py = NULL, *z_arr = NULL, *w_arr = NULL; + + if (!PyArg_ParseTuple(args, "O", &z_py)) { + return NULL; + } + + z_arr = PyArray_FROMANY(z_py, @ITYPE@, 0, 0, NPY_ARRAY_CARRAY_RO); + if (z_arr == NULL) { + return NULL; + } + + w_arr = PyArray_SimpleNew(0, NULL, @OTYPE@); + if (w_arr == NULL) { + Py_DECREF(z_arr); + return NULL; + } + + *(@otype@*)PyArray_DATA((PyArrayObject *)w_arr) = + npy_@name@@suffix@(*(@itype@*)PyArray_DATA((PyArrayObject *)z_arr)); + + Py_DECREF(z_arr); + return w_arr; +} + +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * #name = log10, cosh, sinh, tan, tanh# + */ + +/**begin repeat1 + * #type = npy_float, npy_double, npy_longdouble# + * #TYPE = NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE# + * #suffix= f, , l# + */ + +static PyObject * +call_npy_@name@@suffix@(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *z_py = NULL, *z_arr = NULL, *w_arr = NULL; + + if (!PyArg_ParseTuple(args, "O", &z_py)) { + return NULL; + } + + z_arr = PyArray_FROMANY(z_py, @TYPE@, 0, 0, NPY_ARRAY_CARRAY_RO); + if (z_arr == NULL) { + return NULL; + } + + w_arr = PyArray_SimpleNew(0, NULL, @TYPE@); + if (w_arr == NULL) { + Py_DECREF(z_arr); + return NULL; + } + + *(@type@*)PyArray_DATA((PyArrayObject *)w_arr) = + npy_@name@@suffix@(*(@type@*)PyArray_DATA((PyArrayObject *)z_arr)); + + Py_DECREF(z_arr); + return w_arr; +} + +/**end repeat1**/ + +/**end repeat**/ + +/* + * For development/testing purposes, it's convenient to have access to the + * system printf for floats. This is a very simple printf interface. 
+ */ +PyObject * +PrintFloat_Printf_g(PyObject *obj, int precision) +{ + char str[1024]; + + if (PyArray_IsScalar(obj, Half)) { + npy_half x = ((PyHalfScalarObject *)obj)->obval; + PyOS_snprintf(str, sizeof(str), "%.*g", precision, + npy_half_to_double(x)); + } + else if (PyArray_IsScalar(obj, Float)) { + npy_float x = ((PyFloatScalarObject *)obj)->obval; + PyOS_snprintf(str, sizeof(str), "%.*g", precision, x); + } + else if (PyArray_IsScalar(obj, Double)) { + npy_double x = ((PyDoubleScalarObject *)obj)->obval; + PyOS_snprintf(str, sizeof(str), "%.*g", precision, x); + /* would be better to use lg, but not available in C90 */ + } + else if (PyArray_IsScalar(obj, LongDouble)) { + npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval; + PyOS_snprintf(str, sizeof(str), "%.*Lg", precision, x); + } + else{ + double val = PyFloat_AsDouble(obj); + if (error_converting(val)) { + return NULL; + } + PyOS_snprintf(str, sizeof(str), "%.*g", precision, val); + } + + return PyUString_FromString(str); +} + + +static PyObject * +printf_float_g(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) +{ + PyObject *obj; + int precision; + + if (!PyArg_ParseTuple(args,"Oi:format_float_OSprintf_g", &obj, + &precision)) { + return NULL; + } + + if (precision < 0) { + PyErr_SetString(PyExc_TypeError, "precision must be non-negative"); + return NULL; + } + + return PrintFloat_Printf_g(obj, precision); +} + +static PyObject * +getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ + PyObject *ret; + PyObject *ops = PyArray_GetNumericOps(); + if (ops == NULL) { + return NULL; + } + ret = PyLong_FromLong(PyArray_SetNumericOps(ops)); + Py_DECREF(ops); + return ret; +} + +static PyMethodDef Multiarray_TestsMethods[] = { + {"IsPythonScalar", + IsPythonScalar, + METH_VARARGS, NULL}, + {"test_neighborhood_iterator", + test_neighborhood_iterator, + METH_VARARGS, NULL}, + {"test_neighborhood_iterator_oob", + test_neighborhood_iterator_oob, + METH_VARARGS, NULL}, + {"test_pydatamem_seteventhook_start", + test_pydatamem_seteventhook_start, + METH_NOARGS, NULL}, + {"test_pydatamem_seteventhook_end", + test_pydatamem_seteventhook_end, + METH_NOARGS, NULL}, + {"test_inplace_increment", + inplace_increment, + METH_VARARGS, NULL}, + {"incref_elide", + incref_elide, + METH_VARARGS, NULL}, + {"incref_elide_l", + incref_elide_l, + METH_VARARGS, NULL}, + {"npy_char_deprecation", + npy_char_deprecation, + METH_NOARGS, NULL}, + {"npy_updateifcopy_deprecation", + npy_updateifcopy_deprecation, + METH_O, NULL}, + {"npy_create_writebackifcopy", + npy_create_writebackifcopy, + METH_O, NULL}, + {"npy_abuse_writebackifcopy", + npy_abuse_writebackifcopy, + METH_O, NULL}, + {"npy_resolve", + npy_resolve, + METH_O, NULL}, + {"npy_discard", + npy_discard, + METH_O, NULL}, +#if !defined(NPY_PY3K) + {"test_int_subclass", + int_subclass, + METH_VARARGS, NULL}, +#endif + {"get_buffer_info", + get_buffer_info, + METH_VARARGS, NULL}, + {"get_c_wrapping_array", + get_c_wrapping_array, + METH_O, NULL}, + {"array_indexing", + array_indexing, + METH_VARARGS, NULL}, + {"test_as_c_array", + test_as_c_array, + METH_VARARGS, NULL}, + {"test_nditer_too_large", + test_nditer_too_large, + METH_VARARGS, NULL}, + {"solve_diophantine", + (PyCFunction)array_solve_diophantine, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"internal_overlap", + (PyCFunction)array_internal_overlap, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"extint_safe_binop", + extint_safe_binop, + METH_VARARGS, NULL}, + {"extint_to_128", + extint_to_128, + METH_VARARGS, 
NULL}, + {"extint_to_64", + extint_to_64, + METH_VARARGS, NULL}, + {"extint_mul_64_64", + extint_mul_64_64, + METH_VARARGS, NULL}, + {"extint_add_128", + extint_add_128, + METH_VARARGS, NULL}, + {"extint_sub_128", + extint_sub_128, + METH_VARARGS, NULL}, + {"extint_neg_128", + extint_neg_128, + METH_VARARGS, NULL}, + {"extint_shl_128", + extint_shl_128, + METH_VARARGS, NULL}, + {"extint_shr_128", + extint_shr_128, + METH_VARARGS, NULL}, + {"extint_gt_128", + extint_gt_128, + METH_VARARGS, NULL}, + {"extint_divmod_128_64", + extint_divmod_128_64, + METH_VARARGS, NULL}, + {"extint_floordiv_128_64", + extint_floordiv_128_64, + METH_VARARGS, NULL}, + {"extint_ceildiv_128_64", + extint_ceildiv_128_64, + METH_VARARGS, NULL}, + {"get_fpu_mode", + get_fpu_mode, + METH_VARARGS, get_fpu_mode_doc}, + {"getset_numericops", + getset_numericops, + METH_NOARGS, NULL}, +/**begin repeat + * #name = cabs, carg# + */ + +/**begin repeat1 + * #suffix = f, , l# + */ + {"npy_@name@@suffix@", + call_npy_@name@@suffix@, + METH_VARARGS, NULL}, +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * #name = log10, cosh, sinh, tan, tanh# + */ + +/**begin repeat1 + * #suffix= f, , l# + */ + {"npy_@name@@suffix@", + call_npy_@name@@suffix@, + METH_VARARGS, NULL}, +/**end repeat1**/ + +/**end repeat**/ + {"format_float_OSprintf_g", + (PyCFunction)printf_float_g, + METH_VARARGS , NULL}, + {"get_struct_alignments", + get_struct_alignments, + METH_VARARGS, NULL}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_multiarray_tests", + NULL, + -1, + Multiarray_TestsMethods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if defined(NPY_PY3K) +#define RETVAL m +PyMODINIT_FUNC PyInit__multiarray_tests(void) +#else +#define RETVAL +PyMODINIT_FUNC +init_multiarray_tests(void) +#endif +{ + PyObject *m; + +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_multiarray_tests", Multiarray_TestsMethods); +#endif + if (m == NULL) { + return RETVAL; + } + import_array(); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "cannot load _multiarray_tests module."); + } + return RETVAL; +} + +NPY_NO_EXPORT int +test_not_exported(void) +{ + return 1; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/arraytypes.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/arraytypes.c.src new file mode 100644 index 0000000000..d921b9d902 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/arraytypes.c.src @@ -0,0 +1,4921 @@ +/* -*- c -*- */ +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" +#include <limits.h> +#include <assert.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#define _NPY_NO_DEPRECATIONS /* for NPY_CHAR */ + +#include "numpy/npy_common.h" +#include "numpy/arrayobject.h" +#include "numpy/arrayscalars.h" +#include "npy_pycompat.h" +#include "numpy/npy_math.h" +#include "numpy/halffloat.h" + +#include "npy_config.h" +#include "npy_sort.h" +#include "common.h" +#include "ctors.h" +#include "lowlevel_strided_loops.h" +#include "usertypes.h" +#include "_datetime.h" +#include "arrayobject.h" +#include "alloc.h" +#include "typeinfo.h" +#ifdef NPY_HAVE_SSE2_INTRINSICS +#include <emmintrin.h> +#endif + +#include "npy_longdouble.h" +#include "numpyos.h" +#include <string.h> + +#include "cblasfuncs.h" +#include "npy_cblas.h" +#include "buffer.h" + +/* check for sequences, but ignore the types numpy considers 
scalars */ +static NPY_INLINE npy_bool +PySequence_NoString_Check(PyObject *op) { + return + PySequence_Check(op) && + !PyString_Check(op) && + !PyUnicode_Check(op) && + !PyArray_IsZeroDim(op); +} + +/* + ***************************************************************************** + ** PYTHON TYPES TO C TYPES ** + ***************************************************************************** + */ + +static double +MyPyFloat_AsDouble(PyObject *obj) +{ + double ret = 0; + PyObject *num; + + if (obj == Py_None) { + return NPY_NAN; + } + num = PyNumber_Float(obj); + if (num == NULL) { + return NPY_NAN; + } + ret = PyFloat_AsDouble(num); + Py_DECREF(num); + return ret; +} + +static npy_half +MyPyFloat_AsHalf(PyObject *obj) +{ + return npy_double_to_half(MyPyFloat_AsDouble(obj)); +} + +static PyObject * +MyPyFloat_FromHalf(npy_half h) +{ + return PyFloat_FromDouble(npy_half_to_double(h)); +} + +/* Handle case of assigning from an array scalar in setitem */ +static int +convert_to_scalar_and_retry(PyObject *op, void *ov, void *vap, + int (*setitem)(PyObject *op, void *ov, void *vap)) +{ + PyObject *temp; + + assert(PyArray_IsZeroDim(op)); + temp = PyArray_ToScalar(PyArray_BYTES((PyArrayObject *)op), + (PyArrayObject *)op); + if (temp == NULL) { + return -1; + } + else { + int res = setitem(temp, ov, vap); + Py_DECREF(temp); + return res; + } +} + + +/**begin repeat + * + * #Type = Long, LongLong# + * #type = npy_long, npy_longlong# + */ +static @type@ +MyPyLong_As@Type@ (PyObject *obj) +{ + @type@ ret; + PyObject *num = PyNumber_Long(obj); + + if (num == NULL) { + return -1; + } + ret = PyLong_As@Type@(num); + Py_DECREF(num); + return ret; +} + +/**end repeat**/ + +/**begin repeat + * + * #Type = Long, LongLong# + * #type = npy_ulong, npy_ulonglong# + */ +static @type@ +MyPyLong_AsUnsigned@Type@ (PyObject *obj) +{ + @type@ ret; + PyObject *num = PyNumber_Long(obj); + + if (num == NULL) { + return -1; + } + ret = PyLong_AsUnsigned@Type@(num); + if (PyErr_Occurred()) { + PyErr_Clear(); + ret = PyLong_As@Type@(num); + } + Py_DECREF(num); + return ret; +} + +/**end repeat**/ + +/* + ***************************************************************************** + ** GETITEM AND SETITEM ** + ***************************************************************************** + */ + +#define _ALIGN(type) offsetof(struct {char c; type v;}, v) +/* + * Disable harmless compiler warning "4116: unnamed type definition in + * parentheses" which is caused by the _ALIGN macro. 
+ */ +#if defined(_MSC_VER) +#pragma warning(disable:4116) +#endif + + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE# + * #func1 = PyBool_FromLong, PyInt_FromLong*6, PyLong_FromUnsignedLong*2, + * PyLong_FromLongLong, PyLong_FromUnsignedLongLong, + * MyPyFloat_FromHalf, PyFloat_FromDouble*2# + * #func2 = PyObject_IsTrue, MyPyLong_AsLong*6, MyPyLong_AsUnsignedLong*2, + * MyPyLong_AsLongLong, MyPyLong_AsUnsignedLongLong, + * MyPyFloat_AsHalf, MyPyFloat_AsDouble*2# + * #type = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_long, npy_uint, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double# + * #type1 = long*7, npy_ulong*2, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double# + * #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong, + * LongLong, ULongLong, Half, Float, Double# +*/ +static PyObject * +@TYPE@_getitem(void *input, void *vap) +{ + PyArrayObject *ap = vap; + char *ip = input; + @type@ t1; + + if ((ap == NULL) || PyArray_ISBEHAVED_RO(ap)) { + t1 = *((@type@ *)ip); + return @func1@((@type1@)t1); + } + else { + PyArray_DESCR(ap)->f->copyswap(&t1, ip, PyArray_ISBYTESWAPPED(ap), ap); + return @func1@((@type1@)t1); + } +} + +static int +@TYPE@_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + @type@ temp; /* ensures alignment */ + + if (PyArray_IsScalar(op, @kind@)) { + temp = ((Py@kind@ScalarObject *)op)->obval; + } + else { + temp = (@type@)@func2@(op); + } + if (PyErr_Occurred()) { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + if (PySequence_NoString_Check(op)) { + PyErr_SetString(PyExc_ValueError, + "setting an array element with a sequence."); + Py_DECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + } + else { + PyErr_Restore(type, value, traceback); + } + return -1; + } + if (ap == NULL || PyArray_ISBEHAVED(ap)) { + assert(npy_is_aligned(ov, _ALIGN(@type@))); + *((@type@ *)ov)=temp; + } + else { + PyArray_DESCR(ap)->f->copyswap(ov, &temp, PyArray_ISBYTESWAPPED(ap), + ap); + } + return 0; +} + +/**end repeat**/ + +/**begin repeat + * + * #TYPE = CFLOAT, CDOUBLE# + * #type = npy_float, npy_double# + */ +static PyObject * +@TYPE@_getitem(void *input, void *vap) +{ + PyArrayObject *ap = vap; + char *ip = input; + @type@ t1, t2; + + if ((ap == NULL) || PyArray_ISBEHAVED_RO(ap)) { + return PyComplex_FromDoubles((double)((@type@ *)ip)[0], + (double)((@type@ *)ip)[1]); + } + else { + int size = sizeof(@type@); + + npy_bool swap = PyArray_ISBYTESWAPPED(ap); + copy_and_swap(&t1, ip, size, 1, 0, swap); + copy_and_swap(&t2, ip + size, size, 1, 0, swap); + return PyComplex_FromDoubles((double)t1, (double)t2); + } +} + +/**end repeat**/ + + + +/**begin repeat + * + * #NAME = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + * #ftype = npy_float, npy_double, npy_longdouble# + * #kind = CFloat, CDouble, CLongDouble# + */ +static int +@NAME@_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + Py_complex oop; + @type@ temp; + int rsize; + + if (PyArray_IsZeroDim(op)) { + return convert_to_scalar_and_retry(op, ov, vap, @NAME@_setitem); + } + + if (PyArray_IsScalar(op, @kind@)){ + temp = ((Py@kind@ScalarObject *)op)->obval; + } + else { + if (op == Py_None) { + oop.real = NPY_NAN; + oop.imag = NPY_NAN; + } + else { + oop = PyComplex_AsCComplex (op); + if (PyErr_Occurred()) { + return -1; + } + } + temp.real 
= (@ftype@) oop.real; + temp.imag = (@ftype@) oop.imag; + } + + memcpy(ov, &temp, PyArray_DESCR(ap)->elsize); + if (PyArray_ISBYTESWAPPED(ap)) { + byte_swap_vector(ov, 2, sizeof(@ftype@)); + } + rsize = sizeof(@ftype@); + copy_and_swap(ov, &temp, rsize, 2, rsize, PyArray_ISBYTESWAPPED(ap)); + return 0; +} + +/**end repeat**/ + +static NPY_INLINE npy_longdouble +string_to_long_double(PyObject*op) +{ + char *s; + char *end; + npy_longdouble temp; + PyObject* b; + + /* Convert python long objects to a longdouble, without precision or range + * loss via a double. + */ + if ((PyLong_Check(op) && !PyBool_Check(op)) +#if !defined(NPY_PY3K) + || (PyInt_Check(op) && !PyBool_Check(op)) +#endif + ) { + return npy_longdouble_from_PyLong(op); + } + + if (PyUnicode_Check(op)) { + b = PyUnicode_AsUTF8String(op); + if (!b) { + return 0; + } + } + else { + b = op; + Py_XINCREF(b); + } + s = PyBytes_AsString(b); + if (s) { + errno = 0; + temp = NumPyOS_ascii_strtold(s, &end); + if (errno == ERANGE) { + if (PyErr_Warn(PyExc_RuntimeWarning, + "overflow encountered in conversion from string") < 0) { + Py_XDECREF(b); + return 0; + } + /* strtold returns INFINITY of the correct sign. */ + } + else if (errno) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long double: %s (%s)", + s, + strerror(errno)); + Py_XDECREF(b); + return 0; + } + + /* Extra characters at the end of the string, or nothing parsed */ + if (end == s || *end) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long double: %s", + s); + Py_XDECREF(b); + return 0; + } + Py_XDECREF(b); + } + else { + /* Probably wasn't a string, try converting it via a python double */ + PyErr_Clear(); + Py_XDECREF(b); + temp = (npy_longdouble) MyPyFloat_AsDouble(op); + } + return temp; +} + +/* + * These return array scalars which are different than other date-types. 
+ */ + +static PyObject * +LONGDOUBLE_getitem(void *ip, void *ap) +{ + return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL); +} + +static int +LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + /* ensure alignment */ + npy_longdouble temp; + + if (PyArray_IsZeroDim(op)) { + return convert_to_scalar_and_retry(op, ov, vap, LONGDOUBLE_setitem); + } + + if (PyArray_IsScalar(op, LongDouble)) { + temp = ((PyLongDoubleScalarObject *)op)->obval; + } + else { + /* In case something funny happened in PyArray_IsScalar */ + if (PyErr_Occurred()) { + return -1; + } + temp = string_to_long_double(op); + } + if (PyErr_Occurred()) { + return -1; + } + if (ap == NULL || PyArray_ISBEHAVED(ap)) { + *((npy_longdouble *)ov) = temp; + } + else { + copy_and_swap(ov, &temp, PyArray_DESCR(ap)->elsize, 1, 0, + PyArray_ISBYTESWAPPED(ap)); + } + return 0; +} + +static PyObject * +CLONGDOUBLE_getitem(void *ip, void *ap) +{ + return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL); +} + +/* UNICODE */ +static PyObject * +UNICODE_getitem(void *ip, void *vap) +{ + PyArrayObject *ap = vap; + Py_ssize_t size = PyArray_ITEMSIZE(ap); + int swap = PyArray_ISBYTESWAPPED(ap); + int align = !PyArray_ISALIGNED(ap); + + return (PyObject *)PyUnicode_FromUCS4(ip, size, swap, align); +} + +static int +UNICODE_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + PyObject *temp; + Py_UNICODE *ptr; + int datalen; +#ifndef Py_UNICODE_WIDE + char *buffer; +#endif + + if (PyArray_IsZeroDim(op)) { + return convert_to_scalar_and_retry(op, ov, vap, UNICODE_setitem); + } + + if (PySequence_NoString_Check(op)) { + PyErr_SetString(PyExc_ValueError, + "setting an array element with a sequence"); + return -1; + } +#if defined(NPY_PY3K) + if (PyBytes_Check(op)) { + /* Try to decode from ASCII */ + temp = PyUnicode_FromEncodedObject(op, "ASCII", "strict"); + if (temp == NULL) { + return -1; + } + } + else if ((temp=PyObject_Str(op)) == NULL) { +#else + if ((temp=PyObject_Unicode(op)) == NULL) { +#endif + return -1; + } + ptr = PyUnicode_AS_UNICODE(temp); + if ((ptr == NULL) || (PyErr_Occurred())) { + Py_DECREF(temp); + return -1; + } + datalen = PyUnicode_GET_DATA_SIZE(temp); + +#ifdef Py_UNICODE_WIDE + memcpy(ov, ptr, PyArray_MIN(PyArray_DESCR(ap)->elsize, datalen)); +#else + if (!PyArray_ISALIGNED(ap)) { + buffer = PyArray_malloc(PyArray_DESCR(ap)->elsize); + if (buffer == NULL) { + Py_DECREF(temp); + PyErr_NoMemory(); + return -1; + } + } + else { + buffer = ov; + } + datalen = PyUCS2Buffer_AsUCS4(ptr, (npy_ucs4 *)buffer, + datalen >> 1, PyArray_DESCR(ap)->elsize >> 2); + datalen <<= 2; + if (!PyArray_ISALIGNED(ap)) { + memcpy(ov, buffer, datalen); + PyArray_free(buffer); + } +#endif + /* Fill in the rest of the space with 0 */ + if (PyArray_DESCR(ap)->elsize > datalen) { + memset((char*)ov + datalen, 0, (PyArray_DESCR(ap)->elsize - datalen)); + } + if (PyArray_ISBYTESWAPPED(ap)) { + byte_swap_vector(ov, PyArray_DESCR(ap)->elsize >> 2, 4); + } + Py_DECREF(temp); + return 0; +} + +/* STRING + * + * can handle both NULL-terminated and not NULL-terminated cases + * will truncate all ending NULLs in returned string. 
+ */ +static PyObject * +STRING_getitem(void *ip, void *vap) +{ + PyArrayObject *ap = vap; + /* Will eliminate NULLs at the end */ + char *ptr; + int size = PyArray_DESCR(ap)->elsize; + + ptr = (char *)ip + size - 1; + while (size > 0 && *ptr-- == '\0') { + size--; + } + return PyBytes_FromStringAndSize(ip,size); +} + +static int +STRING_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + char *ptr; + Py_ssize_t len; + PyObject *temp = NULL; + + if (PyArray_IsZeroDim(op)) { + return convert_to_scalar_and_retry(op, ov, vap, STRING_setitem); + } + + if (PySequence_NoString_Check(op)) { + PyErr_SetString(PyExc_ValueError, + "setting an array element with a sequence"); + return -1; + } +#if defined(NPY_PY3K) + if (PyUnicode_Check(op)) { + /* Assume ASCII codec -- function similarly as Python 2 */ + temp = PyUnicode_AsASCIIString(op); + if (temp == NULL) { + return -1; + } + } + else if (PyBytes_Check(op) || PyMemoryView_Check(op)) { + temp = PyObject_Bytes(op); + if (temp == NULL) { + return -1; + } + } + else { + /* Emulate similar casting behavior as on Python 2 */ + PyObject *str; + str = PyObject_Str(op); + if (str == NULL) { + return -1; + } + temp = PyUnicode_AsASCIIString(str); + Py_DECREF(str); + if (temp == NULL) { + return -1; + } + } +#else + if ((temp = PyObject_Str(op)) == NULL) { + return -1; + } +#endif + if (PyBytes_AsStringAndSize(temp, &ptr, &len) < 0) { + Py_DECREF(temp); + return -1; + } + memcpy(ov, ptr, PyArray_MIN(PyArray_DESCR(ap)->elsize,len)); + /* + * If string length is smaller than room in array + * Then fill the rest of the element size with NULL + */ + if (PyArray_DESCR(ap)->elsize > len) { + memset((char *)ov + len, 0, (PyArray_DESCR(ap)->elsize - len)); + } + Py_DECREF(temp); + return 0; +} + +/* OBJECT */ + +#define __ALIGNED(obj, sz) ((((size_t) obj) % (sz))==0) + +static PyObject * +OBJECT_getitem(void *ip, void *NPY_UNUSED(ap)) +{ + PyObject *obj; + NPY_COPY_PYOBJECT_PTR(&obj, ip); + if (obj == NULL) { + Py_RETURN_NONE; + } + else { + Py_INCREF(obj); + return obj; + } +} + + +static int +OBJECT_setitem(PyObject *op, void *ov, void *NPY_UNUSED(ap)) +{ + PyObject *obj; + + NPY_COPY_PYOBJECT_PTR(&obj, ov); + + Py_INCREF(op); + Py_XDECREF(obj); + + NPY_COPY_PYOBJECT_PTR(ov, &op); + + return PyErr_Occurred() ? -1 : 0; +} + +/* VOID */ + +static PyObject * +VOID_getitem(void *input, void *vap) +{ + PyArrayObject *ap = vap; + char *ip = input; + PyArray_Descr* descr; + + descr = PyArray_DESCR(ap); + if (PyDataType_HASFIELDS(descr)) { + PyObject *key; + PyObject *names; + int i, n; + PyObject *ret; + PyObject *tup; + int savedflags; + + /* get the names from the fields dictionary*/ + names = descr->names; + n = PyTuple_GET_SIZE(names); + ret = PyTuple_New(n); + savedflags = PyArray_FLAGS(ap); + for (i = 0; i < n; i++) { + npy_intp offset; + PyArray_Descr *new; + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(descr->fields, key); + if (_unpack_field(tup, &new, &offset) < 0) { + Py_DECREF(ret); + ((PyArrayObject_fields *)ap)->descr = descr; + return NULL; + } + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. 
+ */ + ((PyArrayObject_fields *)ap)->descr = new; + /* update alignment based on offset */ + if ((new->alignment > 1) + && ((((npy_intp)(ip+offset)) % new->alignment) != 0)) { + PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED); + } + else { + PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED); + } + PyTuple_SET_ITEM(ret, i, PyArray_GETITEM(ap, ip+offset)); + ((PyArrayObject_fields *)ap)->flags = savedflags; + } + ((PyArrayObject_fields *)ap)->descr = descr; + return ret; + } + + if (descr->subarray) { + /* return an array of the basic type */ + PyArray_Dims shape = {NULL, -1}; + PyArrayObject *ret; + + if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) { + npy_free_cache_dim_obj(shape); + PyErr_SetString(PyExc_ValueError, + "invalid shape in fixed-type tuple."); + return NULL; + } + Py_INCREF(descr->subarray->base); + ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( + &PyArray_Type, descr->subarray->base, + shape.len, shape.ptr, NULL, ip, + PyArray_FLAGS(ap) & ~NPY_ARRAY_F_CONTIGUOUS, + NULL, (PyObject *)ap); + npy_free_cache_dim_obj(shape); + return (PyObject *)ret; + } + + return PyBytes_FromStringAndSize(ip, descr->elsize); +} + + +NPY_NO_EXPORT int PyArray_CopyObject(PyArrayObject *, PyObject *); + +/* Given a structured PyArrayObject arr, index i and structured datatype descr, + * modify the dtype of arr to contain a single field corresponding to the ith + * field of descr, recompute the alignment flag, and return the offset of the + * field (in offset_p). This is useful in preparation for calling copyswap on + * individual fields of a numpy structure, in VOID_setitem. Compare to inner + * loops in VOID_getitem and VOID_nonzero. + * + * WARNING: Clobbers arr's dtype and alignment flag. + */ +NPY_NO_EXPORT int +_setup_field(int i, PyArray_Descr *descr, PyArrayObject *arr, + npy_intp *offset_p, char *dstdata) +{ + PyObject *key; + PyObject *tup; + PyArray_Descr *new; + npy_intp offset; + + key = PyTuple_GET_ITEM(descr->names, i); + tup = PyDict_GetItem(descr->fields, key); + if (_unpack_field(tup, &new, &offset) < 0) { + return -1; + } + + ((PyArrayObject_fields *)(arr))->descr = new; + if ((new->alignment > 1) && + ((((uintptr_t)dstdata + offset) % new->alignment) != 0)) { + PyArray_CLEARFLAGS(arr, NPY_ARRAY_ALIGNED); + } + else { + PyArray_ENABLEFLAGS(arr, NPY_ARRAY_ALIGNED); + } + + *offset_p = offset; + return 0; +} + +/* Helper function for VOID_setitem, which uses the copyswap or casting code to + * copy structured datatypes between numpy arrays or scalars. 
+ */ +static int +_copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata, + PyArray_Descr *srcdescr, char *srcdata){ + PyArrayObject_fields dummy_struct; + PyArrayObject *dummy = (PyArrayObject *)&dummy_struct; + npy_int names_size = PyTuple_GET_SIZE(dstdescr->names); + npy_intp offset; + npy_int i; + int ret; + + /* Fast path if dtypes are equal */ + if (PyArray_EquivTypes(srcdescr, dstdescr)) { + for (i = 0; i < names_size; i++) { + /* neither line can ever fail, in principle */ + if (_setup_field(i, dstdescr, dummy, &offset, dstdata)) { + return -1; + } + PyArray_DESCR(dummy)->f->copyswap(dstdata + offset, + srcdata + offset, 0, dummy); + } + return 0; + } + + /* Slow path */ + ret = PyArray_CastRawArrays(1, srcdata, dstdata, 0, 0, + srcdescr, dstdescr, 0); + if (ret != NPY_SUCCEED) { + return -1; + } + return 0; +} + +static int +VOID_setitem(PyObject *op, void *input, void *vap) +{ + char *ip = input; + PyArrayObject *ap = vap; + PyArray_Descr *descr; + int flags; + int itemsize=PyArray_DESCR(ap)->elsize; + int res; + + descr = PyArray_DESCR(ap); + flags = PyArray_FLAGS(ap); + if (PyDataType_HASFIELDS(descr)) { + PyObject *errmsg; + npy_int i; + npy_intp offset; + int failed = 0; + + /* If op is 0d-ndarray or numpy scalar, directly get dtype & data ptr */ + if (PyArray_Check(op)) { + PyArrayObject *oparr = (PyArrayObject *)op; + if (PyArray_SIZE(oparr) != 1) { + PyErr_SetString(PyExc_ValueError, + "setting an array element with a sequence."); + return -1; + } + return _copy_and_return_void_setitem(descr, ip, + PyArray_DESCR(oparr), PyArray_DATA(oparr)); + } + else if (PyArray_IsScalar(op, Void)) { + PyArray_Descr *srcdescr = ((PyVoidScalarObject *)op)->descr; + char *srcdata = ((PyVoidScalarObject *)op)->obval; + return _copy_and_return_void_setitem(descr, ip, srcdescr, srcdata); + } + else if (PyTuple_Check(op)) { + /* if it's a tuple, copy field-by-field to ap, */ + npy_intp names_size = PyTuple_GET_SIZE(descr->names); + + if (names_size != PyTuple_Size(op)) { + errmsg = PyUString_FromFormat( + "could not assign tuple of length %zd to structure " + "with %" NPY_INTP_FMT " fields.", + PyTuple_Size(op), names_size); + PyErr_SetObject(PyExc_ValueError, errmsg); + Py_DECREF(errmsg); + return -1; + } + + for (i = 0; i < names_size; i++) { + PyObject *item; + + /* temporarily make ap have only this field */ + if (_setup_field(i, descr, ap, &offset, ip) == -1) { + failed = 1; + break; + } + item = PyTuple_GetItem(op, i); + if (item == NULL) { + failed = 1; + break; + } + /* use setitem to set this field */ + if (PyArray_SETITEM(ap, ip + offset, item) < 0) { + failed = 1; + break; + } + } + } + else { + /* Otherwise must be non-void scalar. 
Try to assign to each field */ + npy_intp names_size = PyTuple_GET_SIZE(descr->names); + + for (i = 0; i < names_size; i++) { + /* temporarily make ap have only this field */ + if (_setup_field(i, descr, ap, &offset, ip) == -1) { + failed = 1; + break; + } + /* use setitem to set this field */ + if (PyArray_SETITEM(ap, ip + offset, op) < 0) { + failed = 1; + break; + } + } + } + + /* reset clobbered attributes */ + ((PyArrayObject_fields *)(ap))->descr = descr; + ((PyArrayObject_fields *)(ap))->flags = flags; + + if (failed) { + return -1; + } + return 0; + } + else if (PyDataType_HASSUBARRAY(descr)) { + /* copy into an array of the same basic type */ + PyArray_Dims shape = {NULL, -1}; + PyArrayObject *ret; + if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) { + npy_free_cache_dim_obj(shape); + PyErr_SetString(PyExc_ValueError, + "invalid shape in fixed-type tuple."); + return -1; + } + Py_INCREF(descr->subarray->base); + ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( + &PyArray_Type, descr->subarray->base, + shape.len, shape.ptr, NULL, ip, + PyArray_FLAGS(ap), NULL, (PyObject *)ap); + npy_free_cache_dim_obj(shape); + if (!ret) { + return -1; + } + res = PyArray_CopyObject(ret, op); + Py_DECREF(ret); + return res; + } + + /* + * Fall through case - non-structured void datatype. This is a very + * undiscerning case: It interprets any object as a buffer + * and reads as many bytes as possible, padding with 0. + */ +#if defined(NPY_PY3K) + { + Py_buffer view; + + if (PyObject_GetBuffer(op, &view, PyBUF_SIMPLE) < 0) { + return -1; + } + memcpy(ip, view.buf, PyArray_MIN(view.len, itemsize)); + if (itemsize > view.len) { + memset(ip + view.len, 0, itemsize - view.len); + } + PyBuffer_Release(&view); + _dealloc_cached_buffer_info(op); + } +#else + { + const void *buffer; + Py_ssize_t buflen; + + if (PyObject_AsReadBuffer(op, &buffer, &buflen) < 0) { + return -1; + } + memcpy(ip, buffer, PyArray_MIN(buflen, itemsize)); + if (itemsize > buflen) { + memset(ip + buflen, 0, itemsize - buflen); + } + } +#endif + return 0; +} + +static PyObject * +DATETIME_getitem(void *ip, void *vap) +{ + PyArrayObject *ap = vap; + npy_datetime dt; + PyArray_DatetimeMetaData *meta = NULL; + + /* Get the datetime units metadata */ + meta = get_datetime_metadata_from_dtype(PyArray_DESCR(ap)); + if (meta == NULL) { + return NULL; + } + + if ((ap == NULL) || PyArray_ISBEHAVED_RO(ap)) { + dt = *((npy_datetime *)ip); + } + else { + PyArray_DESCR(ap)->f->copyswap(&dt, ip, PyArray_ISBYTESWAPPED(ap), ap); + } + + return convert_datetime_to_pyobject(dt, meta); +} + + +static PyObject * +TIMEDELTA_getitem(void *ip, void *vap) +{ + PyArrayObject *ap = vap; + npy_timedelta td; + PyArray_DatetimeMetaData *meta = NULL; + + /* Get the datetime units metadata */ + meta = get_datetime_metadata_from_dtype(PyArray_DESCR(ap)); + if (meta == NULL) { + return NULL; + } + + if ((ap == NULL) || PyArray_ISBEHAVED_RO(ap)) { + td = *((npy_timedelta *)ip); + } + else { + PyArray_DESCR(ap)->f->copyswap(&td, ip, PyArray_ISBYTESWAPPED(ap), ap); + } + + return convert_timedelta_to_pyobject(td, meta); +} + +static int +DATETIME_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + /* ensure alignment */ + npy_datetime temp = 0; + PyArray_DatetimeMetaData *meta = NULL; + + /* Get the datetime units metadata */ + meta = get_datetime_metadata_from_dtype(PyArray_DESCR(ap)); + if (meta == NULL) { + return -1; + } + + /* Convert the object into a NumPy datetime */ + if (convert_pyobject_to_datetime(meta, op, + 
NPY_SAME_KIND_CASTING, &temp) < 0) { + return -1; + } + + /* Copy the value into the output */ + if (ap == NULL || PyArray_ISBEHAVED(ap)) { + *((npy_datetime *)ov)=temp; + } + else { + PyArray_DESCR(ap)->f->copyswap(ov, &temp, PyArray_ISBYTESWAPPED(ap), + ap); + } + + return 0; +} + +static int +TIMEDELTA_setitem(PyObject *op, void *ov, void *vap) +{ + PyArrayObject *ap = vap; + /* ensure alignment */ + npy_timedelta temp = 0; + PyArray_DatetimeMetaData *meta = NULL; + + /* Get the datetime units metadata */ + meta = get_datetime_metadata_from_dtype(PyArray_DESCR(ap)); + if (meta == NULL) { + return -1; + } + + /* Convert the object into a NumPy datetime */ + if (convert_pyobject_to_timedelta(meta, op, + NPY_SAME_KIND_CASTING, &temp) < 0) { + return -1; + } + + /* Copy the value into the output */ + if (ap == NULL || PyArray_ISBEHAVED(ap)) { + *((npy_timedelta *)ov)=temp; + } + else { + PyArray_DESCR(ap)->f->copyswap(ov, &temp, PyArray_ISBYTESWAPPED(ap), + ap); + } + + return 0; +} + + +/* + ***************************************************************************** + ** TYPE TO TYPE CONVERSIONS ** + ***************************************************************************** + */ + + +/* Assumes contiguous, and aligned, from and to */ + + +/**begin repeat + * + * #TOTYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, DATETIME, + * TIMEDELTA# + * #totype = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + */ + +/**begin repeat1 + * + * #FROMTYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, DATETIME, + * TIMEDELTA# + * #fromtype = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + */ +static void +@FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + @totype@ *op = output; + + while (n--) { + *op++ = (@totype@)*ip++; + } +} +/**end repeat1**/ + +/**begin repeat1 + * + * #FROMTYPE = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #fromtype = npy_float, npy_double, npy_longdouble# + */ +static void +@FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + @totype@ *op = output; + + while (n--) { + *op++ = (@totype@)*ip; + ip += 2; + } +} +/**end repeat1**/ + +/**end repeat**/ + + +/**begin repeat + * + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, LONGDOUBLE, DATETIME, + * TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_longdouble, + * npy_datetime, npy_timedelta# + */ + +static void +@TYPE@_to_HALF(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @type@ *ip = input; + npy_half *op = output; + + while (n--) { + *op++ = npy_float_to_half((float)(*ip++)); + } +} + +static void +HALF_to_@TYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_half *ip = input; + @type@ *op = output; + + while (n--) { + *op++ = (@type@)npy_half_to_float(*ip++); + } +} + +/**end repeat**/ +#if 
NPY_SIZEOF_SHORT == 2 +#define HALF_to_HALF SHORT_to_SHORT +#elif NPY_SIZEOF_INT == 2 +#define HALF_to_HALF INT_to_INT +#endif + +/**begin repeat + * + * #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + * #name = float, double, float, double# + * #itype = npy_uint32, npy_uint64, npy_uint32, npy_uint64# + * #iscomplex = 0, 0, 1, 1# + */ + +static void +@TYPE@_to_HALF(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @itype@ *ip = input; + npy_half *op = output; + + while (n--) { + *op++ = npy_@name@bits_to_halfbits(*ip); +#if @iscomplex@ + ip += 2; +#else + ip++; +#endif + } +} + +static void +HALF_to_@TYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_half *ip = input; + @itype@ *op = output; + + while (n--) { + *op++ = npy_halfbits_to_@name@bits(*ip++); +#if @iscomplex@ + *op++ = 0; +#endif + } +} + +/**end repeat**/ + +static void +CLONGDOUBLE_to_HALF(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_longdouble *ip = input; + npy_half *op = output; + + while (n--) { + *op++ = npy_double_to_half((double) (*ip++)); + ip += 2; + } +} + +static void +HALF_to_CLONGDOUBLE(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_half *ip = input; + npy_longdouble *op = output; + + while (n--) { + *op++ = npy_half_to_double(*ip++); + *op++ = 0; + } +} + +/**begin repeat + * + * #FROMTYPE = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #fromtype = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + */ +static void +@FROMTYPE@_to_BOOL(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + npy_bool *op = output; + + while (n--) { + *op++ = (npy_bool)(*ip++ != NPY_FALSE); + } +} +/**end repeat**/ + +static void +HALF_to_BOOL(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_half *ip = input; + npy_bool *op = output; + + while (n--) { + *op++ = (npy_bool)(!npy_half_iszero(*ip++)); + } +} + +/**begin repeat + * + * #FROMTYPE = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #fromtype = npy_cfloat, npy_cdouble, npy_clongdouble# + */ +static void +@FROMTYPE@_to_BOOL(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + npy_bool *op = output; + + while (n--) { + *op = (npy_bool)((ip->real != NPY_FALSE) || + (ip->imag != NPY_FALSE)); + op++; + ip++; + } +} +/**end repeat**/ + +/**begin repeat + * #TOTYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #totype = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + * #one = 1*10, NPY_HALF_ONE, 1*5# + * #zero = 0*10, NPY_HALF_ZERO, 0*5# + */ +static void +BOOL_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const npy_bool *ip = input; + @totype@ *op = output; + + while (n--) { + *op++ = (@totype@)((*ip++ != NPY_FALSE) ? 
@one@ : @zero@); + } +} +/**end repeat**/ + +/**begin repeat + * + * #TOTYPE = CFLOAT, CDOUBLE,CLONGDOUBLE# + * #totype = npy_float, npy_double, npy_longdouble# + */ + +/**begin repeat1 + * #FROMTYPE = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #fromtype = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + */ +static void +@FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + @totype@ *op = output; + + while (n--) { + *op++ = (@totype@)*ip++; + *op++ = 0.0; + } + +} +/**end repeat1**/ +/**end repeat**/ + +/**begin repeat + * + * #TOTYPE = CFLOAT,CDOUBLE,CLONGDOUBLE# + * #totype = npy_float, npy_double, npy_longdouble# + */ + +/**begin repeat1 + * #FROMTYPE = CFLOAT,CDOUBLE,CLONGDOUBLE# + * #fromtype = npy_float, npy_double, npy_longdouble# + */ +static void +@FROMTYPE@_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *NPY_UNUSED(aop)) +{ + const @fromtype@ *ip = input; + @totype@ *op = output; + + n <<= 1; + while (n--) { + *op++ = (@totype@)*ip++; + } +} + +/**end repeat1**/ +/**end repeat**/ + +/**begin repeat + * + * #FROMTYPE = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * STRING, UNICODE, VOID, OBJECT, + * DATETIME, TIMEDELTA# + * #fromtype = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_char, npy_char, npy_char, PyObject *, + * npy_datetime, npy_timedelta# + * #skip = 1*18, PyArray_DESCR(aip)->elsize*3, 1*3# + */ +static void +@FROMTYPE@_to_OBJECT(void *input, void *output, npy_intp n, + void *vaip, void *NPY_UNUSED(aop)) +{ + @fromtype@ *ip = input; + PyObject **op = output; + PyArrayObject *aip = vaip; + + npy_intp i; + int skip = @skip@; + PyObject *tmp; + for (i = 0; i < n; i++, ip +=skip, op++) { + tmp = *op; + *op = @FROMTYPE@_getitem(ip, aip); + Py_XDECREF(tmp); + } +} +/**end repeat**/ + +#define _NPY_UNUSEDBOOL NPY_UNUSED +#define _NPY_UNUSEDBYTE NPY_UNUSED +#define _NPY_UNUSEDUBYTE NPY_UNUSED +#define _NPY_UNUSEDSHORT NPY_UNUSED +#define _NPY_UNUSEDUSHORT NPY_UNUSED +#define _NPY_UNUSEDINT NPY_UNUSED +#define _NPY_UNUSEDUINT NPY_UNUSED +#define _NPY_UNUSEDLONG NPY_UNUSED +#define _NPY_UNUSEDULONG NPY_UNUSED +#define _NPY_UNUSEDLONGLONG NPY_UNUSED +#define _NPY_UNUSEDULONGLONG NPY_UNUSED +#define _NPY_UNUSEDHALF NPY_UNUSED +#define _NPY_UNUSEDFLOAT NPY_UNUSED +#define _NPY_UNUSEDDOUBLE NPY_UNUSED +#define _NPY_UNUSEDLONGDOUBLE NPY_UNUSED +#define _NPY_UNUSEDCFLOAT NPY_UNUSED +#define _NPY_UNUSEDCDOUBLE NPY_UNUSED +#define _NPY_UNUSEDCLONGDOUBLE NPY_UNUSED +#define _NPY_UNUSEDDATETIME NPY_UNUSED +#define _NPY_UNUSEDTIMEDELTA NPY_UNUSED +#define _NPY_UNUSEDHALF NPY_UNUSED +#define _NPY_UNUSEDSTRING +#define _NPY_UNUSEDVOID +#define _NPY_UNUSEDUNICODE + +/**begin repeat + * + * #TOTYPE = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * STRING, UNICODE, VOID, + * DATETIME, TIMEDELTA# + 
* #totype = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_char, npy_char, npy_char, + * npy_datetime, npy_timedelta# + * #skip = 1*18, PyArray_DESCR(aop)->elsize*3, 1*2# + */ +static void +OBJECT_to_@TOTYPE@(void *input, void *output, npy_intp n, + void *NPY_UNUSED(aip), void *aop) +{ + PyObject **ip = input; + @totype@ *op = output; + + npy_intp i; + int skip = @skip@; + + for (i = 0; i < n; i++, ip++, op += skip) { + if (*ip == NULL) { + if (@TOTYPE@_setitem(Py_False, op, aop) < 0) { + return; + } + } + else { + if (@TOTYPE@_setitem(*ip, op, aop) < 0) { + return; + } + } + } +} +/**end repeat**/ + + +/**begin repeat + * + * #from = STRING*23, UNICODE*23, VOID*23# + * #fromtyp = npy_char*69# + * #to = (BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * STRING, UNICODE, VOID, + * DATETIME, TIMEDELTA)*3# + * #totyp = (npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_char, npy_char, npy_char, + * npy_datetime, npy_timedelta)*3# + * #oskip = 1*18,(PyArray_DESCR(aop)->elsize)*3,1*2, + * 1*18,(PyArray_DESCR(aop)->elsize)*3,1*2, + * 1*18,(PyArray_DESCR(aop)->elsize)*3,1*2# + * #convert = 1*18, 0*3, 1*2, + * 1*18, 0*3, 1*2, + * 0*23# + * #convstr = (Int*9, Long*2, Float*4, Complex*3, Tuple*3, Long*2)*3# + */ + +#if @convert@ + +#define IS_@from@ + +static void +@from@_to_@to@(void *input, void *output, npy_intp n, + void *vaip, void *aop) +{ + @fromtyp@ *ip = input; + @totyp@ *op = output; + PyArrayObject *aip = vaip; + + npy_intp i; + int skip = PyArray_DESCR(aip)->elsize; + int oskip = @oskip@; + + for (i = 0; i < n; i++, ip+=skip, op+=oskip) { + PyObject *new; + PyObject *temp = PyArray_Scalar(ip, PyArray_DESCR(aip), (PyObject *)aip); + if (temp == NULL) { + return; + } + +#if defined(NPY_PY3K) && defined(IS_STRING) + /* Work around some Python 3K */ + new = PyUnicode_FromEncodedObject(temp, "ascii", "strict"); + Py_DECREF(temp); + temp = new; + if (temp == NULL) { + return; + } +#endif + /* convert from Python object to needed one */ + { + PyObject *args; + + /* call out to the Python builtin given by convstr */ + args = Py_BuildValue("(N)", temp); +#if defined(NPY_PY3K) +#define PyInt_Type PyLong_Type +#endif + new = Py@convstr@_Type.tp_new(&Py@convstr@_Type, args, NULL); +#if defined(NPY_PY3K) +#undef PyInt_Type +#endif + Py_DECREF(args); + temp = new; + if (temp == NULL) { + return; + } + } + + if (@to@_setitem(temp, op, aop)) { + Py_DECREF(temp); + return; + } + Py_DECREF(temp); + } +} + +#undef IS_@from@ + +#else + +static void +@from@_to_@to@(void *input, void *output, npy_intp n, + void *vaip, void *aop) +{ + @fromtyp@ *ip = input; + @totyp@ *op = output; + PyArrayObject *aip = vaip; + + npy_intp i; + int skip = PyArray_DESCR(aip)->elsize; + int oskip = @oskip@; + + for (i = 0; i < n; i++, ip+=skip, op+=oskip) { + PyObject *temp = PyArray_Scalar(ip, PyArray_DESCR(aip), (PyObject *)aip); + if (temp == NULL) { + return; + } + if (@to@_setitem(temp, op, aop)) { + Py_DECREF(temp); + return; + } + Py_DECREF(temp); + } +} + +#endif + +/**end repeat**/ + + +/**begin repeat + * + * #to = STRING*20, UNICODE*20, 
VOID*20# + * #totyp = npy_char*20, npy_char*20, npy_char*20# + * #from = (BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * DATETIME, TIMEDELTA)*3# + * #fromtyp = (npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_datetime, npy_timedelta)*3# + */ +static void +@from@_to_@to@(void *input, void *output, npy_intp n, + void *vaip, void *vaop) +{ + @fromtyp@ *ip = input; + @totyp@ *op = output; + PyArrayObject *aip = vaip; + PyArrayObject *aop = vaop; + + npy_intp i; + PyObject *temp = NULL; + int skip = 1; + int oskip = PyArray_DESCR(aop)->elsize; + for (i = 0; i < n; i++, ip += skip, op += oskip) { + temp = PyArray_Scalar(ip, PyArray_DESCR(aip), (PyObject *)aip); + if (temp == NULL) { + Py_INCREF(Py_False); + temp = Py_False; + } + if (@to@_setitem(temp, op, aop)) { + Py_DECREF(temp); + return; + } + Py_DECREF(temp); + } +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** SCAN ** + ***************************************************************************** + */ + + +/* + * The first ignore argument is for backwards compatibility. + * Should be removed when the API version is bumped up. + */ + +/**begin repeat + * #fname = SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + * #type = npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #format = "hd", "hu", "d", "u", + * "ld", "lu", NPY_LONGLONG_FMT, NPY_ULONGLONG_FMT# + */ +static int +@fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignored)) +{ + return fscanf(fp, "%"@format@, ip); +} +/**end repeat**/ + +/**begin repeat + * #fname = FLOAT, DOUBLE# + * #type = npy_float, npy_double# + */ +static int +@fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignored)) +{ + double result; + int ret; + + ret = NumPyOS_ascii_ftolf(fp, &result); + *ip = (@type@) result; + return ret; +} +/**end repeat**/ + +static int +LONGDOUBLE_scan(FILE *fp, npy_longdouble *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignored)) +{ + long double result; + int ret; + + ret = NumPyOS_ascii_ftoLf(fp, &result); + *ip = (npy_longdouble) result; + return ret; +} + +static int +HALF_scan(FILE *fp, npy_half *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignored)) +{ + double result; + int ret; + + ret = NumPyOS_ascii_ftolf(fp, &result); + *ip = npy_double_to_half(result); + return ret; +} + +/**begin repeat + * #fname = BYTE, UBYTE# + * #type = npy_byte, npy_ubyte# + * #btype = npy_int, npy_uint# + * #format = "d", "u"# + */ +static int +@fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignore2)) +{ + @btype@ temp; + int num; + + num = fscanf(fp, "%"@format@, &temp); + *ip = (@type@) temp; + return num; +} +/**end repeat**/ + +static int +BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignore2)) +{ + double result; + int ret; + + ret = NumPyOS_ascii_ftolf(fp, &result); + *ip = (npy_bool) (result != 0.0); + return ret; +} + +/**begin repeat + * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, STRING, UNICODE, VOID, + * DATETIME, TIMEDELTA# + */ + +#define @fname@_scan NULL + 
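+/*
+ * The NULL above is deliberate: no fscanf()-based scanner can be written for
+ * these types, so callers of the ->scan slot must check for NULL before use.
+ */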
+/**end repeat**/ + + +/* + ***************************************************************************** + ** FROMSTR ** + ***************************************************************************** + */ + + +/**begin repeat + * #fname = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * DATETIME, TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_datetime, npy_timedelta# + * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull, + * NumPyOS_strtoll*2# + * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong, + * npy_longlong*2# + */ +static int +@fname@_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + @btype@ result; + + result = @func@(str, endptr, 10); + *(@type@ *)ip = result; + return 0; +} +/**end repeat**/ + +/**begin repeat + * + * #fname = FLOAT, DOUBLE# + * #type = npy_float, npy_double# + */ +static int +@fname@_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + double result; + + result = NumPyOS_ascii_strtod(str, endptr); + *(@type@ *)ip = result; + return 0; +} +/**end repeat**/ + +static int +LONGDOUBLE_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + long double result; + + result = NumPyOS_ascii_strtold(str, endptr); + *(npy_longdouble *)ip = result; + return 0; +} + +static int +HALF_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + double result; + + result = NumPyOS_ascii_strtod(str, endptr); + *(npy_half *)ip = npy_double_to_half(result); + return 0; +} + +static int +BOOL_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + double result; + + result = NumPyOS_ascii_strtod(str, endptr); + *(npy_bool *)ip = (result != 0.0); + return 0; +} + +/**begin repeat + * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, STRING, UNICODE, VOID# + */ + +#define @fname@_fromstr NULL + +/**end repeat**/ + + +/* + ***************************************************************************** + ** COPYSWAPN ** + ***************************************************************************** + */ + + +static NPY_INLINE void +_basic_copyn(void *dst, npy_intp dstride, void *src, npy_intp sstride, + npy_intp n, int elsize) { + if (src == NULL) { + return; + } + if (sstride == elsize && dstride == elsize) { + memcpy(dst, src, n*elsize); + } + else { + _unaligned_strided_byte_copy(dst, dstride, src, sstride, + n, elsize); + } +} + +static NPY_INLINE void +_basic_copy(void *dst, void *src, int elsize) { + if (src == NULL) { + return; + } + memcpy(dst, src, elsize); +} + + +/**begin repeat + * + * #fname = SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #fsize = SHORT, SHORT, INT, INT, + * LONG, LONG, LONGLONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + */ +static void +@fname@_copyswapn (void *dst, npy_intp dstride, void *src, npy_intp sstride, + npy_intp n, int swap, void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copyn(dst, dstride, src, sstride, n, sizeof(@type@)); + if (swap) { + _strided_byte_swap(dst, dstride, n, sizeof(@type@)); + } +} + 
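+/*
+ * Illustration of the swap performed by the copyswap pair in this block
+ * (a sketch only, not used by the build); for the 4-byte INT instantiation:
+ *
+ *     npy_int v = 0x11223344;
+ *     INT_copyswap(&v, NULL, 1, NULL);   /* src == NULL: swap v in place */
+ *     /* v == 0x44332211 */
+ *
+ * copyswapn() applies the same copy-then-byteswap step to n strided elements.
+ */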
+static void +@fname@_copyswap (void *dst, void *src, int swap, void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copy(dst, src, sizeof(@type@)); + + if (swap) { + char *a, *b, c; + + a = (char *)dst; +#if NPY_SIZEOF_@fsize@ == 2 + b = a + 1; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 4 + b = a + 3; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 8 + b = a + 7; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 10 + b = a + 9; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 12 + b = a + 11; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 16 + b = a + 15; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#else + { + int i, nn; + + b = a + (NPY_SIZEOF_@fsize@-1); + nn = NPY_SIZEOF_@fsize@ / 2; + for (i = 0; i < nn; i++) { + c = *a; + *a++ = *b; + *b-- = c; + } + } +#endif + } +} + +/**end repeat**/ + +/**begin repeat + * + * #fname = BOOL, + * BYTE, UBYTE# + * #type = npy_bool, + * npy_byte, npy_ubyte# + */ +static void +@fname@_copyswapn (void *dst, npy_intp dstride, void *src, npy_intp sstride, + npy_intp n, int NPY_UNUSED(swap), void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copyn(dst, dstride, src, sstride, n, sizeof(@type@)); + /* ignore swap */ +} + +static void +@fname@_copyswap (void *dst, void *src, int NPY_UNUSED(swap), + void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copy(dst, src, sizeof(@type@)); + /* ignore swap */ +} + +/**end repeat**/ + + + +/**begin repeat + * + * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #fsize = FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# +*/ +static void +@fname@_copyswapn (void *dst, npy_intp dstride, void *src, npy_intp sstride, + npy_intp n, int swap, void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copyn(dst, dstride, src, sstride, n, sizeof(@type@)); + + if (swap) { + _strided_byte_swap(dst, dstride, n, NPY_SIZEOF_@fsize@); + _strided_byte_swap(((char *)dst + NPY_SIZEOF_@fsize@), dstride, + n, NPY_SIZEOF_@fsize@); + } +} + +static void +@fname@_copyswap (void *dst, void *src, int swap, void *NPY_UNUSED(arr)) +{ + /* copy first if needed */ + _basic_copy(dst, src, sizeof(@type@)); + + if (swap) { + char *a, *b, c; + a = (char *)dst; +#if NPY_SIZEOF_@fsize@ == 4 + b = a + 3; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; + a += 2; + b = a + 3; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 8 + b = a + 7; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; + a += 4; + b = a + 7; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 10 + b = a + 9; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = 
c; + a += 5; + b = a + 9; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 12 + b = a + 11; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; + a += 6; + b = a + 11; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#elif NPY_SIZEOF_@fsize@ == 16 + b = a + 15; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; + a += 8; + b = a + 15; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b-- = c; + c = *a; *a++ = *b; *b = c; +#else + { + int i, nn; + + b = a + (NPY_SIZEOF_@fsize@ - 1); + nn = NPY_SIZEOF_@fsize@ / 2; + for (i = 0; i < nn; i++) { + c = *a; + *a++ = *b; + *b-- = c; + } + a += nn; + b = a + (NPY_SIZEOF_@fsize@ - 1); + for (i = 0; i < nn; i++) { + c = *a; + *a++ = *b; + *b-- = c; + } + } +#endif + } +} + +/**end repeat**/ + +static void +OBJECT_copyswapn(PyObject **dst, npy_intp dstride, PyObject **src, + npy_intp sstride, npy_intp n, int NPY_UNUSED(swap), + void *NPY_UNUSED(arr)) +{ + npy_intp i; + if (src != NULL) { + if (__ALIGNED(dst, sizeof(PyObject **)) + && __ALIGNED(src, sizeof(PyObject **)) + && __ALIGNED(dstride, sizeof(PyObject **)) + && __ALIGNED(sstride, sizeof(PyObject **))) { + dstride /= sizeof(PyObject **); + sstride /= sizeof(PyObject **); + for (i = 0; i < n; i++) { + Py_XINCREF(*src); + Py_XDECREF(*dst); + *dst = *src; + dst += dstride; + src += sstride; + } + } + else { + unsigned char *dstp, *srcp; + PyObject *tmp; + dstp = (unsigned char*)dst; + srcp = (unsigned char*)src; + for (i = 0; i < n; i++) { + NPY_COPY_PYOBJECT_PTR(&tmp, srcp); + Py_XINCREF(tmp); + NPY_COPY_PYOBJECT_PTR(&tmp, dstp); + Py_XDECREF(tmp); + NPY_COPY_PYOBJECT_PTR(dstp, srcp); + dstp += dstride; + srcp += sstride; + } + } + } + /* ignore swap */ + return; +} + +static void +OBJECT_copyswap(PyObject **dst, PyObject **src, int NPY_UNUSED(swap), + void *NPY_UNUSED(arr)) +{ + + if (src != NULL) { + if (__ALIGNED(dst,sizeof(PyObject **)) && + __ALIGNED(src,sizeof(PyObject **))) { + Py_XINCREF(*src); + Py_XDECREF(*dst); + *dst = *src; + } + else { + PyObject *tmp; + NPY_COPY_PYOBJECT_PTR(&tmp, src); + Py_XINCREF(tmp); + NPY_COPY_PYOBJECT_PTR(&tmp, dst); + Py_XDECREF(tmp); + NPY_COPY_PYOBJECT_PTR(dst, src); + } + } +} + +/* ignore swap */ +static void +STRING_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, + npy_intp n, int NPY_UNUSED(swap), PyArrayObject *arr) +{ + if (arr == NULL) { + return; + } + _basic_copyn(dst, dstride, src, sstride, n, PyArray_DESCR(arr)->elsize); + return; +} + +/* */ +static void +VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, + npy_intp n, int swap, PyArrayObject *arr) +{ + PyArray_Descr *descr; + + if (arr == NULL) { + return; + } + + descr = PyArray_DESCR(arr); + + if (PyArray_HASFIELDS(arr)) { + PyObject *key, *value; + + Py_ssize_t pos = 0; + + while (PyDict_Next(descr->fields, 
&pos, &key, &value)) { + npy_intp offset; + PyArray_Descr * new; + if (NPY_TITLE_KEY(key, value)) { + continue; + } + if (_unpack_field(value, &new, &offset) < 0) { + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. + */ + ((PyArrayObject_fields *)arr)->descr = new; + new->f->copyswapn(dst+offset, dstride, + (src != NULL ? src+offset : NULL), + sstride, n, swap, arr); + } + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + if (PyDataType_HASSUBARRAY(descr)) { + PyArray_Descr *new; + npy_intp num; + npy_intp i; + int subitemsize; + char *dstptr, *srcptr; + /* + * In certain cases subarray copy can be optimized. This is when + * swapping is unecessary and the subarrays data type can certainly + * be simply copied (no object, fields, subarray, and not a user dtype). + */ + npy_bool can_optimize_subarray = (!swap && + !PyDataType_HASFIELDS(descr->subarray->base) && + !PyDataType_HASSUBARRAY(descr->subarray->base) && + !PyDataType_REFCHK(descr->subarray->base) && + (descr->subarray->base->type_num < NPY_NTYPES)); + + if (can_optimize_subarray) { + _basic_copyn(dst, dstride, src, sstride, n, descr->elsize); + return; + } + + new = descr->subarray->base; + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. + */ + ((PyArrayObject_fields *)arr)->descr = new; + dstptr = dst; + srcptr = src; + subitemsize = new->elsize; + if (subitemsize == 0) { + /* There cannot be any elements, so return */ + return; + } + num = descr->elsize / subitemsize; + for (i = 0; i < n; i++) { + new->f->copyswapn(dstptr, subitemsize, srcptr, + subitemsize, num, swap, arr); + dstptr += dstride; + if (srcptr) { + srcptr += sstride; + } + } + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */ + _basic_copyn(dst, dstride, src, sstride, n, descr->elsize); + return; +} + +static void +VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr) +{ + PyArray_Descr *descr; + + if (arr == NULL) { + return; + } + + descr = PyArray_DESCR(arr); + + if (PyArray_HASFIELDS(arr)) { + PyObject *key, *value; + Py_ssize_t pos = 0; + + while (PyDict_Next(descr->fields, &pos, &key, &value)) { + npy_intp offset; + PyArray_Descr * new; + if (NPY_TITLE_KEY(key, value)) { + continue; + } + if (_unpack_field(value, &new, &offset) < 0) { + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. + */ + ((PyArrayObject_fields *)arr)->descr = new; + new->f->copyswap(dst+offset, + (src != NULL ? src+offset : NULL), + swap, arr); + } + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + if (PyDataType_HASSUBARRAY(descr)) { + PyArray_Descr *new; + npy_intp num; + int subitemsize; + /* + * In certain cases subarray copy can be optimized. This is when + * swapping is unecessary and the subarrays data type can certainly + * be simply copied (no object, fields, subarray, and not a user dtype). 
+ */ + npy_bool can_optimize_subarray = (!swap && + !PyDataType_HASFIELDS(descr->subarray->base) && + !PyDataType_HASSUBARRAY(descr->subarray->base) && + !PyDataType_REFCHK(descr->subarray->base) && + (descr->subarray->base->type_num < NPY_NTYPES)); + + if (can_optimize_subarray) { + _basic_copy(dst, src, descr->elsize); + return; + } + + new = descr->subarray->base; + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. + */ + ((PyArrayObject_fields *)arr)->descr = new; + subitemsize = new->elsize; + if (subitemsize == 0) { + /* There cannot be any elements, so return */ + return; + } + num = descr->elsize / subitemsize; + new->f->copyswapn(dst, subitemsize, src, + subitemsize, num, swap, arr); + ((PyArrayObject_fields *)arr)->descr = descr; + return; + } + /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */ + _basic_copy(dst, src, descr->elsize); + return; +} + + +static void +UNICODE_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, + npy_intp n, int swap, PyArrayObject *arr) +{ + int itemsize; + + if (arr == NULL) { + return; + } + itemsize = PyArray_DESCR(arr)->elsize; + _basic_copyn(dst, dstride, src, sstride, n, itemsize); + + if (swap) { + int i; + char *_dst; + itemsize = itemsize / 4; + + while (n > 0) { + _dst = dst; + for (i=0; i < itemsize; i++) { + npy_bswap4_unaligned(_dst); + _dst += 4; + } + dst += dstride; + --n; + } + } +} + + +static void +STRING_copyswap(char *dst, char *src, int NPY_UNUSED(swap), PyArrayObject *arr) +{ + if (arr == NULL) { + return; + } + /* copy first if needed */ + _basic_copy(dst, src, PyArray_DESCR(arr)->elsize); +} + +static void +UNICODE_copyswap (char *dst, char *src, int swap, PyArrayObject *arr) +{ + int itemsize; + + if (arr == NULL) { + return; + } + itemsize = PyArray_DESCR(arr)->elsize; + _basic_copy(dst, src, itemsize); + + if (swap) { + int i; + char *_dst; + itemsize = itemsize / 4; + + _dst = dst; + for (i=0; i < itemsize; i++) { + npy_bswap4_unaligned(_dst); + _dst += 4; + } + } +} + + +/* + ***************************************************************************** + ** NONZERO ** + ***************************************************************************** + */ + +#define _NONZERO(a) ((a) != 0) + +/**begin repeat + * + * #fname = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + * #isfloat = 0*11, 1*4, 0*2# + * #nonzero = _NONZERO*11, !npy_half_iszero, _NONZERO*5# + */ +static npy_bool +@fname@_nonzero (char *ip, PyArrayObject *ap) +{ + if (ap == NULL || PyArray_ISBEHAVED_RO(ap)) { + @type@ *ptmp = (@type@ *)ip; + return (npy_bool) @nonzero@(*ptmp); + } + else { + /* + * Don't worry about swapping for integer types, + * since we are just testing for equality with 0. + * For float types, the signed zeros require us to swap. 
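+         * Interpreted without swapping, the bytes of a byteswapped -0.0 read
+         * as a tiny nonzero denormal, so the value must be corrected before
+         * the != 0 test; a byteswapped integer, by contrast, is zero exactly
+         * when the original is zero, so a plain memcpy suffices.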
+ */ + @type@ tmp; +#if @isfloat@ + PyArray_DESCR(ap)->f->copyswap(&tmp, ip, PyArray_ISBYTESWAPPED(ap), + ap); +#else + memcpy(&tmp, ip, sizeof(@type@)); +#endif + return (npy_bool) @nonzero@(tmp); + } +} +/**end repeat**/ + +/**begin repeat + * + * #fname = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + */ +static npy_bool +@fname@_nonzero (char *ip, PyArrayObject *ap) +{ + if (ap == NULL || PyArray_ISBEHAVED_RO(ap)) { + @type@ *ptmp = (@type@ *)ip; + return (npy_bool) ((ptmp->real != 0) || (ptmp->imag != 0)); + } + else { + @type@ tmp; + PyArray_DESCR(ap)->f->copyswap(&tmp, ip, PyArray_ISBYTESWAPPED(ap), + ap); + return (npy_bool) ((tmp.real != 0) || (tmp.imag != 0)); + } +} +/**end repeat**/ + + +#define WHITESPACE " \t\n\r\v\f" +#define WHITELEN 6 + +static npy_bool +Py_STRING_ISSPACE(char ch) +{ + char white[] = WHITESPACE; + int j; + npy_bool space = NPY_FALSE; + + for (j = 0; j < WHITELEN; j++) { + if (ch == white[j]) { + space = NPY_TRUE; + break; + } + } + return space; +} + +static npy_bool +STRING_nonzero (char *ip, PyArrayObject *ap) +{ + int len = PyArray_DESCR(ap)->elsize; + int i; + npy_bool nonz = NPY_FALSE; + npy_bool seen_null = NPY_FALSE; + + for (i = 0; i < len; i++) { + if (*ip == '\0') { + seen_null = NPY_TRUE; + } + else if (seen_null || !Py_STRING_ISSPACE(*ip)) { + nonz = NPY_TRUE; + break; + } + ip++; + } + return nonz; +} + +#ifdef Py_UNICODE_WIDE +#define PyArray_UCS4_ISSPACE Py_UNICODE_ISSPACE +#else +#define PyArray_UCS4_ISSPACE(ch) Py_STRING_ISSPACE((char)ch) +#endif + +static npy_bool +UNICODE_nonzero (npy_ucs4 *ip, PyArrayObject *ap) +{ + int len = PyArray_DESCR(ap)->elsize >> 2; + int i; + npy_bool nonz = NPY_FALSE; + npy_bool seen_null = NPY_FALSE; + char *buffer = NULL; + + if (PyArray_ISBYTESWAPPED(ap) || !PyArray_ISALIGNED(ap)) { + buffer = PyArray_malloc(PyArray_DESCR(ap)->elsize); + if (buffer == NULL) { + return nonz; + } + memcpy(buffer, ip, PyArray_DESCR(ap)->elsize); + if (PyArray_ISBYTESWAPPED(ap)) { + byte_swap_vector(buffer, len, 4); + } + ip = (npy_ucs4 *)buffer; + } + + for (i = 0; i < len; i++) { + if (*ip == '\0') { + seen_null = NPY_TRUE; + } + else if (seen_null || !PyArray_UCS4_ISSPACE(*ip)) { + nonz = NPY_TRUE; + break; + } + ip++; + } + PyArray_free(buffer); + return nonz; +} + +static npy_bool +OBJECT_nonzero (PyObject **ip, PyArrayObject *ap) +{ + + if (PyArray_ISALIGNED(ap)) { + if (*ip == NULL) { + return NPY_FALSE; + } + return (npy_bool) PyObject_IsTrue(*ip); + } + else { + PyObject *obj; + NPY_COPY_PYOBJECT_PTR(&obj, ip); + if (obj == NULL) { + return NPY_FALSE; + } + return (npy_bool) PyObject_IsTrue(obj); + } +} + +/* + * if we have fields, then nonzero only if all sub-fields are nonzero. + */ +static npy_bool +VOID_nonzero (char *ip, PyArrayObject *ap) +{ + int i; + int len; + npy_bool nonz = NPY_FALSE; + + if (PyArray_HASFIELDS(ap)) { + PyArray_Descr *descr; + PyObject *key, *value; + int savedflags; + Py_ssize_t pos = 0; + + descr = PyArray_DESCR(ap); + savedflags = PyArray_FLAGS(ap); + while (PyDict_Next(descr->fields, &pos, &key, &value)) { + PyArray_Descr * new; + npy_intp offset; + if (NPY_TITLE_KEY(key, value)) { + continue; + } + if (_unpack_field(value, &new, &offset) < 0) { + PyErr_Clear(); + continue; + } + /* + * TODO: temporarily modifying the array like this + * is bad coding style, should be changed. 
+ */ + ((PyArrayObject_fields *)ap)->descr = new; + ((PyArrayObject_fields *)ap)->flags = savedflags; + if ((new->alignment > 1) && !__ALIGNED(ip + offset, + new->alignment)) { + PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED); + } + else { + PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED); + } + if (new->f->nonzero(ip+offset, ap)) { + nonz = NPY_TRUE; + break; + } + } + ((PyArrayObject_fields *)ap)->descr = descr; + ((PyArrayObject_fields *)ap)->flags = savedflags; + return nonz; + } + len = PyArray_DESCR(ap)->elsize; + for (i = 0; i < len; i++) { + if (*ip != '\0') { + nonz = NPY_TRUE; + break; + } + ip++; + } + return nonz; +} + +#undef __ALIGNED + + +/* + ***************************************************************************** + ** COMPARE ** + ***************************************************************************** + */ + + +/* boolean type */ + +static int +BOOL_compare(npy_bool *ip1, npy_bool *ip2, PyArrayObject *NPY_UNUSED(ap)) +{ + return (*ip1 ? (*ip2 ? 0 : 1) : (*ip2 ? -1 : 0)); +} + + +/* integer types */ + +/**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * DATETIME, TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_datetime, npy_timedelta# + */ + +static int +@TYPE@_compare (@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) +{ + const @type@ a = *pa; + const @type@ b = *pb; + + return a < b ? -1 : a == b ? 0 : 1; +} + +/**end repeat**/ + + +/* float types */ + +/* + * The real/complex comparison functions are compatible with the new sort + * order for nans introduced in numpy 1.4.0. All nan values now compare + * larger than non-nan values and are sorted to the end. The comparison + * order is: + * + * Real: [R, nan] + * Complex: [R + Rj, R + nanj, nan + Rj, nan + nanj] + * + * where complex values with the same nan placements are sorted according + * to the non-nan part if it exists. If both the real and imaginary parts + * of complex types are non-nan the order is the same as the real parts + * unless they happen to be equal, in which case the order is that of the + * imaginary parts. + */ + +/**begin repeat + * + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_float, npy_double, npy_longdouble# + */ + +#define LT(a,b) ((a) < (b) || ((b) != (b) && (a) ==(a))) + +static int +@TYPE@_compare(@type@ *pa, @type@ *pb) +{ + const @type@ a = *pa; + const @type@ b = *pb; + int ret; + + if (LT(a,b)) { + ret = -1; + } + else if (LT(b,a)) { + ret = 1; + } + else { + ret = 0; + } + return ret; +} + + +static int +C@TYPE@_compare(@type@ *pa, @type@ *pb) +{ + const @type@ ar = pa[0]; + const @type@ ai = pa[1]; + const @type@ br = pb[0]; + const @type@ bi = pb[1]; + int ret; + + if (ar < br) { + if (ai == ai || bi != bi) { + ret = -1; + } + else { + ret = 1; + } + } + else if (br < ar) { + if (bi == bi || ai != ai) { + ret = 1; + } + else { + ret = -1; + } + } + else if (ar == br || (ar != ar && br != br)) { + if (LT(ai,bi)) { + ret = -1; + } + else if (LT(bi,ai)) { + ret = 1; + } + else { + ret = 0; + } + } + else if (ar == ar) { + ret = -1; + } + else { + ret = 1; + } + + return ret; +} + +#undef LT + +/**end repeat**/ + +static int +HALF_compare (npy_half *pa, npy_half *pb, PyArrayObject *NPY_UNUSED(ap)) +{ + npy_half a = *pa, b = *pb; + npy_bool a_isnan, b_isnan; + int ret; + + a_isnan = npy_half_isnan(a); + b_isnan = npy_half_isnan(b); + + if (a_isnan) { + ret = b_isnan ? 
0 : -1; + } + else if (b_isnan) { + ret = 1; + } + else if(npy_half_lt_nonan(a, b)) { + ret = -1; + } + else if(npy_half_lt_nonan(b, a)) { + ret = 1; + } + else { + ret = 0; + } + + return ret; +} + + +/* object type */ + +static int +OBJECT_compare(PyObject **ip1, PyObject **ip2, PyArrayObject *NPY_UNUSED(ap)) +{ + /* + * ALIGNMENT NOTE: It seems that PyArray_Sort is already handling + * the alignment of pointers, so it doesn't need to be handled + * here. + */ + + int ret; + /* + * work around gh-3879, we cannot abort an in-progress quicksort + * so at least do not raise again + */ + if (PyErr_Occurred()) { + return 0; + } + if ((*ip1 == NULL) || (*ip2 == NULL)) { + if (ip1 == ip2) { + return 1; + } + if (ip1 == NULL) { + return -1; + } + return 1; + } + + ret = PyObject_RichCompareBool(*ip1, *ip2, Py_LT); + if (ret < 0) { + /* error occurred, avoid the next call to PyObject_RichCompareBool */ + return 0; + } + if (ret == 1) { + return -1; + } + else if (PyObject_RichCompareBool(*ip1, *ip2, Py_GT) == 1) { + return 1; + } + else { + return 0; + } +} + + +/* string type */ + +static int +STRING_compare(char *ip1, char *ip2, PyArrayObject *ap) +{ + const unsigned char *c1 = (unsigned char *)ip1; + const unsigned char *c2 = (unsigned char *)ip2; + const size_t len = PyArray_DESCR(ap)->elsize; + int i; + + i = memcmp(c1, c2, len); + if (i > 0) { + return 1; + } + else if (i < 0) { + return -1; + } + return 0; +} + + +/* unicode type */ + +static int +UNICODE_compare(npy_ucs4 *ip1, npy_ucs4 *ip2, + PyArrayObject *ap) +{ + int itemsize = PyArray_DESCR(ap)->elsize; + + if (itemsize < 0) { + return 0; + } + itemsize /= sizeof(npy_ucs4); + while (itemsize-- > 0) { + npy_ucs4 c1 = *ip1++; + npy_ucs4 c2 = *ip2++; + if (c1 != c2) { + return (c1 < c2) ? -1 : 1; + } + } + return 0; +} + + +/* void type */ + +/* + * If fields are defined, then compare on first field and if equal + * compare on second field. Continue until done or comparison results + * in not_equal. + * + * Must align data passed on to sub-comparisons. + * Also must swap data based on to sub-comparisons. + */ +static int +VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) +{ + PyArray_Descr *descr; + PyObject *names, *key; + PyObject *tup; + PyArrayObject_fields dummy_struct; + PyArrayObject *dummy = (PyArrayObject *)&dummy_struct; + char *nip1, *nip2; + int i, res = 0, swap = 0; + + if (!PyArray_HASFIELDS(ap)) { + return STRING_compare(ip1, ip2, ap); + } + descr = PyArray_DESCR(ap); + /* + * Compare on the first-field. If equal, then + * compare on the second-field, etc. 
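+     * This matches Python tuple ordering: for a dtype with fields ('a', 'b'),
+     * (1, 9) sorts before (2, 0), and (1, 9) sorts before (1, 10).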
+ */ + names = descr->names; + for (i = 0; i < PyTuple_GET_SIZE(names); i++) { + PyArray_Descr *new; + npy_intp offset; + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(descr->fields, key); + if (_unpack_field(tup, &new, &offset) < 0) { + goto finish; + } + /* descr is the only field checked by compare or copyswap */ + dummy_struct.descr = new; + swap = PyArray_ISBYTESWAPPED(dummy); + nip1 = ip1 + offset; + nip2 = ip2 + offset; + if (swap || new->alignment > 1) { + if (swap || !npy_is_aligned(nip1, new->alignment)) { + /* create buffer and copy */ + nip1 = npy_alloc_cache(new->elsize); + if (nip1 == NULL) { + goto finish; + } + memcpy(nip1, ip1 + offset, new->elsize); + if (swap) + new->f->copyswap(nip1, NULL, swap, dummy); + } + if (swap || !npy_is_aligned(nip2, new->alignment)) { + /* create buffer and copy */ + nip2 = npy_alloc_cache(new->elsize); + if (nip2 == NULL) { + if (nip1 != ip1 + offset) { + npy_free_cache(nip1, new->elsize); + } + goto finish; + } + memcpy(nip2, ip2 + offset, new->elsize); + if (swap) + new->f->copyswap(nip2, NULL, swap, dummy); + } + } + res = new->f->compare(nip1, nip2, dummy); + if (swap || new->alignment > 1) { + if (nip1 != ip1 + offset) { + npy_free_cache(nip1, new->elsize); + } + if (nip2 != ip2 + offset) { + npy_free_cache(nip2, new->elsize); + } + } + if (res != 0) { + break; + } + } + +finish: + return res; +} + + +/* + ***************************************************************************** + ** ARGFUNC ** + ***************************************************************************** + */ + +#define _LESS_THAN_OR_EQUAL(a,b) ((a) <= (b)) + +static int +BOOL_argmax(npy_bool *ip, npy_intp n, npy_intp *max_ind, + PyArrayObject *NPY_UNUSED(aip)) + +{ + npy_intp i = 0; + /* memcmp like logical_and on i386 is maybe slower for small arrays */ +#ifdef NPY_HAVE_SSE2_INTRINSICS + const __m128i zero = _mm_setzero_si128(); + for (; i < n - (n % 32); i+=32) { + __m128i d1 = _mm_loadu_si128((__m128i*)&ip[i]); + __m128i d2 = _mm_loadu_si128((__m128i*)&ip[i + 16]); + d1 = _mm_cmpeq_epi8(d1, zero); + d2 = _mm_cmpeq_epi8(d2, zero); + if (_mm_movemask_epi8(_mm_min_epu8(d1, d2)) != 0xFFFF) { + break; + } + } +#endif + for (; i < n; i++) { + if (ip[i]) { + *max_ind = i; + return 0; + } + } + *max_ind = 0; + return 0; +} + +/**begin repeat + * + * #fname = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + * #isfloat = 0*10, 1*7, 0*2# + * #isnan = nop*10, npy_half_isnan, npy_isnan*6, nop*2# + * #le = _LESS_THAN_OR_EQUAL*10, npy_half_le, _LESS_THAN_OR_EQUAL*8# + * #iscomplex = 0*14, 1*3, 0*2# + * #incr = ip++*14, ip+=2*3, ip++*2# + */ +static int +@fname@_argmax(@type@ *ip, npy_intp n, npy_intp *max_ind, + PyArrayObject *NPY_UNUSED(aip)) +{ + npy_intp i; + @type@ mp = *ip; +#if @iscomplex@ + @type@ mp_im = ip[1]; +#endif + + *max_ind = 0; + +#if @isfloat@ + if (@isnan@(mp)) { + /* nan encountered; it's maximal */ + return 0; + } +#endif +#if @iscomplex@ + if (@isnan@(mp_im)) { + /* nan encountered; it's maximal */ + return 0; + } +#endif + + for (i = 1; i < n; i++) { + @incr@; + /* + * Propagate nans, similarly as max() and min() + */ +#if @iscomplex@ + /* Lexical order for complex 
numbers */ + if ((ip[0] > mp) || ((ip[0] == mp) && (ip[1] > mp_im)) + || @isnan@(ip[0]) || @isnan@(ip[1])) { + mp = ip[0]; + mp_im = ip[1]; + *max_ind = i; + if (@isnan@(mp) || @isnan@(mp_im)) { + /* nan encountered, it's maximal */ + break; + } + } +#else + if (!@le@(*ip, mp)) { /* negated, for correct nan handling */ + mp = *ip; + *max_ind = i; +#if @isfloat@ + if (@isnan@(mp)) { + /* nan encountered, it's maximal */ + break; + } +#endif + } +#endif + } + return 0; +} + +/**end repeat**/ + +static int +BOOL_argmin(npy_bool *ip, npy_intp n, npy_intp *min_ind, + PyArrayObject *NPY_UNUSED(aip)) + +{ + npy_bool * p = memchr(ip, 0, n * sizeof(*ip)); + if (p == NULL) { + *min_ind = 0; + return 0; + } + *min_ind = p - ip; + return 0; +} + +/**begin repeat + * + * #fname = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble# + * #isfloat = 0*10, 1*7# + * #isnan = nop*10, npy_half_isnan, npy_isnan*6# + * #le = _LESS_THAN_OR_EQUAL*10, npy_half_le, _LESS_THAN_OR_EQUAL*6# + * #iscomplex = 0*14, 1*3# + * #incr = ip++*14, ip+=2*3# + */ +static int +@fname@_argmin(@type@ *ip, npy_intp n, npy_intp *min_ind, + PyArrayObject *NPY_UNUSED(aip)) +{ + npy_intp i; + @type@ mp = *ip; +#if @iscomplex@ + @type@ mp_im = ip[1]; +#endif + + *min_ind = 0; + +#if @isfloat@ + if (@isnan@(mp)) { + /* nan encountered; it's minimal */ + return 0; + } +#endif +#if @iscomplex@ + if (@isnan@(mp_im)) { + /* nan encountered; it's minimal */ + return 0; + } +#endif + + for (i = 1; i < n; i++) { + @incr@; + /* + * Propagate nans, similarly as max() and min() + */ +#if @iscomplex@ + /* Lexical order for complex numbers */ + if ((mp > ip[0]) || ((ip[0] == mp) && (mp_im > ip[1])) + || @isnan@(ip[0]) || @isnan@(ip[1])) { + mp = ip[0]; + mp_im = ip[1]; + *min_ind = i; + if (@isnan@(mp) || @isnan@(mp_im)) { + /* nan encountered, it's minimal */ + break; + } + } +#else + if (!@le@(mp, *ip)) { /* negated, for correct nan handling */ + mp = *ip; + *min_ind = i; +#if @isfloat@ + if (@isnan@(mp)) { + /* nan encountered, it's minimal */ + break; + } +#endif + } +#endif + } + return 0; +} + +/**end repeat**/ + +#undef _LESS_THAN_OR_EQUAL + +/**begin repeat + * + * #fname = DATETIME, TIMEDELTA# + * #type = npy_datetime, npy_timedelta# + */ +static int +@fname@_argmin(@type@ *ip, npy_intp n, npy_intp *min_ind, + PyArrayObject *NPY_UNUSED(aip)) +{ + /* NPY_DATETIME_NAT is smaller than every other value, we skip + * it for consistency with min(). 
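+     * For example, for the values {NaT, 3, 1} the index returned is 2; only
+     * when every element is NaT does the function fall back to index 0.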
+ */ + npy_intp i; + @type@ mp = NPY_DATETIME_NAT; + + i = 0; + while (i < n && mp == NPY_DATETIME_NAT) { + mp = ip[i]; + i++; + } + if (i == n) { + /* All NaTs: return 0 */ + *min_ind = 0; + return 0; + } + *min_ind = i - 1; + for (; i < n; i++) { + if (mp > ip[i] && ip[i] != NPY_DATETIME_NAT) { + mp = ip[i]; + *min_ind = i; + } + } + return 0; +} + +/**end repeat**/ + +static int +OBJECT_argmax(PyObject **ip, npy_intp n, npy_intp *max_ind, + PyArrayObject *NPY_UNUSED(aip)) +{ + npy_intp i; + + *max_ind = 0; + /* Skip over all leading NULL entries */ + for (i = 0; i < n && ip[i] == NULL; ++i); + if (i < n) { + /* Found first non-NULL entry */ + PyObject *mp = ip[i]; + *max_ind = i; + for (i = i + 1; i < n; ++i) { + PyObject *val = ip[i]; + if (val != NULL) { + int greater_than = PyObject_RichCompareBool(val, mp, Py_GT); + + if (greater_than < 0) { + return 0; + } + if (greater_than) { + mp = val; + *max_ind = i; + } + } + } + } + + return 0; +} + +/**begin repeat + * + * #fname = STRING, UNICODE# + * #type = npy_char, npy_ucs4# + */ +static int +@fname@_argmax(@type@ *ip, npy_intp n, npy_intp *max_ind, PyArrayObject *aip) +{ + npy_intp i; + int elsize = PyArray_DESCR(aip)->elsize; + @type@ *mp = (@type@ *)PyArray_malloc(elsize); + + if (mp == NULL) { + return 0; + } + memcpy(mp, ip, elsize); + *max_ind = 0; + for (i = 1; i < n; i++) { + ip += elsize / sizeof(@type@); + if (@fname@_compare(ip, mp, aip) > 0) { + memcpy(mp, ip, elsize); + *max_ind = i; + } + } + PyArray_free(mp); + return 0; +} + +/**end repeat**/ + +#define VOID_argmax NULL + +static int +OBJECT_argmin(PyObject **ip, npy_intp n, npy_intp *min_ind, + PyArrayObject *NPY_UNUSED(aip)) +{ + npy_intp i; + + *min_ind = 0; + /* Skip over all leading NULL entries */ + for (i = 0; i < n && ip[i] == NULL; ++i); + if (i < n) { + /* Found first non-NULL entry */ + PyObject *mp = ip[i]; + *min_ind = i; + for (i = i + 1; i < n ; ++i) { + PyObject *val = ip[i]; + if (val != NULL) { + int less_than = PyObject_RichCompareBool(val, mp, Py_LT); + + if (less_than < 0) { + return 0; + } + if (less_than) { + mp = val; + *min_ind = i; + } + } + } + } + + return 0; +} + +/**begin repeat + * + * #fname = STRING, UNICODE# + * #type = npy_char, npy_ucs4# + */ +static int +@fname@_argmin(@type@ *ip, npy_intp n, npy_intp *min_ind, PyArrayObject *aip) +{ + npy_intp i; + int elsize = PyArray_DESCR(aip)->elsize; + @type@ *mp = (@type@ *)PyArray_malloc(elsize); + + if (mp==NULL) return 0; + memcpy(mp, ip, elsize); + *min_ind = 0; + for(i=1; i<n; i++) { + ip += elsize / sizeof(@type@); + if (@fname@_compare(mp,ip,aip) > 0) { + memcpy(mp, ip, elsize); + *min_ind=i; + } + } + PyArray_free(mp); + return 0; +} + +/**end repeat**/ + + +#define VOID_argmin NULL + + +/* + ***************************************************************************** + ** DOT ** + ***************************************************************************** + */ + +/* + * dot means inner product + */ + +/************************** MAYBE USE CBLAS *********************************/ + + +/**begin repeat + * + * #name = FLOAT, DOUBLE# + * #type = npy_float, npy_double# + * #prefix = s, d# + */ +NPY_NO_EXPORT void +@name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, + npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(@type@)); + int is2b = blas_stride(is2, sizeof(@type@)); + + if (is1b && is2b) + { + double sum = 0.; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? 
n : NPY_CBLAS_CHUNK; + + sum += cblas_@prefix@dot(chunk, + (@type@ *) ip1, is1b, + (@type@ *) ip2, is2b); + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + *((@type@ *)op) = (@type@)sum; + } + else +#endif + { + @type@ sum = (@type@)0; /* could make this double */ + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const @type@ ip1r = *((@type@ *)ip1); + const @type@ ip2r = *((@type@ *)ip2); + + sum += ip1r * ip2r; + } + *((@type@ *)op) = sum; + } +} +/**end repeat**/ + +/**begin repeat + * + * #name = CFLOAT, CDOUBLE# + * #ctype = npy_cfloat, npy_cdouble# + * #type = npy_float, npy_double# + * #prefix = c, z# + */ +NPY_NO_EXPORT void +@name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(@ctype@)); + int is2b = blas_stride(is2, sizeof(@ctype@)); + + if (is1b && is2b) { + double sum[2] = {0., 0.}; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK; + @type@ tmp[2]; + + cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp); + sum[0] += (double)tmp[0]; + sum[1] += (double)tmp[1]; + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + ((@type@ *)op)[0] = (@type@)sum[0]; + ((@type@ *)op)[1] = (@type@)sum[1]; + } + else +#endif + { + @type@ sumr = (@type@)0.0; + @type@ sumi = (@type@)0.0; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const @type@ ip1r = ((@type@ *)ip1)[0]; + const @type@ ip1i = ((@type@ *)ip1)[1]; + const @type@ ip2r = ((@type@ *)ip2)[0]; + const @type@ ip2i = ((@type@ *)ip2)[1]; + + sumr += ip1r * ip2r - ip1i * ip2i; + sumi += ip1r * ip2i + ip1i * ip2r; + } + ((@type@ *)op)[0] = sumr; + ((@type@ *)op)[1] = sumi; + } +} + +/**end repeat**/ + +/**************************** NO CBLAS VERSIONS *****************************/ + +static void +BOOL_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, + void *NPY_UNUSED(ignore)) +{ + npy_bool tmp = NPY_FALSE; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + if ((*((npy_bool *)ip1) != 0) && (*((npy_bool *)ip2) != 0)) { + tmp = NPY_TRUE; + break; + } + } + *((npy_bool *)op) = tmp; +} + +/**begin repeat + * + * #name = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * LONGDOUBLE, DATETIME, TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_longdouble, npy_datetime, npy_timedelta# + * #out = npy_long, npy_ulong, npy_long, npy_ulong, npy_long, npy_ulong, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_longdouble, npy_datetime, npy_timedelta# + */ +static void +@name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, + void *NPY_UNUSED(ignore)) +{ + @out@ tmp = (@out@)0; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + tmp += (@out@)(*((@type@ *)ip1)) * + (@out@)(*((@type@ *)ip2)); + } + *((@type@ *)op) = (@type@) tmp; +} +/**end repeat**/ + +static void +HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, + npy_intp n, void *NPY_UNUSED(ignore)) +{ + float tmp = 0.0f; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + tmp += npy_half_to_float(*((npy_half *)ip1)) * + npy_half_to_float(*((npy_half *)ip2)); + } + *((npy_half *)op) = npy_float_to_half(tmp); +} + +static void 
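+/*
+ * The CBLAS branches above hand the data to BLAS in NPY_CBLAS_CHUNK sized
+ * pieces, because the classic CBLAS interface takes 32-bit lengths, and
+ * they accumulate the per-chunk results in double to limit rounding
+ * error.  A scalar sketch of that pattern, with dot_chunk standing in
+ * for the cblas_?dot / cblas_?dotu_sub calls (hypothetical helper):
+ *
+ *     double sum = 0.;
+ *     while (n > 0) {
+ *         int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+ *         sum += dot_chunk(ip1, is1b, ip2, is2b, chunk);
+ *         ip1 += chunk * is1;
+ *         ip2 += chunk * is2;
+ *         n -= chunk;
+ *     }
+ *
+ * and the narrowed sum is stored through op at the end.  There is no
+ * CBLAS dot routine for complex long double, so the CLONGDOUBLE case
+ * below always uses the plain loop.
+ */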
+CLONGDOUBLE_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ + npy_longdouble tmpr = 0.0L; + npy_longdouble tmpi = 0.0L; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const npy_longdouble ip1r = ((npy_longdouble *)ip1)[0]; + const npy_longdouble ip1i = ((npy_longdouble *)ip1)[1]; + const npy_longdouble ip2r = ((npy_longdouble *)ip2)[0]; + const npy_longdouble ip2i = ((npy_longdouble *)ip2)[1]; + + tmpr += ip1r * ip2r - ip1i * ip2i; + tmpi += ip1r * ip2i + ip1i * ip2r; + } + ((npy_longdouble *)op)[0] = tmpr; + ((npy_longdouble *)op)[1] = tmpi; +} + +static void +OBJECT_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, + void *NPY_UNUSED(ignore)) +{ + /* + * ALIGNMENT NOTE: np.dot, np.inner etc. enforce that the array is + * BEHAVED before getting to this point, so unaligned pointers aren't + * handled here. + */ + npy_intp i; + PyObject *tmp1, *tmp2, *tmp = NULL; + PyObject **tmp3; + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + if ((*((PyObject **)ip1) == NULL) || (*((PyObject **)ip2) == NULL)) { + tmp1 = Py_False; + Py_INCREF(Py_False); + } + else { + tmp1 = PyNumber_Multiply(*((PyObject **)ip1), *((PyObject **)ip2)); + if (!tmp1) { + Py_XDECREF(tmp); + return; + } + } + if (i == 0) { + tmp = tmp1; + } + else { + tmp2 = PyNumber_Add(tmp, tmp1); + Py_XDECREF(tmp); + Py_XDECREF(tmp1); + if (!tmp2) { + return; + } + tmp = tmp2; + } + } + tmp3 = (PyObject**) op; + tmp2 = *tmp3; + *((PyObject **)op) = tmp; + Py_XDECREF(tmp2); +} + + +/* + ***************************************************************************** + ** FILL ** + ***************************************************************************** + */ + + +#define BOOL_fill NULL + +/* this requires buffer to be filled with objects or NULL */ +static int +OBJECT_fill(PyObject **buffer, npy_intp length, void *NPY_UNUSED(ignored)) +{ + int retval = 0; + npy_intp i; + PyObject *start = buffer[0]; + PyObject *delta = buffer[1]; + PyObject *second; + + delta = PyNumber_Subtract(delta, start); + if (!delta) { + return -1; + } + second = start = PyNumber_Add(start, delta); + if (!start) { + goto error; + } + buffer += 2; + + for (i = 2; i < length; i++, buffer++) { + start = PyNumber_Add(start, delta); + if (!start) { + goto error; + } + Py_XDECREF(*buffer); + *buffer = start; + } + goto finish; + +error: + retval = -1; + +finish: + Py_XDECREF(second); + Py_DECREF(delta); + return retval; +} + +/**begin repeat + * + * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# +*/ +static int +@NAME@_fill(@type@ *buffer, npy_intp length, void *NPY_UNUSED(ignored)) +{ + npy_intp i; + @type@ start = buffer[0]; + @type@ delta = buffer[1]; + + delta -= start; + for (i = 2; i < length; ++i) { + buffer[i] = start + i*delta; + } + return 0; +} +/**end repeat**/ + +static int +HALF_fill(npy_half *buffer, npy_intp length, void *NPY_UNUSED(ignored)) +{ + npy_intp i; + float start = npy_half_to_float(buffer[0]); + float delta = npy_half_to_float(buffer[1]); + + delta -= start; + for (i = 2; i < length; ++i) { + buffer[i] = npy_float_to_half(start + i*delta); + } + return 0; +} + +/**begin repeat + * + * #NAME = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #type = 
npy_cfloat, npy_cdouble, npy_clongdouble# +*/ +static int +@NAME@_fill(@type@ *buffer, npy_intp length, void *NPY_UNUSED(ignore)) +{ + npy_intp i; + @type@ start; + @type@ delta; + + start.real = buffer->real; + start.imag = buffer->imag; + delta.real = buffer[1].real; + delta.imag = buffer[1].imag; + delta.real -= start.real; + delta.imag -= start.imag; + buffer += 2; + for (i = 2; i < length; i++, buffer++) { + buffer->real = start.real + i*delta.real; + buffer->imag = start.imag + i*delta.imag; + } + return 0; +} +/**end repeat**/ + + +/* this requires buffer to be filled with objects or NULL */ +static void +OBJECT_fillwithscalar(PyObject **buffer, npy_intp length, PyObject **value, + void *NPY_UNUSED(ignored)) +{ + npy_intp i; + PyObject *val = *value; + for (i = 0; i < length; i++) { + Py_XINCREF(val); + Py_XDECREF(buffer[i]); + buffer[i] = val; + } +} +/**begin repeat + * + * #NAME = BOOL, BYTE, UBYTE# + * #type = npy_bool, npy_byte, npy_ubyte# + */ +static void +@NAME@_fillwithscalar(@type@ *buffer, npy_intp length, @type@ *value, + void *NPY_UNUSED(ignored)) +{ + memset(buffer, *value, length); +} +/**end repeat**/ + +/**begin repeat + * + * #NAME = SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_datetime, npy_timedelta# + */ +static void +@NAME@_fillwithscalar(@type@ *buffer, npy_intp length, @type@ *value, + void *NPY_UNUSED(ignored)) +{ + npy_intp i; + @type@ val = *value; + + for (i = 0; i < length; ++i) { + buffer[i] = val; + } +} +/**end repeat**/ + + +/* + ***************************************************************************** + ** FASTCLIP ** + ***************************************************************************** + */ + +#define _LESS_THAN(a, b) ((a) < (b)) +#define _GREATER_THAN(a, b) ((a) > (b)) + +/* + * In fastclip, 'b' was already checked for NaN, so the half comparison + * only needs to check 'a' for NaN. 
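+ * With the comparison macros defined below, a NaN element is neither
+ * "less than min" nor "greater than max", so it is copied through
+ * unclipped, matching the usual float clip behaviour.  Illustrative
+ * example:
+ *
+ *     in = {1.0, NaN, 9.0}, min = 2.0, max = 5.0  ->  out = {2.0, NaN, 5.0}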
+ */ + +#define _HALF_LESS_THAN(a, b) (!npy_half_isnan(a) && npy_half_lt_nonan(a, b)) +#define _HALF_GREATER_THAN(a, b) (!npy_half_isnan(a) && npy_half_lt_nonan(b, a)) + +/**begin repeat + * + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_datetime, npy_timedelta# + * #isfloat = 0*11, 1*4, 0*2# + * #isnan = nop*11, npy_half_isnan, npy_isnan*3, nop*2# + * #lt = _LESS_THAN*11, _HALF_LESS_THAN, _LESS_THAN*5# + * #gt = _GREATER_THAN*11, _HALF_GREATER_THAN, _GREATER_THAN*5# + */ +static void +@name@_fastclip(@type@ *in, npy_intp ni, @type@ *min, @type@ *max, @type@ *out) +{ + npy_intp i; + @type@ max_val = 0, min_val = 0; + + if (max != NULL) { + max_val = *max; +#if @isfloat@ + /* NaNs result in no clipping, so optimize the case away */ + if (@isnan@(max_val)) { + if (min == NULL) { + memmove(out, in, ni * sizeof(@type@)); + return; + } + max = NULL; + } +#endif + } + if (min != NULL) { + min_val = *min; +#if @isfloat@ + if (@isnan@(min_val)) { + if (max == NULL) { + memmove(out, in, ni * sizeof(@type@)); + return; + } + min = NULL; + } +#endif + } + if (max == NULL) { + for (i = 0; i < ni; i++) { + if (@lt@(in[i], min_val)) { + out[i] = min_val; + } + else { + out[i] = in[i]; + } + } + } + else if (min == NULL) { + for (i = 0; i < ni; i++) { + if (@gt@(in[i], max_val)) { + out[i] = max_val; + } + else { + out[i] = in[i]; + } + } + } + else { + /* + * Visual Studio 2015 loop vectorizer handles NaN in an unexpected + * manner, see: https://github.com/numpy/numpy/issues/7601 + */ + #if (_MSC_VER == 1900) + #pragma loop( no_vector ) + #endif + for (i = 0; i < ni; i++) { + if (@lt@(in[i], min_val)) { + out[i] = min_val; + } + else if (@gt@(in[i], max_val)) { + out[i] = max_val; + } + else { + out[i] = in[i]; + } + } + } +} +/**end repeat**/ + +#undef _LESS_THAN +#undef _GREATER_THAN +#undef _HALF_LESS_THAN +#undef _HALF_GREATER_THAN + +/**begin repeat + * + * #name = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + */ +static void +@name@_fastclip(@type@ *in, npy_intp ni, @type@ *min, @type@ *max, @type@ *out) +{ + npy_intp i; + @type@ max_val, min_val; + + if (max != NULL) { + max_val = *max; + } + if (min != NULL) { + min_val = *min; + } + if (max == NULL) { + for (i = 0; i < ni; i++) { + if (PyArray_CLT(in[i],min_val)) { + out[i] = min_val; + } + else { + out[i] = in[i]; + } + } + } + else if (min == NULL) { + for (i = 0; i < ni; i++) { + if (PyArray_CGT(in[i], max_val)) { + out[i] = max_val; + } + else { + out[i] = in[i]; + } + } + } + else { + for (i = 0; i < ni; i++) { + if (PyArray_CLT(in[i], min_val)) { + out[i] = min_val; + } + else if (PyArray_CGT(in[i], max_val)) { + out[i] = max_val; + } + else { + out[i] = in[i]; + } + } + } +} + +/**end repeat**/ + +#define OBJECT_fastclip NULL + + +/* + ***************************************************************************** + ** FASTPUTMASK ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, 
npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_datetime, npy_timedelta# +*/ +static void +@name@_fastputmask(@type@ *in, npy_bool *mask, npy_intp ni, @type@ *vals, + npy_intp nv) +{ + npy_intp i, j; + + if (nv == 1) { + @type@ s_val = *vals; + for (i = 0; i < ni; i++) { + if (mask[i]) { + in[i] = s_val; + } + } + } + else { + for (i = 0, j = 0; i < ni; i++, j++) { + if (j >= nv) { + j = 0; + } + if (mask[i]) { + in[i] = vals[j]; + } + } + } + return; +} +/**end repeat**/ + +#define OBJECT_fastputmask NULL + + +/* + ***************************************************************************** + ** FASTTAKE ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * DATETIME, TIMEDELTA# + * #type = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_datetime, npy_timedelta# +*/ +static int +@name@_fasttake(@type@ *dest, @type@ *src, npy_intp *indarray, + npy_intp nindarray, npy_intp n_outer, + npy_intp m_middle, npy_intp nelem, + NPY_CLIPMODE clipmode) +{ + npy_intp i, j, k, tmp; + NPY_BEGIN_THREADS_DEF; + + NPY_BEGIN_THREADS; + + switch(clipmode) { + case NPY_RAISE: + for (i = 0; i < n_outer; i++) { + for (j = 0; j < m_middle; j++) { + tmp = indarray[j]; + /* + * We don't know what axis we're operating on, + * so don't report it in case of an error. + */ + if (check_and_adjust_index(&tmp, nindarray, -1, _save) < 0) { + return 1; + } + if (NPY_LIKELY(nelem == 1)) { + *dest++ = *(src + tmp); + } + else { + for (k = 0; k < nelem; k++) { + *dest++ = *(src + tmp*nelem + k); + } + } + } + src += nelem*nindarray; + } + break; + case NPY_WRAP: + for (i = 0; i < n_outer; i++) { + for (j = 0; j < m_middle; j++) { + tmp = indarray[j]; + if (tmp < 0) { + while (tmp < 0) { + tmp += nindarray; + } + } + else if (tmp >= nindarray) { + while (tmp >= nindarray) { + tmp -= nindarray; + } + } + if (NPY_LIKELY(nelem == 1)) { + *dest++ = *(src+tmp); + } + else { + for (k = 0; k < nelem; k++) { + *dest++ = *(src+tmp*nelem+k); + } + } + } + src += nelem*nindarray; + } + break; + case NPY_CLIP: + for (i = 0; i < n_outer; i++) { + for (j = 0; j < m_middle; j++) { + tmp = indarray[j]; + if (tmp < 0) { + tmp = 0; + } + else if (tmp >= nindarray) { + tmp = nindarray - 1; + } + if (NPY_LIKELY(nelem == 1)) { + *dest++ = *(src + tmp); + } + else { + for (k = 0; k < nelem; k++) { + *dest++ = *(src + tmp*nelem + k); + } + } + } + src += nelem*nindarray; + } + break; + } + + NPY_END_THREADS; + return 0; +} +/**end repeat**/ + +#define OBJECT_fasttake NULL + +/* + ***************************************************************************** + ** small correlate ** + ***************************************************************************** + */ + +/* + * Compute correlation of data with with small kernels + * Calling a BLAS dot product for the inner loop of the correlation is overkill + * for small kernels. It is faster to compute it directly. 
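+ * For a kernel of nk elements it computes, for every output index i,
+ *
+ *     out[i*ostride] = sum_{j = 0 .. nk-1} d[(i+j)*dstride] * k[j*kstride]
+ *
+ * with the nk kernel values loaded into locals once, before the data
+ * loop, by the unrolled switch below; keeping them in registers is the
+ * point of restricting nk to small values.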
+ * Intended to be used by _pyarray_correlate so no input verifications is done + * especially it does not handle the boundaries, they should be handled by the + * caller. + * Returns 0 if kernel is considered too large or types are not supported, then + * the regular array dot should be used to process the data. + * + * d_, dstride, nd, dtype: data pointer, its stride in bytes, number of + * elements and type of data + * k_, kstride, nk, ktype: kernel pointer, its stride in bytes, number of + * elements and type of data + * out_, ostride: output data pointer and its stride in bytes + */ +NPY_NO_EXPORT int +small_correlate(const char * d_, npy_intp dstride, + npy_intp nd, enum NPY_TYPES dtype, + const char * k_, npy_intp kstride, + npy_intp nk, enum NPY_TYPES ktype, + char * out_, npy_intp ostride) +{ + /* only handle small kernels and uniform types */ + if (nk > 11 || dtype != ktype) { + return 0; + } + + switch (dtype) { +/**begin repeat + * Float types + * #type = npy_float, npy_double# + * #TYPE = NPY_FLOAT, NPY_DOUBLE# + */ + case @TYPE@: + { + npy_intp i; + const @type@ * d = (@type@*)d_; + const @type@ * k = (@type@*)k_; + @type@ * out = (@type@*)out_; + dstride /= sizeof(@type@); + kstride /= sizeof(@type@); + ostride /= sizeof(@type@); + /* unroll inner loop to optimize register usage of the kernel*/ + switch (nk) { +/**begin repeat1 + * #ksz_outer = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11# */ + case @ksz_outer@: + { +/**begin repeat2 + * #ksz = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11# */ +#if @ksz@ <= @ksz_outer@ + /* load kernel */ + const @type@ k@ksz@ = k[(@ksz@ - 1) * kstride]; +#endif +/**end repeat2**/ + for (i = 0; i < nd; i++) { + @type@ s = 0; +/**begin repeat2 + * #ksz = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11# */ +#if @ksz@ <= @ksz_outer@ + s += d[(i + @ksz@ - 1) * dstride] * k@ksz@; +#endif +/**end repeat2**/ + out[i * ostride] = s; + } + return 1; + } +/**end repeat1**/ + default: + return 0; + } + } +/**end repeat**/ + default: + return 0; + } +} + +/* +*/ + +/* A clone function for the datetime dtype c_metadata */ +static NpyAuxData * +_datetime_dtype_metadata_clone(NpyAuxData *data) +{ + PyArray_DatetimeDTypeMetaData *newdata = + (PyArray_DatetimeDTypeMetaData *)PyArray_malloc( + sizeof(*newdata)); + if (newdata == NULL) { + PyErr_NoMemory(); + return NULL; + } + + memcpy(newdata, data, sizeof(*newdata)); + + return (NpyAuxData *)newdata; +} + +/* + * Allcoate and initialize a PyArray_DatetimeDTypeMetaData object + */ +static NpyAuxData* +_create_datetime_metadata(NPY_DATETIMEUNIT base, int num) +{ + PyArray_DatetimeDTypeMetaData *data; + + /* Allocate memory for the metadata */ + data = PyArray_malloc(sizeof(*data)); + if (data == NULL) { + PyErr_NoMemory(); + return NULL; + } + + /* Initialize the base aux data */ + memset(data, 0, sizeof(PyArray_DatetimeDTypeMetaData)); + data->base.free = (NpyAuxData_FreeFunc *)PyArray_free; + data->base.clone = _datetime_dtype_metadata_clone; + + data->meta.base = base; + data->meta.num = num; + + return (NpyAuxData*)data; +} + + +/* + ***************************************************************************** + ** SETUP FUNCTION POINTERS ** + ***************************************************************************** + */ + +/**begin repeat + * + * #from = VOID, STRING, UNICODE# + * #suff = void, string, unicode# + * #sort = 0, 1, 1# + * #align = char, char, npy_ucs4# + * #NAME = Void, String, Unicode# + * #endian = |, |, =# + * #flags = 0, 0, NPY_NEEDS_INIT# + */ +static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { + { + @from@_to_BOOL, + 
@from@_to_BYTE, + @from@_to_UBYTE, + @from@_to_SHORT, + @from@_to_USHORT, + @from@_to_INT, + @from@_to_UINT, + @from@_to_LONG, + @from@_to_ULONG, + @from@_to_LONGLONG, + @from@_to_ULONGLONG, + @from@_to_FLOAT, + @from@_to_DOUBLE, + @from@_to_LONGDOUBLE, + @from@_to_CFLOAT, + @from@_to_CDOUBLE, + @from@_to_CLONGDOUBLE, + @from@_to_OBJECT, + @from@_to_STRING, + @from@_to_UNICODE, + @from@_to_VOID + }, + @from@_getitem, + @from@_setitem, + (PyArray_CopySwapNFunc*)@from@_copyswapn, + (PyArray_CopySwapFunc*)@from@_copyswap, + (PyArray_CompareFunc*)@from@_compare, + (PyArray_ArgFunc*)@from@_argmax, + (PyArray_DotFunc*)NULL, + (PyArray_ScanFunc*)@from@_scan, + @from@_fromstr, + (PyArray_NonzeroFunc*)@from@_nonzero, + (PyArray_FillFunc*)NULL, + (PyArray_FillWithScalarFunc*)NULL, +#if @sort@ + { + quicksort_@suff@, + heapsort_@suff@, + mergesort_@suff@ + }, + { + aquicksort_@suff@, + aheapsort_@suff@, + amergesort_@suff@ + }, +#else + { + NULL, NULL, NULL + }, + { + NULL, NULL, NULL + }, +#endif + NULL, + (PyArray_ScalarKindFunc*)NULL, + NULL, + NULL, + (PyArray_FastClipFunc *)NULL, + (PyArray_FastPutmaskFunc *)NULL, + (PyArray_FastTakeFunc *)NULL, + (PyArray_ArgFunc*)@from@_argmin +}; + +/* + * FIXME: check for PY3K + */ +static PyArray_Descr @from@_Descr = { + PyObject_HEAD_INIT(&PyArrayDescr_Type) + /* typeobj */ + &Py@NAME@ArrType_Type, + /* kind */ + NPY_@from@LTR, + /* type */ + NPY_@from@LTR, + /* byteorder */ + '@endian@', + /* flags, unicode needs init as py3.3 does not like printing garbage */ + @flags@, + /* type_num */ + NPY_@from@, + /* elsize */ + 0, + /* alignment */ + _ALIGN(@align@), + /* subarray */ + NULL, + /* fields */ + NULL, + /* names */ + NULL, + /* f */ + &_Py@NAME@_ArrFuncs, + /* metadata */ + NULL, + /* c_metadata */ + NULL, + /* hash */ + -1, +}; + +/**end repeat**/ + +/**begin repeat + * + * #from = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, DATETIME, TIMEDELTA# + * #suff = bool, + * byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, datetime, timedelta# + * #sort = 1*18, 0*1, 1*2# + * #fromtype = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * PyObject *, npy_datetime, npy_timedelta# + * #NAME = Bool, + * Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble, + * Object, Datetime, Timedelta# + * #kind = GENBOOL, + * SIGNED, UNSIGNED, SIGNED, UNSIGNED, SIGNED, UNSIGNED, + * SIGNED, UNSIGNED, SIGNED, UNSIGNED, + * FLOATING, FLOATING, FLOATING, FLOATING, + * COMPLEX, COMPLEX, COMPLEX, + * OBJECT, DATETIME, TIMEDELTA# + * #endian = |*3, =*15, |, =*2# + * #isobject= 0*18,NPY_OBJECT_DTYPE_FLAGS,0*2# + */ + +static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { + { + @from@_to_BOOL, + @from@_to_BYTE, + @from@_to_UBYTE, + @from@_to_SHORT, + @from@_to_USHORT, + @from@_to_INT, + @from@_to_UINT, + @from@_to_LONG, + @from@_to_ULONG, + @from@_to_LONGLONG, + @from@_to_ULONGLONG, + @from@_to_FLOAT, + @from@_to_DOUBLE, + @from@_to_LONGDOUBLE, + @from@_to_CFLOAT, + @from@_to_CDOUBLE, + @from@_to_CLONGDOUBLE, + @from@_to_OBJECT, + @from@_to_STRING, + @from@_to_UNICODE, + @from@_to_VOID + 
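+        /*
+         * This cast[] table is indexed by the target NPY_TYPES value;
+         * the slots that follow mirror the PyArray_ArrFuncs layout
+         * (getitem, setitem, copyswapn, copyswap, compare, argmax,
+         * dotfunc, ...).  Code reaches them through the descriptor,
+         * roughly (illustrative, not a real call site):
+         *
+         *     PyArray_ArrFuncs *f = PyArray_DESCR(arr)->f;
+         *     f->argmax(data, n, &ind, (void *)arr);
+         */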
}, + @from@_getitem, + @from@_setitem, + (PyArray_CopySwapNFunc*)@from@_copyswapn, + (PyArray_CopySwapFunc*)@from@_copyswap, + (PyArray_CompareFunc*)@from@_compare, + (PyArray_ArgFunc*)@from@_argmax, + (PyArray_DotFunc*)@from@_dot, + (PyArray_ScanFunc*)@from@_scan, + @from@_fromstr, + (PyArray_NonzeroFunc*)@from@_nonzero, + (PyArray_FillFunc*)@from@_fill, + (PyArray_FillWithScalarFunc*)@from@_fillwithscalar, +#if @sort@ + { + quicksort_@suff@, + heapsort_@suff@, + mergesort_@suff@ + }, + { + aquicksort_@suff@, + aheapsort_@suff@, + amergesort_@suff@ + }, +#else + { + NULL, NULL, NULL + }, + { + NULL, NULL, NULL + }, +#endif + NULL, + (PyArray_ScalarKindFunc*)NULL, + NULL, + NULL, + (PyArray_FastClipFunc*)@from@_fastclip, + (PyArray_FastPutmaskFunc*)@from@_fastputmask, + (PyArray_FastTakeFunc*)@from@_fasttake, + (PyArray_ArgFunc*)@from@_argmin +}; + +/* + * FIXME: check for PY3K + */ +NPY_NO_EXPORT PyArray_Descr @from@_Descr = { + PyObject_HEAD_INIT(&PyArrayDescr_Type) + /* typeobj */ + &Py@NAME@ArrType_Type, + /* kind */ + NPY_@kind@LTR, + /* type */ + NPY_@from@LTR, + /* byteorder */ + '@endian@', + /* flags */ + @isobject@, + /* type_num */ + NPY_@from@, + /* elsize */ + sizeof(@fromtype@), + /* alignment */ + _ALIGN(@fromtype@), + /* subarray */ + NULL, + /* fields */ + NULL, + /* names */ + NULL, + /* f */ + &_Py@NAME@_ArrFuncs, + /* metadata */ + NULL, + /* c_metadata */ + NULL, + /* hash */ + -1, +}; + +/**end repeat**/ + +#define _MAX_LETTER 128 +static char _letter_to_num[_MAX_LETTER]; + +static PyArray_Descr *_builtin_descrs[] = { + &BOOL_Descr, + &BYTE_Descr, + &UBYTE_Descr, + &SHORT_Descr, + &USHORT_Descr, + &INT_Descr, + &UINT_Descr, + &LONG_Descr, + &ULONG_Descr, + &LONGLONG_Descr, + &ULONGLONG_Descr, + &FLOAT_Descr, + &DOUBLE_Descr, + &LONGDOUBLE_Descr, + &CFLOAT_Descr, + &CDOUBLE_Descr, + &CLONGDOUBLE_Descr, + &OBJECT_Descr, + &STRING_Descr, + &UNICODE_Descr, + &VOID_Descr, + &DATETIME_Descr, + &TIMEDELTA_Descr, + &HALF_Descr +}; + +/*NUMPY_API + * Get the PyArray_Descr structure for a type. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_DescrFromType(int type) +{ + PyArray_Descr *ret = NULL; + + if (type < NPY_NTYPES) { + ret = _builtin_descrs[type]; + } + else if (type == NPY_NOTYPE) { + /* + * This needs to not raise an error so + * that PyArray_DescrFromType(NPY_NOTYPE) + * works for backwards-compatible C-API + */ + return NULL; + } + else if ((type == NPY_CHAR) || (type == NPY_CHARLTR)) { + if (type == NPY_CHAR) { + /* + * warning added 2017-04-25, 1.13 + * deprecated in 1.7 + * */ + if (DEPRECATE("The NPY_CHAR type_num is deprecated. 
" + "Please port your code to use " + "NPY_STRING instead.") < 0) { + return NULL; + } + } + ret = PyArray_DescrNew(_builtin_descrs[NPY_STRING]); + if (ret == NULL) { + return NULL; + } + ret->elsize = 1; + ret->type = NPY_CHARLTR; + return ret; + } + else if (PyTypeNum_ISUSERDEF(type)) { + ret = userdescrs[type - NPY_USERDEF]; + } + else { + int num = NPY_NTYPES; + if (type < _MAX_LETTER) { + num = (int) _letter_to_num[type]; + } + if (num >= NPY_NTYPES) { + ret = NULL; + } + else { + ret = _builtin_descrs[num]; + } + } + if (ret == NULL) { + PyErr_SetString(PyExc_ValueError, + "Invalid data-type for array"); + } + else { + Py_INCREF(ret); + } + + return ret; +} + +/* + ***************************************************************************** + ** SETUP TYPE INFO ** + ***************************************************************************** + */ + + +/* + * This function is called during numpy module initialization, + * and is used to initialize internal dtype tables. + */ +NPY_NO_EXPORT int +set_typeinfo(PyObject *dict) +{ + PyObject *infodict, *s; + int i; + + PyArray_Descr *dtype; + PyObject *cobj, *key; + + /* + * Add cast functions for the new types + */ + + /**begin repeat + * + * #name1 = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, STRING, UNICODE, VOID, + * DATETIME,TIMEDELTA# + */ + + /**begin repeat1 + * + * #name2 = HALF, DATETIME, TIMEDELTA# + */ + + dtype = _builtin_descrs[NPY_@name1@]; + if (dtype->f->castdict == NULL) { + dtype->f->castdict = PyDict_New(); + if (dtype->f->castdict == NULL) { + return -1; + } + } + key = PyInt_FromLong(NPY_@name2@); + if (key == NULL) { + return -1; + } + cobj = NpyCapsule_FromVoidPtr((void *)@name1@_to_@name2@, NULL); + if (cobj == NULL) { + Py_DECREF(key); + return -1; + } + if (PyDict_SetItem(dtype->f->castdict, key, cobj) < 0) { + Py_DECREF(key); + Py_DECREF(cobj); + return -1; + } + Py_DECREF(key); + Py_DECREF(cobj); + + /**end repeat1**/ + + /**end repeat**/ + + _builtin_descrs[NPY_DATETIME]->c_metadata = _create_datetime_metadata( + NPY_DATETIME_DEFAULTUNIT, 1); + if (_builtin_descrs[NPY_DATETIME]->c_metadata == NULL) { + return -1; + } + _builtin_descrs[NPY_TIMEDELTA]->c_metadata = _create_datetime_metadata( + NPY_DATETIME_DEFAULTUNIT, 1); + if (_builtin_descrs[NPY_DATETIME]->c_metadata == NULL) { + return -1; + } + + for (i = 0; i < _MAX_LETTER; i++) { + _letter_to_num[i] = NPY_NTYPES; + } + + /**begin repeat + * + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * INTP, UINTP, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, STRING, UNICODE, VOID, + * DATETIME,TIMEDELTA# + */ + + _letter_to_num[NPY_@name@LTR] = NPY_@name@; + + /**end repeat**/ + + _letter_to_num[NPY_STRINGLTR2] = NPY_STRING; + + /**begin repeat + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * OBJECT, STRING, UNICODE, VOID, + * DATETIME, TIMEDELTA# + */ + + @name@_Descr.fields = Py_None; + + /**end repeat**/ + + + /**begin repeat + * #name = STRING, UNICODE, VOID# + */ + + PyDataType_MAKEUNSIZED(&@name@_Descr); + + /**end repeat**/ + + /* Set a dictionary with type information */ + infodict = PyDict_New(); + if (infodict == NULL) return -1; + + + /**begin repeat + * + * #name = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * 
INTP, UINTP, + * LONG, ULONG, LONGLONG, ULONGLONG# + * #uname = BOOL, + * BYTE*2, SHORT*2, INT*2, + * INTP*2, + * LONG*2, LONGLONG*2# + * #Name = Bool, + * Byte, UByte, Short, UShort, Int, UInt, + * Intp, UIntp, + * Long, ULong, LongLong, ULongLong# + * #type = npy_bool, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_intp, npy_uintp, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #max= 1, + * NPY_MAX_BYTE, NPY_MAX_UBYTE, NPY_MAX_SHORT, + * NPY_MAX_USHORT, NPY_MAX_INT, PyLong_FromUnsignedLong(NPY_MAX_UINT), + * PyLong_FromLongLong((npy_longlong) NPY_MAX_INTP), + * PyLong_FromUnsignedLongLong((npy_ulonglong) NPY_MAX_UINTP), + * NPY_MAX_LONG, + * PyLong_FromUnsignedLong((npy_ulong) NPY_MAX_ULONG), + * PyLong_FromLongLong((npy_longlong) NPY_MAX_LONGLONG), + * PyLong_FromUnsignedLongLong((npy_ulonglong) NPY_MAX_ULONGLONG)# + * #min = 0, NPY_MIN_BYTE, 0, NPY_MIN_SHORT, 0, NPY_MIN_INT, 0, + * PyLong_FromLongLong((npy_longlong) NPY_MIN_INTP), + * 0, NPY_MIN_LONG, 0, + * PyLong_FromLongLong((npy_longlong) NPY_MIN_LONGLONG), 0# + * #cx = i*6, N, N, N, l, N, N, N# + * #cn = i*7, N, i, l, i, N, i# + */ + + s = PyArray_typeinforanged( + NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@uname@, _ALIGN(@type@), + Py_BuildValue("@cx@", @max@), + Py_BuildValue("@cn@", @min@), + &Py@Name@ArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "@name@", s); + Py_DECREF(s); + + + /**end repeat**/ + + + /**begin repeat + * + * #type = npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #name = HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #Name = Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + */ + s = PyArray_typeinfo( + NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@name@, + _ALIGN(@type@), &Py@Name@ArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "@name@", s); + Py_DECREF(s); + + /**end repeat**/ + + s = PyArray_typeinfo( + NPY_OBJECTLTR, NPY_OBJECT, sizeof(PyObject *) * CHAR_BIT, + _ALIGN(PyObject *), + &PyObjectArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "OBJECT", s); + Py_DECREF(s); + s = PyArray_typeinfo( + NPY_STRINGLTR, NPY_STRING, 0, _ALIGN(char), + &PyStringArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "STRING", s); + Py_DECREF(s); + s = PyArray_typeinfo( + NPY_UNICODELTR, NPY_UNICODE, 0, _ALIGN(npy_ucs4), + &PyUnicodeArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "UNICODE", s); + Py_DECREF(s); + s = PyArray_typeinfo( + NPY_VOIDLTR, NPY_VOID, 0, _ALIGN(char), + &PyVoidArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "VOID", s); + Py_DECREF(s); + s = PyArray_typeinforanged( + NPY_DATETIMELTR, NPY_DATETIME, NPY_BITSOF_DATETIME, + _ALIGN(npy_datetime), + MyPyLong_FromInt64(NPY_MAX_DATETIME), + MyPyLong_FromInt64(NPY_MIN_DATETIME), + &PyDatetimeArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "DATETIME", s); + Py_DECREF(s); + s = PyArray_typeinforanged( + NPY_TIMEDELTALTR, NPY_TIMEDELTA, NPY_BITSOF_TIMEDELTA, + _ALIGN(npy_timedelta), + MyPyLong_FromInt64(NPY_MAX_TIMEDELTA), + MyPyLong_FromInt64(NPY_MIN_TIMEDELTA), + &PyTimedeltaArrType_Type + ); + if (s == NULL) { + return -1; + } + PyDict_SetItemString(infodict, "TIMEDELTA", s); + Py_DECREF(s); + +#define SETTYPE(name) \ + Py_INCREF(&Py##name##ArrType_Type); \ + 
PyDict_SetItemString(infodict, #name, \ + (PyObject *)&Py##name##ArrType_Type) + + SETTYPE(Generic); + SETTYPE(Number); + SETTYPE(Integer); + SETTYPE(Inexact); + SETTYPE(SignedInteger); + SETTYPE(UnsignedInteger); + SETTYPE(Floating); + SETTYPE(ComplexFloating); + SETTYPE(Flexible); + SETTYPE(Character); + +#undef SETTYPE + + PyDict_SetItemString(dict, "typeinfo", infodict); + Py_DECREF(infodict); + return 0; +} + +#undef _MAX_LETTER diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/einsum.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/einsum.c.src new file mode 100644 index 0000000000..58af440919 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/einsum.c.src @@ -0,0 +1,2890 @@ +/* + * This file contains the implementation of the 'einsum' function, + * which provides an einstein-summation operation. + * + * Copyright (c) 2011 by Mark Wiebe (mwwiebe@gmail.com) + * The University of British Columbia + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <numpy/npy_common.h> +#include <numpy/arrayobject.h> +#include <numpy/halffloat.h> +#include <npy_pycompat.h> + +#include <ctype.h> + +#include "convert.h" +#include "common.h" +#include "ctors.h" + +#ifdef NPY_HAVE_SSE_INTRINSICS +#define EINSUM_USE_SSE1 1 +#else +#define EINSUM_USE_SSE1 0 +#endif + +/* + * TODO: Only some SSE2 for float64 is implemented. + */ +#ifdef NPY_HAVE_SSE2_INTRINSICS +#define EINSUM_USE_SSE2 1 +#else +#define EINSUM_USE_SSE2 0 +#endif + +#if EINSUM_USE_SSE1 +#include <xmmintrin.h> +#endif + +#if EINSUM_USE_SSE2 +#include <emmintrin.h> +#endif + +#define EINSUM_IS_SSE_ALIGNED(x) ((((npy_intp)x)&0xf) == 0) + +/********** PRINTF DEBUG TRACING **************/ +#define NPY_EINSUM_DBG_TRACING 0 + +#if NPY_EINSUM_DBG_TRACING +#define NPY_EINSUM_DBG_PRINT(s) printf("%s", s); +#define NPY_EINSUM_DBG_PRINT1(s, p1) printf(s, p1); +#define NPY_EINSUM_DBG_PRINT2(s, p1, p2) printf(s, p1, p2); +#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3) printf(s); +#else +#define NPY_EINSUM_DBG_PRINT(s) +#define NPY_EINSUM_DBG_PRINT1(s, p1) +#define NPY_EINSUM_DBG_PRINT2(s, p1, p2) +#define NPY_EINSUM_DBG_PRINT3(s, p1, p2, p3) +#endif +/**********************************************/ + +/**begin repeat + * #name = byte, short, int, long, longlong, + * ubyte, ushort, uint, ulong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #temptype = npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_float, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble# + * #to = ,,,,, + * ,,,,, + * npy_float_to_half,,,, + * ,,# + * #from = ,,,,, + * ,,,,, + * npy_half_to_float,,,, + * ,,# + * #complex = 0*5, + * 0*5, + * 0*4, + * 1*3# + * #float32 = 0*5, + * 0*5, + * 0,1,0,0, + * 0*3# + * #float64 = 0*5, + * 0*5, + * 0,0,1,0, + * 0*3# + */ + +/**begin repeat1 + * #nop = 1, 2, 3, 1000# + * #noplabel = one, two, three, any# + */ +static void +@name@_sum_of_products_@noplabel@(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ +#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@) + char *data0 = 
dataptr[0]; + npy_intp stride0 = strides[0]; +#endif +#if (@nop@ == 2 || @nop@ == 3) && !@complex@ + char *data1 = dataptr[1]; + npy_intp stride1 = strides[1]; +#endif +#if (@nop@ == 3) && !@complex@ + char *data2 = dataptr[2]; + npy_intp stride2 = strides[2]; +#endif +#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@) + char *data_out = dataptr[@nop@]; + npy_intp stride_out = strides[@nop@]; +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_@noplabel@ (%d)\n", (int)count); + + while (count--) { +#if !@complex@ +# if @nop@ == 1 + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) + + @from@(*(@type@ *)data_out)); + data0 += stride0; + data_out += stride_out; +# elif @nop@ == 2 + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) + + @from@(*(@type@ *)data_out)); + data0 += stride0; + data1 += stride1; + data_out += stride_out; +# elif @nop@ == 3 + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) * + @from@(*(@type@ *)data2) + + @from@(*(@type@ *)data_out)); + data0 += stride0; + data1 += stride1; + data2 += stride2; + data_out += stride_out; +# else + @temptype@ temp = @from@(*(@type@ *)dataptr[0]); + int i; + for (i = 1; i < nop; ++i) { + temp *= @from@(*(@type@ *)dataptr[i]); + } + *(@type@ *)dataptr[nop] = @to@(temp + + @from@(*(@type@ *)dataptr[i])); + for (i = 0; i <= nop; ++i) { + dataptr[i] += strides[i]; + } +# endif +#else /* complex */ +# if @nop@ == 1 + ((@temptype@ *)data_out)[0] = ((@temptype@ *)data0)[0] + + ((@temptype@ *)data_out)[0]; + ((@temptype@ *)data_out)[1] = ((@temptype@ *)data0)[1] + + ((@temptype@ *)data_out)[1]; + data0 += stride0; + data_out += stride_out; +# else +# if @nop@ <= 3 +#define _SUMPROD_NOP @nop@ +# else +#define _SUMPROD_NOP nop +# endif + @temptype@ re, im, tmp; + int i; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; + for (i = 1; i < _SUMPROD_NOP; ++i) { + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; + re = tmp; + } + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1]; + + for (i = 0; i <= _SUMPROD_NOP; ++i) { + dataptr[i] += strides[i]; + } +#undef _SUMPROD_NOP +# endif +#endif + } +} + +#if @nop@ == 1 + +static void +@name@_sum_of_products_contig_one(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data_out = (@type@ *)dataptr[1]; + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_one (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: +#if !@complex@ + data_out[@i@] = @to@(@from@(data0[@i@]) + + @from@(data_out[@i@])); +#else + ((@temptype@ *)data_out + 2*@i@)[0] = + ((@temptype@ *)data0 + 2*@i@)[0] + + ((@temptype@ *)data_out + 2*@i@)[0]; + ((@temptype@ *)data_out + 2*@i@)[1] = + ((@temptype@ *)data0 + 2*@i@)[1] + + ((@temptype@ *)data_out + 2*@i@)[1]; +#endif +/**end repeat2**/ + case 0: + return; + } + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ +#if !@complex@ + data_out[@i@] = @to@(@from@(data0[@i@]) + + @from@(data_out[@i@])); +#else /* complex */ + ((@temptype@ *)data_out + 
2*@i@)[0] = + ((@temptype@ *)data0 + 2*@i@)[0] + + ((@temptype@ *)data_out + 2*@i@)[0]; + ((@temptype@ *)data_out + 2*@i@)[1] = + ((@temptype@ *)data0 + 2*@i@)[1] + + ((@temptype@ *)data_out + 2*@i@)[1]; +#endif +/**end repeat2**/ + data0 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +#elif @nop@ == 2 && !@complex@ + +static void +@name@_sum_of_products_contig_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data_out = (@type@ *)dataptr[2]; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, b; +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + data_out[@i@] = @to@(@from@(data0[@i@]) * + @from@(data1[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ + case 0: + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) && + EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@)); + b = _mm_add_ps(a, _mm_load_ps(data_out+@i@)); + _mm_store_ps(data_out+@i@, b); +/**end repeat2**/ + data0 += 8; + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@)); + b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@)); + _mm_storeu_ps(data_out+@i@, b); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + data_out[@i@] = @to@(@from@(data0[@i@]) * + @from@(data1[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ +#endif + data0 += 8; + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +/* Some extra specializations for the two operand case */ +static void +@name@_sum_of_products_stride0_contig_outcontig_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @temptype@ value0 = @from@(*(@type@ *)dataptr[0]); + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data_out = (@type@ *)dataptr[2]; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, b, value0_sse; +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, b, value0_sse; +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outcontig_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + data_out[@i@] = @to@(value0 * + @from@(data1[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ + case 0: + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + value0_sse = _mm_set_ps1(value0); + + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(value0_sse, _mm_load_ps(data1+@i@)); + b = _mm_add_ps(a, 
_mm_load_ps(data_out+@i@)); + _mm_store_ps(data_out+@i@, b); +/**end repeat2**/ + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + if (count > 0) { + goto finish_after_unrolled_loop; + } + else { + return; + } + } +#elif EINSUM_USE_SSE2 && @float64@ + value0_sse = _mm_set1_pd(value0); + + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data1) && EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(value0_sse, _mm_load_pd(data1+@i@)); + b = _mm_add_pd(a, _mm_load_pd(data_out+@i@)); + _mm_store_pd(data_out+@i@, b); +/**end repeat2**/ + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + if (count > 0) { + goto finish_after_unrolled_loop; + } + else { + return; + } + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(value0_sse, _mm_loadu_ps(data1+@i@)); + b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@)); + _mm_storeu_ps(data_out+@i@, b); +/**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(value0_sse, _mm_loadu_pd(data1+@i@)); + b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@)); + _mm_storeu_pd(data_out+@i@, b); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + data_out[@i@] = @to@(value0 * + @from@(data1[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ +#endif + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + if (count > 0) { + goto finish_after_unrolled_loop; + } +} + +static void +@name@_sum_of_products_contig_stride0_outcontig_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @temptype@ value1 = @from@(*(@type@ *)dataptr[1]); + @type@ *data_out = (@type@ *)dataptr[2]; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, b, value1_sse; +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outcontig_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + data_out[@i@] = @to@(@from@(data0[@i@])* + value1 + + @from@(data_out[@i@])); +/**end repeat2**/ + case 0: + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + value1_sse = _mm_set_ps1(value1); + + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(_mm_load_ps(data0+@i@), value1_sse); + b = _mm_add_ps(a, _mm_load_ps(data_out+@i@)); + _mm_store_ps(data_out+@i@, b); +/**end repeat2**/ + data0 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ +/**begin repeat2 + * #i = 0, 4# + */ + a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), value1_sse); + b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@)); + _mm_storeu_ps(data_out+@i@, b); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + data_out[@i@] = @to@(@from@(data0[@i@])* + value1 + + @from@(data_out[@i@])); +/**end repeat2**/ +#endif + data0 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto 
finish_after_unrolled_loop; +} + +static void +@name@_sum_of_products_contig_contig_outstride0_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @temptype@ accum = 0; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, accum_sse = _mm_setzero_ps(); +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, accum_sse = _mm_setzero_pd(); +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_contig_outstride0_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + accum += @from@(data0[@i@]) * @from@(data1[@i@]); +/**end repeat2**/ + case 0: + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum); + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + + _mm_prefetch(data0 + 512, _MM_HINT_T0); + _mm_prefetch(data1 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + a = _mm_mul_ps(_mm_load_ps(data0+@i@), _mm_load_ps(data1+@i@)); + accum_sse = _mm_add_ps(accum_sse, a); +/**end repeat2**/ + data0 += 8; + data1 += 8; + } + + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#elif EINSUM_USE_SSE2 && @float64@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + + _mm_prefetch(data0 + 512, _MM_HINT_T0); + _mm_prefetch(data1 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@)); + accum_sse = _mm_add_pd(accum_sse, a); +/**end repeat2**/ + data0 += 8; + data1 += 8; + } + + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ + _mm_prefetch(data0 + 512, _MM_HINT_T0); + _mm_prefetch(data1 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + a = _mm_mul_ps(_mm_loadu_ps(data0+@i@), _mm_loadu_ps(data1+@i@)); + accum_sse = _mm_add_ps(accum_sse, a); +/**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ + _mm_prefetch(data0 + 512, _MM_HINT_T0); + _mm_prefetch(data1 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@)); + accum_sse = _mm_add_pd(accum_sse, a); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + accum += @from@(data0[@i@]) * @from@(data1[@i@]); +/**end repeat2**/ +#endif + data0 += 8; + data1 += 8; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#elif EINSUM_USE_SSE2 && @float64@ + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +static void +@name@_sum_of_products_stride0_contig_outstride0_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @temptype@ value0 = @from@(*(@type@ *)dataptr[0]); + @type@ *data1 = (@type@ *)dataptr[1]; + @temptype@ accum = 0; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, accum_sse = _mm_setzero_ps(); +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outstride0_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + accum += @from@(data1[@i@]); +/**end repeat2**/ + case 0: + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + value0 * accum); + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data1)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data1+@i@)); +/**end repeat2**/ + data1 += 8; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data1+@i@)); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + accum += @from@(data1[@i@]); +/**end repeat2**/ +#endif + data1 += 8; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +static void +@name@_sum_of_products_contig_stride0_outstride0_two(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @temptype@ value1 = @from@(*(@type@ *)dataptr[1]); + @temptype@ accum = 0; + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, accum_sse = _mm_setzero_ps(); +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outstride0_two (%d)\n", + (int)count); + +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: + accum += @from@(data0[@i@]); +/**end repeat2**/ + case 0: + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum * value1); + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@)); +/**end repeat2**/ + data0 += 8; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@)); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + accum += @from@(data0[@i@]); +/**end repeat2**/ +#endif + data0 += 8; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +#elif @nop@ == 3 && !@complex@ + +static void +@name@_sum_of_products_contig_three(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data2 = (@type@ *)dataptr[2]; + @type@ *data_out = (@type@ *)dataptr[3]; + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + data_out[@i@] = @to@(@from@(data0[@i@]) * + @from@(data1[@i@]) * + @from@(data2[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ + data0 += 8; + data1 += 8; + data2 += 8; + data_out += 8; + } + + /* Finish off the loop */ + +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + if (count-- == 0) { + return; + } + data_out[@i@] = @to@(@from@(data0[@i@]) * + @from@(data1[@i@]) * + @from@(data2[@i@]) + + @from@(data_out[@i@])); +/**end repeat2**/ +} + +#else /* @nop@ > 3 || @complex */ + +static void +@name@_sum_of_products_contig_@noplabel@(int nop, char **dataptr, + npy_intp *NPY_UNUSED(strides), npy_intp count) +{ + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_@noplabel@ (%d)\n", + (int)count); + + while (count--) { +#if !@complex@ + @temptype@ temp = @from@(*(@type@ *)dataptr[0]); + int i; + for (i = 1; i < nop; ++i) { + temp *= @from@(*(@type@ *)dataptr[i]); + } + *(@type@ *)dataptr[nop] = @to@(temp + + @from@(*(@type@ *)dataptr[i])); + for (i = 0; i <= nop; ++i) { + dataptr[i] += sizeof(@type@); + } +#else /* complex */ +# if @nop@ <= 3 +# define _SUMPROD_NOP @nop@ +# else +# define _SUMPROD_NOP nop +# endif + @temptype@ re, im, tmp; + int i; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; + for (i = 1; i < _SUMPROD_NOP; ++i) { + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; + re = tmp; + } + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1]; + + for (i = 0; i <= _SUMPROD_NOP; ++i) { + dataptr[i] += sizeof(@type@); + } +# undef _SUMPROD_NOP +#endif + } +} + +#endif /* functions for various @nop@ */ + +#if @nop@ == 1 + +static void +@name@_sum_of_products_contig_outstride0_one(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ +#if @complex@ + @temptype@ accum_re = 0, accum_im = 0; + @temptype@ *data0 = (@temptype@ *)dataptr[0]; +#else + @temptype@ accum = 0; + @type@ *data0 = (@type@ *)dataptr[0]; +#endif + +#if EINSUM_USE_SSE1 && @float32@ + __m128 a, accum_sse = _mm_setzero_ps(); +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, accum_sse = _mm_setzero_pd(); +#endif + + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_outstride0_one (%d)\n", + (int)count); + +/* This is placed before the main loop to make small 
counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat2 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: +#if !@complex@ + accum += @from@(data0[@i@]); +#else /* complex */ + accum_re += data0[2*@i@+0]; + accum_im += data0[2*@i@+1]; +#endif +/**end repeat2**/ + case 0: +#if @complex@ + ((@temptype@ *)dataptr[1])[0] += accum_re; + ((@temptype@ *)dataptr[1])[1] += accum_im; +#else + *((@type@ *)dataptr[1]) = @to@(accum + + @from@(*((@type@ *)dataptr[1]))); +#endif + return; + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + + _mm_prefetch(data0 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_ps(accum_sse, _mm_load_ps(data0+@i@)); +/**end repeat2**/ + data0 += 8; + } + + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#elif EINSUM_USE_SSE2 && @float64@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + + _mm_prefetch(data0 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@)); +/**end repeat2**/ + data0 += 8; + } + + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#endif + + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +#if EINSUM_USE_SSE1 && @float32@ + _mm_prefetch(data0 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 4# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@)); +/**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ + _mm_prefetch(data0 + 512, _MM_HINT_T0); + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@)); +/**end repeat2**/ +#else +/**begin repeat2 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ +# if !@complex@ + accum += @from@(data0[@i@]); +# else /* complex */ + accum_re += data0[2*@i@+0]; + accum_im += data0[2*@i@+1]; +# endif +/**end repeat2**/ +#endif + +#if !@complex@ + data0 += 8; +#else + data0 += 8*2; +#endif + } + +#if EINSUM_USE_SSE1 && @float32@ + /* Add the four SSE values and put in accum */ + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); + accum_sse = _mm_add_ps(a, accum_sse); + a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); + accum_sse = _mm_add_ps(a, accum_sse); + _mm_store_ss(&accum, accum_sse); +#elif EINSUM_USE_SSE2 && @float64@ + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); +#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; +} + +#endif /* @nop@ == 1 */ + +static void +@name@_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ +#if @complex@ + @temptype@ accum_re = 0, accum_im = 0; +#else + @temptype@ accum = 0; +#endif + +#if (@nop@ == 1) || (@nop@ <= 3 && !@complex@) + char *data0 = dataptr[0]; + npy_intp stride0 = strides[0]; +#endif +#if (@nop@ == 2 || @nop@ == 3) && !@complex@ + char *data1 = dataptr[1]; + npy_intp stride1 = strides[1]; +#endif +#if (@nop@ == 3) && !@complex@ + char *data2 = dataptr[2]; + npy_intp stride2 = strides[2]; +#endif + + NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_outstride0_@noplabel@ (%d)\n", + (int)count); + + while (count--) { +#if !@complex@ +# if @nop@ == 1 + accum += @from@(*(@type@ *)data0); + data0 += stride0; +# elif @nop@ == 2 + accum += @from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1); + data0 += stride0; + data1 += stride1; +# elif @nop@ == 3 + accum += @from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) * + @from@(*(@type@ *)data2); + data0 += stride0; + data1 += stride1; + data2 += stride2; +# else + @temptype@ temp = @from@(*(@type@ *)dataptr[0]); + int i; + for (i = 1; i < nop; ++i) { + temp *= @from@(*(@type@ *)dataptr[i]); + } + accum += temp; + for (i = 0; i < nop; ++i) { + dataptr[i] += strides[i]; + } +# endif +#else /* complex */ +# if @nop@ == 1 + accum_re += ((@temptype@ *)data0)[0]; + accum_im += ((@temptype@ *)data0)[1]; + data0 += stride0; +# else +# if @nop@ <= 3 +#define _SUMPROD_NOP @nop@ +# else +#define _SUMPROD_NOP nop +# endif + @temptype@ re, im, tmp; + int i; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; + for (i = 1; i < _SUMPROD_NOP; ++i) { + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; + re = tmp; + } + accum_re += re; + accum_im += im; + for (i = 0; i < _SUMPROD_NOP; ++i) { + dataptr[i] += strides[i]; + } +#undef _SUMPROD_NOP +# endif +#endif + } + +#if @complex@ +# if @nop@ <= 3 + ((@temptype@ *)dataptr[@nop@])[0] += accum_re; + ((@temptype@ *)dataptr[@nop@])[1] += accum_im; +# else + ((@temptype@ *)dataptr[nop])[0] += accum_re; + ((@temptype@ *)dataptr[nop])[1] += accum_im; +# endif +#else +# if @nop@ <= 3 + *((@type@ *)dataptr[@nop@]) = @to@(accum + + @from@(*((@type@ *)dataptr[@nop@]))); +# else + *((@type@ *)dataptr[nop]) = @to@(accum + + @from@(*((@type@ *)dataptr[nop]))); +# endif +#endif + +} + +/**end repeat1**/ + +/**end repeat**/ + 
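+
For reference, every kernel generated by the repeat blocks above implements the same scalar contract: for each element, multiply the corresponding values of the nop input operands and accumulate the product into the output. The SSE and unrolled variants only change the evaluation order and speed, not the result (up to floating-point rounding). A minimal standalone sketch of that contract, in plain C for doubles only (the name ref_sum_of_products is illustrative and is not part of the NumPy sources):

#include <stdio.h>
#include <stddef.h>

/*
 * Reference semantics of the contiguous sum-of-products kernels:
 * out[j] += data[0][j] * data[1][j] * ... * data[nop-1][j] for every j.
 */
static void
ref_sum_of_products(int nop, const double **data, double *out, size_t count)
{
    for (size_t j = 0; j < count; ++j) {
        double temp = data[0][j];
        for (int i = 1; i < nop; ++i) {
            temp *= data[i][j];
        }
        out[j] += temp;
    }
}

int main(void)
{
    double a[4] = {1, 2, 3, 4};
    double b[4] = {5, 6, 7, 8};
    double out[4] = {0};
    const double *ops[2] = {a, b};

    /* Two contiguous operands, accumulated elementwise into out. */
    ref_sum_of_products(2, ops, out, 4);

    for (int j = 0; j < 4; ++j) {
        printf("%g ", out[j]);   /* prints: 5 12 21 32 */
    }
    printf("\n");
    return 0;
}

The outstride0 variants above collapse the output dimension instead, accumulating all products into a single scalar, which is why they keep a local accumulator and write through dataptr[nop] only once at the end.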
+ +/* Do OR of ANDs for the boolean type */ + +/**begin repeat + * #nop = 1, 2, 3, 1000# + * #noplabel = one, two, three, any# + */ + +static void +bool_sum_of_products_@noplabel@(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ +#if (@nop@ <= 3) + char *data0 = dataptr[0]; + npy_intp stride0 = strides[0]; +#endif +#if (@nop@ == 2 || @nop@ == 3) + char *data1 = dataptr[1]; + npy_intp stride1 = strides[1]; +#endif +#if (@nop@ == 3) + char *data2 = dataptr[2]; + npy_intp stride2 = strides[2]; +#endif +#if (@nop@ <= 3) + char *data_out = dataptr[@nop@]; + npy_intp stride_out = strides[@nop@]; +#endif + + while (count--) { +#if @nop@ == 1 + *(npy_bool *)data_out = *(npy_bool *)data0 || + *(npy_bool *)data_out; + data0 += stride0; + data_out += stride_out; +#elif @nop@ == 2 + *(npy_bool *)data_out = (*(npy_bool *)data0 && + *(npy_bool *)data1) || + *(npy_bool *)data_out; + data0 += stride0; + data1 += stride1; + data_out += stride_out; +#elif @nop@ == 3 + *(npy_bool *)data_out = (*(npy_bool *)data0 && + *(npy_bool *)data1 && + *(npy_bool *)data2) || + *(npy_bool *)data_out; + data0 += stride0; + data1 += stride1; + data2 += stride2; + data_out += stride_out; +#else + npy_bool temp = *(npy_bool *)dataptr[0]; + int i; + for (i = 1; i < nop; ++i) { + temp = temp && *(npy_bool *)dataptr[i]; + } + *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i]; + for (i = 0; i <= nop; ++i) { + dataptr[i] += strides[i]; + } +#endif + } +} + +static void +bool_sum_of_products_contig_@noplabel@(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ +#if (@nop@ <= 3) + char *data0 = dataptr[0]; +#endif +#if (@nop@ == 2 || @nop@ == 3) + char *data1 = dataptr[1]; +#endif +#if (@nop@ == 3) + char *data2 = dataptr[2]; +#endif +#if (@nop@ <= 3) + char *data_out = dataptr[@nop@]; +#endif + +#if (@nop@ <= 3) +/* This is placed before the main loop to make small counts faster */ +finish_after_unrolled_loop: + switch (count) { +/**begin repeat1 + * #i = 6, 5, 4, 3, 2, 1, 0# + */ + case @i@+1: +# if @nop@ == 1 + ((npy_bool *)data_out)[@i@] = ((npy_bool *)data0)[@i@] || + ((npy_bool *)data_out)[@i@]; +# elif @nop@ == 2 + ((npy_bool *)data_out)[@i@] = + (((npy_bool *)data0)[@i@] && + ((npy_bool *)data1)[@i@]) || + ((npy_bool *)data_out)[@i@]; +# elif @nop@ == 3 + ((npy_bool *)data_out)[@i@] = + (((npy_bool *)data0)[@i@] && + ((npy_bool *)data1)[@i@] && + ((npy_bool *)data2)[@i@]) || + ((npy_bool *)data_out)[@i@]; +# endif +/**end repeat1**/ + case 0: + return; + } +#endif + +/* Unroll the loop by 8 for fixed-size nop */ +#if (@nop@ <= 3) + while (count >= 8) { + count -= 8; +#else + while (count--) { +#endif + +# if @nop@ == 1 +/**begin repeat1 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + *((npy_bool *)data_out + @i@) = (*((npy_bool *)data0 + @i@)) || + (*((npy_bool *)data_out + @i@)); +/**end repeat1**/ + data0 += 8*sizeof(npy_bool); + data_out += 8*sizeof(npy_bool); +# elif @nop@ == 2 +/**begin repeat1 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + *((npy_bool *)data_out + @i@) = + ((*((npy_bool *)data0 + @i@)) && + (*((npy_bool *)data1 + @i@))) || + (*((npy_bool *)data_out + @i@)); +/**end repeat1**/ + data0 += 8*sizeof(npy_bool); + data1 += 8*sizeof(npy_bool); + data_out += 8*sizeof(npy_bool); +# elif @nop@ == 3 +/**begin repeat1 + * #i = 0, 1, 2, 3, 4, 5, 6, 7# + */ + *((npy_bool *)data_out + @i@) = + ((*((npy_bool *)data0 + @i@)) && + (*((npy_bool *)data1 + @i@)) && + (*((npy_bool *)data2 + @i@))) || + (*((npy_bool *)data_out + @i@)); +/**end repeat1**/ + data0 += 8*sizeof(npy_bool); + data1 += 
8*sizeof(npy_bool); + data2 += 8*sizeof(npy_bool); + data_out += 8*sizeof(npy_bool); +# else + npy_bool temp = *(npy_bool *)dataptr[0]; + int i; + for (i = 1; i < nop; ++i) { + temp = temp && *(npy_bool *)dataptr[i]; + } + *(npy_bool *)dataptr[nop] = temp || *(npy_bool *)dataptr[i]; + for (i = 0; i <= nop; ++i) { + dataptr[i] += sizeof(npy_bool); + } +# endif + } + + /* If the loop was unrolled, we need to finish it off */ +#if (@nop@ <= 3) + goto finish_after_unrolled_loop; +#endif +} + +static void +bool_sum_of_products_outstride0_@noplabel@(int nop, char **dataptr, + npy_intp *strides, npy_intp count) +{ + npy_bool accum = 0; + +#if (@nop@ <= 3) + char *data0 = dataptr[0]; + npy_intp stride0 = strides[0]; +#endif +#if (@nop@ == 2 || @nop@ == 3) + char *data1 = dataptr[1]; + npy_intp stride1 = strides[1]; +#endif +#if (@nop@ == 3) + char *data2 = dataptr[2]; + npy_intp stride2 = strides[2]; +#endif + + while (count--) { +#if @nop@ == 1 + accum = *(npy_bool *)data0 || accum; + data0 += stride0; +#elif @nop@ == 2 + accum = (*(npy_bool *)data0 && *(npy_bool *)data1) || accum; + data0 += stride0; + data1 += stride1; +#elif @nop@ == 3 + accum = (*(npy_bool *)data0 && + *(npy_bool *)data1 && + *(npy_bool *)data2) || accum; + data0 += stride0; + data1 += stride1; + data2 += stride2; +#else + npy_bool temp = *(npy_bool *)dataptr[0]; + int i; + for (i = 1; i < nop; ++i) { + temp = temp && *(npy_bool *)dataptr[i]; + } + accum = temp || accum; + for (i = 0; i <= nop; ++i) { + dataptr[i] += strides[i]; + } +#endif + } + +# if @nop@ <= 3 + *((npy_bool *)dataptr[@nop@]) = accum || *((npy_bool *)dataptr[@nop@]); +# else + *((npy_bool *)dataptr[nop]) = accum || *((npy_bool *)dataptr[nop]); +# endif +} + +/**end repeat**/ + +typedef void (*sum_of_products_fn)(int, char **, npy_intp *, npy_intp); + +/* These tables need to match up with the type enum */ +static sum_of_products_fn +_contig_outstride0_unary_specialization_table[NPY_NTYPES] = { +/**begin repeat + * #name = bool, + * byte, ubyte, + * short, ushort, + * int, uint, + * long, ulong, + * longlong, ulonglong, + * float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, string, unicode, void, + * datetime, timedelta, half# + * #use = 0, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, 1, + * 1, 1, 1, + * 0, 0, 0, 0, + * 0, 0, 1# + */ +#if @use@ + &@name@_sum_of_products_contig_outstride0_one, +#else + NULL, +#endif +/**end repeat**/ +}; /* End of _contig_outstride0_unary_specialization_table */ + +static sum_of_products_fn _binary_specialization_table[NPY_NTYPES][5] = { +/**begin repeat + * #name = bool, + * byte, ubyte, + * short, ushort, + * int, uint, + * long, ulong, + * longlong, ulonglong, + * float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, string, unicode, void, + * datetime, timedelta, half# + * #use = 0, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, 1, + * 0, 0, 0, + * 0, 0, 0, 0, + * 0, 0, 1# + */ +#if @use@ +{ + &@name@_sum_of_products_stride0_contig_outstride0_two, + &@name@_sum_of_products_stride0_contig_outcontig_two, + &@name@_sum_of_products_contig_stride0_outstride0_two, + &@name@_sum_of_products_contig_stride0_outcontig_two, + &@name@_sum_of_products_contig_contig_outstride0_two, +}, +#else + {NULL, NULL, NULL, NULL, NULL}, +#endif +/**end repeat**/ +}; /* End of _binary_specialization_table */ + +static sum_of_products_fn _outstride0_specialized_table[NPY_NTYPES][4] = { +/**begin repeat + * #name = bool, + * byte, ubyte, + * short, ushort, + * int, uint, + * long, 
ulong, + * longlong, ulonglong, + * float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, string, unicode, void, + * datetime, timedelta, half# + * #use = 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, 1, + * 1, 1, 1, + * 0, 0, 0, 0, + * 0, 0, 1# + */ +#if @use@ +{ + &@name@_sum_of_products_outstride0_any, + &@name@_sum_of_products_outstride0_one, + &@name@_sum_of_products_outstride0_two, + &@name@_sum_of_products_outstride0_three +}, +#else + {NULL, NULL, NULL, NULL}, +#endif +/**end repeat**/ +}; /* End of _outstride0_specialized_table */ + +static sum_of_products_fn _allcontig_specialized_table[NPY_NTYPES][4] = { +/**begin repeat + * #name = bool, + * byte, ubyte, + * short, ushort, + * int, uint, + * long, ulong, + * longlong, ulonglong, + * float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, string, unicode, void, + * datetime, timedelta, half# + * #use = 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, 1, + * 1, 1, 1, + * 0, 0, 0, 0, + * 0, 0, 1# + */ +#if @use@ +{ + &@name@_sum_of_products_contig_any, + &@name@_sum_of_products_contig_one, + &@name@_sum_of_products_contig_two, + &@name@_sum_of_products_contig_three +}, +#else + {NULL, NULL, NULL, NULL}, +#endif +/**end repeat**/ +}; /* End of _allcontig_specialized_table */ + +static sum_of_products_fn _unspecialized_table[NPY_NTYPES][4] = { +/**begin repeat + * #name = bool, + * byte, ubyte, + * short, ushort, + * int, uint, + * long, ulong, + * longlong, ulonglong, + * float, double, longdouble, + * cfloat, cdouble, clongdouble, + * object, string, unicode, void, + * datetime, timedelta, half# + * #use = 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, + * 1, 1, 1, + * 1, 1, 1, + * 0, 0, 0, 0, + * 0, 0, 1# + */ +#if @use@ +{ + &@name@_sum_of_products_any, + &@name@_sum_of_products_one, + &@name@_sum_of_products_two, + &@name@_sum_of_products_three +}, +#else + {NULL, NULL, NULL, NULL}, +#endif +/**end repeat**/ +}; /* End of _unnspecialized_table */ + +static sum_of_products_fn +get_sum_of_products_function(int nop, int type_num, + npy_intp itemsize, npy_intp *fixed_strides) +{ + int iop; + + if (type_num >= NPY_NTYPES) { + return NULL; + } + + /* contiguous reduction */ + if (nop == 1 && fixed_strides[0] == itemsize && fixed_strides[1] == 0) { + sum_of_products_fn ret = + _contig_outstride0_unary_specialization_table[type_num]; + if (ret != NULL) { + return ret; + } + } + + /* nop of 2 has more specializations */ + if (nop == 2) { + /* Encode the zero/contiguous strides */ + int code; + code = (fixed_strides[0] == 0) ? 0 : + (fixed_strides[0] == itemsize) ? 2*2*1 : 8; + code += (fixed_strides[1] == 0) ? 0 : + (fixed_strides[1] == itemsize) ? 2*1 : 8; + code += (fixed_strides[2] == 0) ? 0 : + (fixed_strides[2] == itemsize) ? 1 : 8; + if (code >= 2 && code < 7) { + sum_of_products_fn ret = + _binary_specialization_table[type_num][code-2]; + if (ret != NULL) { + return ret; + } + } + } + + /* Inner loop with an output stride of 0 */ + if (fixed_strides[nop] == 0) { + return _outstride0_specialized_table[type_num][nop <= 3 ? nop : 0]; + } + + /* Check for all contiguous */ + for (iop = 0; iop < nop + 1; ++iop) { + if (fixed_strides[iop] != itemsize) { + break; + } + } + + /* Contiguous loop */ + if (iop == nop + 1) { + return _allcontig_specialized_table[type_num][nop <= 3 ? nop : 0]; + } + + /* None of the above specializations caught it, general loops */ + return _unspecialized_table[type_num][nop <= 3 ? 
nop : 0]; +} + + +/* + * Parses the subscripts for one operand into an output of 'ndim' + * labels. The resulting 'op_labels' array will have: + * - the ASCII code of the label for the first occurrence of a label; + * - the (negative) offset to the first occurrence of the label for + * repeated labels; + * - zero for broadcast dimensions, if subscripts has an ellipsis. + * For example: + * - subscripts="abbcbc", ndim=6 -> op_labels=[97, 98, -1, 99, -3, -2] + * - subscripts="ab...bc", ndim=6 -> op_labels=[97, 98, 0, 0, -3, 99] + */ + +static int +parse_operand_subscripts(char *subscripts, int length, + int ndim, int iop, char *op_labels, + char *label_counts, int *min_label, int *max_label) +{ + int i; + int idim = 0; + int ellipsis = -1; + + /* Process all labels for this operand */ + for (i = 0; i < length; ++i) { + int label = subscripts[i]; + + /* A proper label for an axis. */ + if (label > 0 && isalpha(label)) { + /* Check we don't exceed the operator dimensions. */ + if (idim >= ndim) { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string contains " + "too many subscripts for operand %d", iop); + return -1; + } + + op_labels[idim++] = label; + if (label < *min_label) { + *min_label = label; + } + if (label > *max_label) { + *max_label = label; + } + label_counts[label]++; + } + /* The beginning of the ellipsis. */ + else if (label == '.') { + /* Check it's a proper ellipsis. */ + if (ellipsis != -1 || i + 2 >= length + || subscripts[++i] != '.' || subscripts[++i] != '.') { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string contains a " + "'.' that is not part of an ellipsis ('...') " + "in operand %d", iop); + return -1; + } + + ellipsis = idim; + } + else if (label != ' ') { + PyErr_Format(PyExc_ValueError, + "invalid subscript '%c' in einstein sum " + "subscripts string, subscripts must " + "be letters", (char)label); + return -1; + } + } + + /* No ellipsis found, labels must match dimensions exactly. */ + if (ellipsis == -1) { + if (idim != ndim) { + PyErr_Format(PyExc_ValueError, + "operand has more dimensions than subscripts " + "given in einstein sum, but no '...' ellipsis " + "provided to broadcast the extra dimensions."); + return -1; + } + } + /* Ellipsis found, may have to add broadcast dimensions. */ + else if (idim < ndim) { + /* Move labels after ellipsis to the end. */ + for (i = 0; i < idim - ellipsis; ++i) { + op_labels[ndim - i - 1] = op_labels[idim - i - 1]; + } + /* Set all broadcast dimensions to zero. */ + for (i = 0; i < ndim - idim; ++i) { + op_labels[ellipsis + i] = 0; + } + } + + /* + * Find any labels duplicated for this operand, and turn them + * into negative offsets to the axis to merge with. + * + * In C, the char type may be signed or unsigned, but with + * twos complement arithmetic the char is ok either way here, and + * later where it matters the char is cast to a signed char. + */ + for (idim = 0; idim < ndim - 1; ++idim) { + int label = (signed char)op_labels[idim]; + /* If it is a proper label, find any duplicates of it. */ + if (label > 0) { + /* Search for the next matching label. */ + char *next = memchr(op_labels + idim + 1, label, ndim - idim - 1); + + while (next != NULL) { + /* The offset from next to op_labels[idim] (negative). */ + *next = (char)((op_labels + idim) - next); + /* Search for the next matching label. 
*/ + next = memchr(next + 1, label, op_labels + ndim - 1 - next); + } + } + } + + return 0; +} + + +/* + * Parses the subscripts for the output operand into an output that + * includes 'ndim_broadcast' unlabeled dimensions, and returns the total + * number of output dimensions, or -1 if there is an error. Similarly + * to parse_operand_subscripts, the 'out_labels' array will have, for + * each dimension: + * - the ASCII code of the corresponding label; + * - zero for broadcast dimensions, if subscripts has an ellipsis. + */ +static int +parse_output_subscripts(char *subscripts, int length, + int ndim_broadcast, + const char *label_counts, char *out_labels) +{ + int i, bdim; + int ndim = 0; + int ellipsis = 0; + + /* Process all the output labels. */ + for (i = 0; i < length; ++i) { + int label = subscripts[i]; + + /* A proper label for an axis. */ + if (label > 0 && isalpha(label)) { + /* Check that it doesn't occur again. */ + if (memchr(subscripts + i + 1, label, length - i - 1) != NULL) { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string includes " + "output subscript '%c' multiple times", + (char)label); + return -1; + } + /* Check that it was used in the inputs. */ + if (label_counts[label] == 0) { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string included " + "output subscript '%c' which never appeared " + "in an input", (char)label); + return -1; + } + /* Check that there is room in out_labels for this label. */ + if (ndim >= NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string contains " + "too many subscripts in the output"); + return -1; + } + + out_labels[ndim++] = label; + } + /* The beginning of the ellipsis. */ + else if (label == '.') { + /* Check it is a proper ellipsis. */ + if (ellipsis || i + 2 >= length + || subscripts[++i] != '.' || subscripts[++i] != '.') { + PyErr_SetString(PyExc_ValueError, + "einstein sum subscripts string " + "contains a '.' that is not part of " + "an ellipsis ('...') in the output"); + return -1; + } + /* Check there is room in out_labels for broadcast dims. */ + if (ndim + ndim_broadcast > NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "einstein sum subscripts string contains " + "too many subscripts in the output"); + return -1; + } + + ellipsis = 1; + for (bdim = 0; bdim < ndim_broadcast; ++bdim) { + out_labels[ndim++] = 0; + } + } + else if (label != ' ') { + PyErr_Format(PyExc_ValueError, + "invalid subscript '%c' in einstein sum " + "subscripts string, subscripts must " + "be letters", (char)label); + return -1; + } + } + + /* If no ellipsis was found there should be no broadcast dimensions. */ + if (!ellipsis && ndim_broadcast > 0) { + PyErr_SetString(PyExc_ValueError, + "output has more dimensions than subscripts " + "given in einstein sum, but no '...' ellipsis " + "provided to broadcast the extra dimensions."); + return -1; + } + + return ndim; +} + + +/* + * When there's just one operand and no reduction we can return a view + * into 'op'. This calculates the view and stores it in 'ret', if + * possible. Returns -1 on error, 0 otherwise. Note that a 0 return + * does not mean that a view was successfully created. 
+ */ +static int +get_single_op_view(PyArrayObject *op, char *labels, + int ndim_output, char *output_labels, + PyArrayObject **ret) +{ + npy_intp new_strides[NPY_MAXDIMS]; + npy_intp new_dims[NPY_MAXDIMS]; + char *out_label; + int label, i, idim, ndim, ibroadcast = 0; + + ndim = PyArray_NDIM(op); + + /* Initialize the dimensions and strides to zero */ + for (idim = 0; idim < ndim_output; ++idim) { + new_dims[idim] = 0; + new_strides[idim] = 0; + } + + /* Match the labels in the operand with the output labels */ + for (idim = 0; idim < ndim; ++idim) { + /* + * The char type may be either signed or unsigned, we + * need it to be signed here. + */ + label = (signed char)labels[idim]; + /* If this label says to merge axes, get the actual label */ + if (label < 0) { + label = labels[idim+label]; + } + /* If the label is 0, it's an unlabeled broadcast dimension */ + if (label == 0) { + /* The next output label that's a broadcast dimension */ + for (; ibroadcast < ndim_output; ++ibroadcast) { + if (output_labels[ibroadcast] == 0) { + break; + } + } + if (ibroadcast == ndim_output) { + PyErr_SetString(PyExc_ValueError, + "output had too few broadcast dimensions"); + return -1; + } + new_dims[ibroadcast] = PyArray_DIM(op, idim); + new_strides[ibroadcast] = PyArray_STRIDE(op, idim); + ++ibroadcast; + } + else { + /* Find the position for this dimension in the output */ + out_label = (char *)memchr(output_labels, label, + ndim_output); + /* If it's not found, reduction -> can't return a view */ + if (out_label == NULL) { + break; + } + /* Update the dimensions and strides of the output */ + i = out_label - output_labels; + if (new_dims[i] != 0 && new_dims[i] != PyArray_DIM(op, idim)) { + PyErr_Format(PyExc_ValueError, + "dimensions in single operand for collapsing " + "index '%c' don't match (%d != %d)", + label, (int)new_dims[i], (int)PyArray_DIM(op, idim)); + return -1; + } + new_dims[i] = PyArray_DIM(op, idim); + new_strides[i] += PyArray_STRIDE(op, idim); + } + } + /* If we processed all the input axes, return a view */ + if (idim == ndim) { + Py_INCREF(PyArray_DESCR(op)); + *ret = (PyArrayObject *)PyArray_NewFromDescr_int( + Py_TYPE(op), PyArray_DESCR(op), + ndim_output, new_dims, new_strides, PyArray_DATA(op), + PyArray_ISWRITEABLE(op) ? NPY_ARRAY_WRITEABLE : 0, + (PyObject *)op, (PyObject *)op, + 0, 0); + + if (*ret == NULL) { + return -1; + } + return 0; + } + + /* Return success, but that we couldn't make a view */ + *ret = NULL; + return 0; +} + + +/* + * The char type may be either signed or unsigned, we need it to be + * signed here. + */ +static int +_any_labels_are_negative(signed char *labels, int ndim) +{ + int idim; + + for (idim = 0; idim < ndim; ++idim) { + if (labels[idim] < 0) { + return 1; + } + } + + return 0; +} + +/* + * Given the labels for an operand array, returns a view of the array + * with all repeated labels collapsed into a single dimension along + * the corresponding diagonal. The labels are also updated to match + * the dimensions of the new array. If no label is repeated, the + * original array is reference increased and returned unchanged. + */ +static PyArrayObject * +get_combined_dims_view(PyArrayObject *op, int iop, char *labels) +{ + npy_intp new_strides[NPY_MAXDIMS]; + npy_intp new_dims[NPY_MAXDIMS]; + int idim, icombine; + int icombinemap[NPY_MAXDIMS]; + int ndim = PyArray_NDIM(op); + PyArrayObject *ret = NULL; + + /* A fast path to avoid unnecessary calculations. 
*/ + if (!_any_labels_are_negative((signed char *)labels, ndim)) { + Py_INCREF(op); + + return op; + } + + /* Combine repeated labels. */ + icombine = 0; + for(idim = 0; idim < ndim; ++idim) { + /* + * The char type may be either signed or unsigned, we + * need it to be signed here. + */ + int label = (signed char)labels[idim]; + npy_intp dim = PyArray_DIM(op, idim); + npy_intp stride = PyArray_STRIDE(op, idim); + + /* A label seen for the first time, add it to the op view. */ + if (label >= 0) { + /* + * icombinemap maps dimensions in the original array to + * their position in the combined dimensions view. + */ + icombinemap[idim] = icombine; + new_dims[icombine] = dim; + new_strides[icombine] = stride; + ++icombine; + } + /* A repeated label, find the original one and merge them. */ + else { + int i = icombinemap[idim + label]; + + icombinemap[idim] = -1; + if (new_dims[i] != dim) { + char orig_label = labels[idim + label]; + PyErr_Format(PyExc_ValueError, + "dimensions in operand %d for collapsing " + "index '%c' don't match (%d != %d)", + iop, orig_label, (int)new_dims[i], (int)dim); + return NULL; + } + new_strides[i] += stride; + } + } + + /* Overwrite labels to match the new operand view. */ + for (idim = 0; idim < ndim; ++idim) { + int i = icombinemap[idim]; + + if (i >= 0) { + labels[i] = labels[idim]; + } + } + + /* The number of dimensions of the combined view. */ + ndim = icombine; + + /* Create a view of the operand with the compressed dimensions. */ + Py_INCREF(PyArray_DESCR(op)); + ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( + Py_TYPE(op), PyArray_DESCR(op), + ndim, new_dims, new_strides, PyArray_DATA(op), + PyArray_ISWRITEABLE(op) ? NPY_ARRAY_WRITEABLE : 0, + (PyObject *)op, (PyObject *)op); + + return ret; +} + +static int +prepare_op_axes(int ndim, int iop, char *labels, int *axes, + int ndim_iter, char *iter_labels) +{ + int i, label, ibroadcast; + + ibroadcast = ndim-1; + for (i = ndim_iter-1; i >= 0; --i) { + label = iter_labels[i]; + /* + * If it's an unlabeled broadcast dimension, choose + * the next broadcast dimension from the operand. 
+ */ + if (label == 0) { + while (ibroadcast >= 0 && labels[ibroadcast] != 0) { + --ibroadcast; + } + /* + * If we used up all the operand broadcast dimensions, + * extend it with a "newaxis" + */ + if (ibroadcast < 0) { + axes[i] = -1; + } + /* Otherwise map to the broadcast axis */ + else { + axes[i] = ibroadcast; + --ibroadcast; + } + } + /* It's a labeled dimension, find the matching one */ + else { + char *match = memchr(labels, label, ndim); + /* If the op doesn't have the label, broadcast it */ + if (match == NULL) { + axes[i] = -1; + } + /* Otherwise use it */ + else { + axes[i] = match - labels; + } + } + } + + return 0; +} + +static int +unbuffered_loop_nop1_ndim2(NpyIter *iter) +{ + npy_intp coord, shape[2], strides[2][2]; + char *ptrs[2][2], *ptr; + sum_of_products_fn sop; + NPY_BEGIN_THREADS_DEF; + +#if NPY_EINSUM_DBG_TRACING + NpyIter_DebugPrint(iter); +#endif + NPY_EINSUM_DBG_PRINT("running hand-coded 1-op 2-dim loop\n"); + + NpyIter_GetShape(iter, shape); + memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), + 2*sizeof(npy_intp)); + memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), + 2*sizeof(npy_intp)); + memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), + 2*sizeof(char *)); + memcpy(ptrs[1], ptrs[0], 2*sizeof(char*)); + + sop = get_sum_of_products_function(1, + NpyIter_GetDescrArray(iter)[0]->type_num, + NpyIter_GetDescrArray(iter)[0]->elsize, + strides[0]); + + if (sop == NULL) { + PyErr_SetString(PyExc_TypeError, + "invalid data type for einsum"); + return -1; + } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ + NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); + for (coord = shape[1]; coord > 0; --coord) { + sop(1, ptrs[0], strides[0], shape[0]); + + ptr = ptrs[1][0] + strides[1][0]; + ptrs[0][0] = ptrs[1][0] = ptr; + ptr = ptrs[1][1] + strides[1][1]; + ptrs[0][1] = ptrs[1][1] = ptr; + } + NPY_END_THREADS; + + return 0; +} + +static int +unbuffered_loop_nop1_ndim3(NpyIter *iter) +{ + npy_intp coords[2], shape[3], strides[3][2]; + char *ptrs[3][2], *ptr; + sum_of_products_fn sop; + NPY_BEGIN_THREADS_DEF; + +#if NPY_EINSUM_DBG_TRACING + NpyIter_DebugPrint(iter); +#endif + NPY_EINSUM_DBG_PRINT("running hand-coded 1-op 3-dim loop\n"); + + NpyIter_GetShape(iter, shape); + memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), + 2*sizeof(npy_intp)); + memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), + 2*sizeof(npy_intp)); + memcpy(strides[2], NpyIter_GetAxisStrideArray(iter, 2), + 2*sizeof(npy_intp)); + memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), + 2*sizeof(char *)); + memcpy(ptrs[1], ptrs[0], 2*sizeof(char*)); + memcpy(ptrs[2], ptrs[0], 2*sizeof(char*)); + + sop = get_sum_of_products_function(1, + NpyIter_GetDescrArray(iter)[0]->type_num, + NpyIter_GetDescrArray(iter)[0]->elsize, + strides[0]); + + if (sop == NULL) { + PyErr_SetString(PyExc_TypeError, + "invalid data type for einsum"); + return -1; + } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. 
+ */ + NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); + for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { + for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { + sop(1, ptrs[0], strides[0], shape[0]); + + ptr = ptrs[1][0] + strides[1][0]; + ptrs[0][0] = ptrs[1][0] = ptr; + ptr = ptrs[1][1] + strides[1][1]; + ptrs[0][1] = ptrs[1][1] = ptr; + } + ptr = ptrs[2][0] + strides[2][0]; + ptrs[0][0] = ptrs[1][0] = ptrs[2][0] = ptr; + ptr = ptrs[2][1] + strides[2][1]; + ptrs[0][1] = ptrs[1][1] = ptrs[2][1] = ptr; + } + NPY_END_THREADS; + + return 0; +} + +static int +unbuffered_loop_nop2_ndim2(NpyIter *iter) +{ + npy_intp coord, shape[2], strides[2][3]; + char *ptrs[2][3], *ptr; + sum_of_products_fn sop; + NPY_BEGIN_THREADS_DEF; + +#if NPY_EINSUM_DBG_TRACING + NpyIter_DebugPrint(iter); +#endif + NPY_EINSUM_DBG_PRINT("running hand-coded 2-op 2-dim loop\n"); + + NpyIter_GetShape(iter, shape); + memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), + 3*sizeof(npy_intp)); + memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), + 3*sizeof(npy_intp)); + memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), + 3*sizeof(char *)); + memcpy(ptrs[1], ptrs[0], 3*sizeof(char*)); + + sop = get_sum_of_products_function(2, + NpyIter_GetDescrArray(iter)[0]->type_num, + NpyIter_GetDescrArray(iter)[0]->elsize, + strides[0]); + + if (sop == NULL) { + PyErr_SetString(PyExc_TypeError, + "invalid data type for einsum"); + return -1; + } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ + NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); + for (coord = shape[1]; coord > 0; --coord) { + sop(2, ptrs[0], strides[0], shape[0]); + + ptr = ptrs[1][0] + strides[1][0]; + ptrs[0][0] = ptrs[1][0] = ptr; + ptr = ptrs[1][1] + strides[1][1]; + ptrs[0][1] = ptrs[1][1] = ptr; + ptr = ptrs[1][2] + strides[1][2]; + ptrs[0][2] = ptrs[1][2] = ptr; + } + NPY_END_THREADS; + + return 0; +} + +static int +unbuffered_loop_nop2_ndim3(NpyIter *iter) +{ + npy_intp coords[2], shape[3], strides[3][3]; + char *ptrs[3][3], *ptr; + sum_of_products_fn sop; + NPY_BEGIN_THREADS_DEF; + +#if NPY_EINSUM_DBG_TRACING + NpyIter_DebugPrint(iter); +#endif + NPY_EINSUM_DBG_PRINT("running hand-coded 2-op 3-dim loop\n"); + + NpyIter_GetShape(iter, shape); + memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), + 3*sizeof(npy_intp)); + memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), + 3*sizeof(npy_intp)); + memcpy(strides[2], NpyIter_GetAxisStrideArray(iter, 2), + 3*sizeof(npy_intp)); + memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), + 3*sizeof(char *)); + memcpy(ptrs[1], ptrs[0], 3*sizeof(char*)); + memcpy(ptrs[2], ptrs[0], 3*sizeof(char*)); + + sop = get_sum_of_products_function(2, + NpyIter_GetDescrArray(iter)[0]->type_num, + NpyIter_GetDescrArray(iter)[0]->elsize, + strides[0]); + + if (sop == NULL) { + PyErr_SetString(PyExc_TypeError, + "invalid data type for einsum"); + return -1; + } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. 
+ */ + NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); + for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { + for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { + sop(2, ptrs[0], strides[0], shape[0]); + + ptr = ptrs[1][0] + strides[1][0]; + ptrs[0][0] = ptrs[1][0] = ptr; + ptr = ptrs[1][1] + strides[1][1]; + ptrs[0][1] = ptrs[1][1] = ptr; + ptr = ptrs[1][2] + strides[1][2]; + ptrs[0][2] = ptrs[1][2] = ptr; + } + ptr = ptrs[2][0] + strides[2][0]; + ptrs[0][0] = ptrs[1][0] = ptrs[2][0] = ptr; + ptr = ptrs[2][1] + strides[2][1]; + ptrs[0][1] = ptrs[1][1] = ptrs[2][1] = ptr; + ptr = ptrs[2][2] + strides[2][2]; + ptrs[0][2] = ptrs[1][2] = ptrs[2][2] = ptr; + } + NPY_END_THREADS; + + return 0; +} + + +/*NUMPY_API + * This function provides summation of array elements according to + * the Einstein summation convention. For example: + * - trace(a) -> einsum("ii", a) + * - transpose(a) -> einsum("ji", a) + * - multiply(a,b) -> einsum(",", a, b) + * - inner(a,b) -> einsum("i,i", a, b) + * - outer(a,b) -> einsum("i,j", a, b) + * - matvec(a,b) -> einsum("ij,j", a, b) + * - matmat(a,b) -> einsum("ij,jk", a, b) + * + * subscripts: The string of subscripts for einstein summation. + * nop: The number of operands + * op_in: The array of operands + * dtype: Either NULL, or the data type to force the calculation as. + * order: The order for the calculation/the output axes. + * casting: What kind of casts should be permitted. + * out: Either NULL, or an array into which the output should be placed. + * + * By default, the labels get placed in alphabetical order + * at the end of the output. So, if c = einsum("i,j", a, b) + * then c[i,j] == a[i]*b[j], but if c = einsum("j,i", a, b) + * then c[i,j] = a[j]*b[i]. + * + * Alternatively, you can control the output order or prevent + * an axis from being summed/force an axis to be summed by providing + * indices for the output. This allows us to turn 'trace' into + * 'diag', for example. + * - diag(a) -> einsum("ii->i", a) + * - sum(a, axis=0) -> einsum("i...->", a) + * + * Subscripts at the beginning and end may be specified by + * putting an ellipsis "..." in the middle. For example, + * the function einsum("i...i", a) takes the diagonal of + * the first and last dimensions of the operand, and + * einsum("ij...,jk...->ik...") takes the matrix product using + * the first two indices of each operand instead of the last two. + * + * When there is only one operand, no axes being summed, and + * no output parameter, this function returns a view + * into the operand instead of making a copy. 
+ */ +NPY_NO_EXPORT PyArrayObject * +PyArray_EinsteinSum(char *subscripts, npy_intp nop, + PyArrayObject **op_in, + PyArray_Descr *dtype, + NPY_ORDER order, NPY_CASTING casting, + PyArrayObject *out) +{ + int iop, label, min_label = 127, max_label = 0; + char label_counts[128]; + char op_labels[NPY_MAXARGS][NPY_MAXDIMS]; + char output_labels[NPY_MAXDIMS], *iter_labels; + int idim, ndim_output, ndim_broadcast, ndim_iter; + + PyArrayObject *op[NPY_MAXARGS], *ret = NULL; + PyArray_Descr *op_dtypes_array[NPY_MAXARGS], **op_dtypes; + + int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS]; + int *op_axes[NPY_MAXARGS]; + npy_uint32 iter_flags, op_flags[NPY_MAXARGS]; + + NpyIter *iter; + sum_of_products_fn sop; + npy_intp fixed_strides[NPY_MAXARGS]; + + /* nop+1 (+1 is for the output) must fit in NPY_MAXARGS */ + if (nop >= NPY_MAXARGS) { + PyErr_SetString(PyExc_ValueError, + "too many operands provided to einstein sum function"); + return NULL; + } + else if (nop < 1) { + PyErr_SetString(PyExc_ValueError, + "not enough operands provided to einstein sum function"); + return NULL; + } + + /* Parse the subscripts string into label_counts and op_labels */ + memset(label_counts, 0, sizeof(label_counts)); + for (iop = 0; iop < nop; ++iop) { + int length = (int)strcspn(subscripts, ",-"); + + if (iop == nop-1 && subscripts[length] == ',') { + PyErr_SetString(PyExc_ValueError, + "more operands provided to einstein sum function " + "than specified in the subscripts string"); + return NULL; + } + else if(iop < nop-1 && subscripts[length] != ',') { + PyErr_SetString(PyExc_ValueError, + "fewer operands provided to einstein sum function " + "than specified in the subscripts string"); + return NULL; + } + + if (parse_operand_subscripts(subscripts, length, + PyArray_NDIM(op_in[iop]), + iop, op_labels[iop], label_counts, + &min_label, &max_label) < 0) { + return NULL; + } + + /* Move subscripts to the start of the labels for the next op */ + subscripts += length; + if (iop < nop-1) { + subscripts++; + } + } + + /* + * Find the number of broadcast dimensions, which is the maximum + * number of labels == 0 in an op_labels array. + */ + ndim_broadcast = 0; + for (iop = 0; iop < nop; ++iop) { + npy_intp count_zeros = 0; + int ndim; + char *labels = op_labels[iop]; + + ndim = PyArray_NDIM(op_in[iop]); + for (idim = 0; idim < ndim; ++idim) { + if (labels[idim] == 0) { + ++count_zeros; + } + } + + if (count_zeros > ndim_broadcast) { + ndim_broadcast = count_zeros; + } + } + + /* + * If there is no output signature, fill output_labels and ndim_output + * using each label that appeared once, in alphabetical order. + */ + if (subscripts[0] == '\0') { + /* If no output was specified, always broadcast left, as usual. */ + for (ndim_output = 0; ndim_output < ndim_broadcast; ++ndim_output) { + output_labels[ndim_output] = 0; + } + for (label = min_label; label <= max_label; ++label) { + if (label_counts[label] == 1) { + if (ndim_output < NPY_MAXDIMS) { + output_labels[ndim_output++] = label; + } + else { + PyErr_SetString(PyExc_ValueError, + "einstein sum subscript string has too many " + "distinct labels"); + return NULL; + } + } + } + } + else { + if (subscripts[0] != '-' || subscripts[1] != '>') { + PyErr_SetString(PyExc_ValueError, + "einstein sum subscript string does not " + "contain proper '->' output specified"); + return NULL; + } + subscripts += 2; + + /* Parse the output subscript string. 
*/ + ndim_output = parse_output_subscripts(subscripts, strlen(subscripts), + ndim_broadcast, label_counts, + output_labels); + if (ndim_output < 0) { + return NULL; + } + } + + if (out != NULL && PyArray_NDIM(out) != ndim_output) { + PyErr_Format(PyExc_ValueError, + "out parameter does not have the correct number of " + "dimensions, has %d but should have %d", + (int)PyArray_NDIM(out), (int)ndim_output); + return NULL; + } + + /* + * If there's just one operand and no output parameter, + * first try remapping the axes to the output to return + * a view instead of a copy. + */ + if (nop == 1 && out == NULL) { + ret = NULL; + + if (get_single_op_view(op_in[0], op_labels[0], ndim_output, + output_labels, &ret) < 0) { + return NULL; + } + + if (ret != NULL) { + return ret; + } + } + + /* Set all the op references to NULL */ + for (iop = 0; iop < nop; ++iop) { + op[iop] = NULL; + } + + /* + * Process all the input ops, combining dimensions into their + * diagonal where specified. + */ + for (iop = 0; iop < nop; ++iop) { + char *labels = op_labels[iop]; + + op[iop] = get_combined_dims_view(op_in[iop], iop, labels); + if (op[iop] == NULL) { + goto fail; + } + } + + /* Set the output op */ + op[nop] = out; + + /* + * Set up the labels for the iterator (output + combined labels). + * Can just share the output_labels memory, because iter_labels + * is output_labels with some more labels appended. + */ + iter_labels = output_labels; + ndim_iter = ndim_output; + for (label = min_label; label <= max_label; ++label) { + if (label_counts[label] > 0 && + memchr(output_labels, label, ndim_output) == NULL) { + if (ndim_iter >= NPY_MAXDIMS) { + PyErr_SetString(PyExc_ValueError, + "too many subscripts in einsum"); + goto fail; + } + iter_labels[ndim_iter++] = label; + } + } + + /* Set up the op_axes for the iterator */ + for (iop = 0; iop < nop; ++iop) { + op_axes[iop] = op_axes_arrays[iop]; + + if (prepare_op_axes(PyArray_NDIM(op[iop]), iop, op_labels[iop], + op_axes[iop], ndim_iter, iter_labels) < 0) { + goto fail; + } + } + + /* Set up the op_dtypes if dtype was provided */ + if (dtype == NULL) { + op_dtypes = NULL; + } + else { + op_dtypes = op_dtypes_array; + for (iop = 0; iop <= nop; ++iop) { + op_dtypes[iop] = dtype; + } + } + + /* Set the op_axes for the output */ + op_axes[nop] = op_axes_arrays[nop]; + for (idim = 0; idim < ndim_output; ++idim) { + op_axes[nop][idim] = idim; + } + for (idim = ndim_output; idim < ndim_iter; ++idim) { + op_axes[nop][idim] = -1; + } + + /* Set the iterator per-op flags */ + + for (iop = 0; iop < nop; ++iop) { + op_flags[iop] = NPY_ITER_READONLY| + NPY_ITER_NBO| + NPY_ITER_ALIGNED; + } + op_flags[nop] = NPY_ITER_READWRITE| + NPY_ITER_NBO| + NPY_ITER_ALIGNED| + NPY_ITER_ALLOCATE| + NPY_ITER_NO_BROADCAST; + iter_flags = NPY_ITER_EXTERNAL_LOOP| + NPY_ITER_BUFFERED| + NPY_ITER_DELAY_BUFALLOC| + NPY_ITER_GROWINNER| + NPY_ITER_REDUCE_OK| + NPY_ITER_REFS_OK| + NPY_ITER_ZEROSIZE_OK; + if (out != NULL) { + iter_flags |= NPY_ITER_COPY_IF_OVERLAP; + } + if (dtype == NULL) { + iter_flags |= NPY_ITER_COMMON_DTYPE; + } + + /* Allocate the iterator */ + iter = NpyIter_AdvancedNew(nop+1, op, iter_flags, order, casting, op_flags, + op_dtypes, ndim_iter, op_axes, NULL, 0); + + if (iter == NULL) { + goto fail; + } + + /* Initialize the output to all zeros */ + ret = NpyIter_GetOperandArray(iter)[nop]; + if (PyArray_AssignZero(ret, NULL) < 0) { + goto fail; + } + + /***************************/ + /* + * Acceleration for some specific loop structures. 
Note + * that with axis coalescing, inputs with more dimensions can + * be reduced to fit into these patterns. + */ + if (!NpyIter_RequiresBuffering(iter)) { + int ndim = NpyIter_GetNDim(iter); + switch (nop) { + case 1: + if (ndim == 2) { + if (unbuffered_loop_nop1_ndim2(iter) < 0) { + goto fail; + } + goto finish; + } + else if (ndim == 3) { + if (unbuffered_loop_nop1_ndim3(iter) < 0) { + goto fail; + } + goto finish; + } + break; + case 2: + if (ndim == 2) { + if (unbuffered_loop_nop2_ndim2(iter) < 0) { + goto fail; + } + goto finish; + } + else if (ndim == 3) { + if (unbuffered_loop_nop2_ndim3(iter) < 0) { + goto fail; + } + goto finish; + } + break; + } + } + /***************************/ + + if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { + goto fail; + } + + /* + * Get an inner loop function, specializing it based on + * the strides that are fixed for the whole loop. + */ + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + sop = get_sum_of_products_function(nop, + NpyIter_GetDescrArray(iter)[0]->type_num, + NpyIter_GetDescrArray(iter)[0]->elsize, + fixed_strides); + +#if NPY_EINSUM_DBG_TRACING + NpyIter_DebugPrint(iter); +#endif + + /* Finally, the main loop */ + if (sop == NULL) { + PyErr_SetString(PyExc_TypeError, + "invalid data type for einsum"); + } + else if (NpyIter_GetIterSize(iter) != 0) { + NpyIter_IterNextFunc *iternext; + char **dataptr; + npy_intp *stride; + npy_intp *countptr; + NPY_BEGIN_THREADS_DEF; + + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter); + goto fail; + } + dataptr = NpyIter_GetDataPtrArray(iter); + stride = NpyIter_GetInnerStrideArray(iter); + countptr = NpyIter_GetInnerLoopSizePtr(iter); + + NPY_BEGIN_THREADS_NDITER(iter); + NPY_EINSUM_DBG_PRINT("Einsum loop\n"); + do { + sop(nop, dataptr, stride, *countptr); + } while(iternext(iter)); + NPY_END_THREADS; + + /* If the API was needed, it may have thrown an error */ + if (NpyIter_IterationNeedsAPI(iter) && PyErr_Occurred()) { + goto fail; + } + } + +finish: + if (out != NULL) { + ret = out; + } + Py_INCREF(ret); + + NpyIter_Deallocate(iter); + for (iop = 0; iop < nop; ++iop) { + Py_DECREF(op[iop]); + } + + return ret; + +fail: + for (iop = 0; iop < nop; ++iop) { + Py_XDECREF(op[iop]); + } + + return NULL; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/lowlevel_strided_loops.c.src new file mode 100644 index 0000000000..16bacf1abc --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -0,0 +1,1785 @@ +/* + * This file contains low-level loops for copying and byte-swapping + * strided data. + * + * Copyright (c) 2010 by Mark Wiebe (mwwiebe@gmail.com) + * The University of British Columbia + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <numpy/arrayobject.h> +#include <numpy/npy_cpu.h> +#include <numpy/halffloat.h> + +#include "lowlevel_strided_loops.h" +#include "array_assign.h" + + +/* + * x86 platform works with unaligned access but the compiler is allowed to + * assume all data is aligned to its size by the C standard. This means it can + * vectorize instructions peeling only by the size of the type, if the data is + * not aligned to this size one ends up with data not correctly aligned for SSE + * instructions (16 byte). 
+ * So this flag can only be enabled if autovectorization is disabled. + */ +#if NPY_CPU_HAVE_UNALIGNED_ACCESS +# define NPY_USE_UNALIGNED_ACCESS 0 +#else +# define NPY_USE_UNALIGNED_ACCESS 0 +#endif + +#define _NPY_NOP1(x) (x) +#define _NPY_NOP2(x) (x) +#define _NPY_NOP4(x) (x) +#define _NPY_NOP8(x) (x) + +#define _NPY_SWAP2(x) npy_bswap2(x) + +#define _NPY_SWAP4(x) npy_bswap4(x) + +#define _NPY_SWAP_PAIR4(x) (((((npy_uint32)x)&0xffu) << 8) | \ + ((((npy_uint32)x)&0xff00u) >> 8) | \ + ((((npy_uint32)x)&0xff0000u) << 8) | \ + ((((npy_uint32)x)&0xff000000u) >> 8)) + +#define _NPY_SWAP8(x) npy_bswap8(x) + +#define _NPY_SWAP_PAIR8(x) (((((npy_uint64)x)&0xffULL) << 24) | \ + ((((npy_uint64)x)&0xff00ULL) << 8) | \ + ((((npy_uint64)x)&0xff0000ULL) >> 8) | \ + ((((npy_uint64)x)&0xff000000ULL) >> 24) | \ + ((((npy_uint64)x)&0xff00000000ULL) << 24) | \ + ((((npy_uint64)x)&0xff0000000000ULL) << 8) | \ + ((((npy_uint64)x)&0xff000000000000ULL) >> 8) | \ + ((((npy_uint64)x)&0xff00000000000000ULL) >> 24)) + +#define _NPY_SWAP_INPLACE2(x) npy_bswap2_unaligned(x) + +#define _NPY_SWAP_INPLACE4(x) npy_bswap4_unaligned(x) + +#define _NPY_SWAP_INPLACE8(x) npy_bswap8_unaligned(x) + +#define _NPY_SWAP_INPLACE16(x) { \ + char a = (x)[0]; (x)[0] = (x)[15]; (x)[15] = a; \ + a = (x)[1]; (x)[1] = (x)[14]; (x)[14] = a; \ + a = (x)[2]; (x)[2] = (x)[13]; (x)[13] = a; \ + a = (x)[3]; (x)[3] = (x)[12]; (x)[12] = a; \ + a = (x)[4]; (x)[4] = (x)[11]; (x)[11] = a; \ + a = (x)[5]; (x)[5] = (x)[10]; (x)[10] = a; \ + a = (x)[6]; (x)[6] = (x)[9]; (x)[9] = a; \ + a = (x)[7]; (x)[7] = (x)[8]; (x)[8] = a; \ + } + +/************* STRIDED COPYING/SWAPPING SPECIALIZED FUNCTIONS *************/ + +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + * #elsize_half = 0, 1, 2, 4, 8# + * #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint64# + */ +/**begin repeat1 + * #oper = strided_to_strided, strided_to_contig, + * contig_to_strided, contig_to_contig# + * #src_contig = 0, 0, 1 ,1# + * #dst_contig = 0, 1, 0 ,1# + */ +/**begin repeat2 + * #swap = _NPY_NOP, _NPY_NOP, _NPY_SWAP_INPLACE, _NPY_SWAP, + * _NPY_SWAP_INPLACE, _NPY_SWAP_PAIR# + * #prefix = , _aligned, _swap, _aligned_swap, _swap_pair, _aligned_swap_pair# + * #is_aligned = 0, 1, 0, 1, 0, 1# + * #minelsize = 1, 1, 2, 2, 4, 4# + * #is_swap = 0, 0, 1, 1, 2, 2# + */ + +#if (@elsize@ >= @minelsize@) && \ + (@elsize@ > 1 || @is_aligned@) && \ + (!NPY_USE_UNALIGNED_ACCESS || @is_aligned@) + + +#if @is_swap@ || @src_contig@ == 0 || @dst_contig@ == 0 +/* + * unrolling gains about 20-50% if the copy can be done in one mov instruction + * if not it can decrease performance + * tested to improve performance on intel xeon 5x/7x, core2duo, amd phenom x4 + */ +static void +#if @is_aligned@ && @is_swap@ == 0 && @elsize@ <= NPY_SIZEOF_INTP + NPY_GCC_UNROLL_LOOPS +#endif +@prefix@_@oper@_size@elsize@(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + NpyAuxData *NPY_UNUSED(data)) +{ +#if @is_aligned@ + /* sanity check */ + assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@))); +#endif + /*printf("fn @prefix@_@oper@_size@elsize@\n");*/ + while (N > 0) { +#if @is_aligned@ + + /* aligned copy and swap */ +# if @elsize@ != 16 + (*((@type@ *)dst)) = @swap@@elsize@(*((@type@ *)src)); +# else +# if @is_swap@ == 0 + (*((npy_uint64 *)dst)) = (*((npy_uint64 *)src)); + (*((npy_uint64 *)dst + 1)) = (*((npy_uint64 *)src + 1)); +# elif @is_swap@ == 1 + (*((npy_uint64 *)dst)) 
= _NPY_SWAP8(*((npy_uint64 *)src + 1)); + (*((npy_uint64 *)dst + 1)) = _NPY_SWAP8(*((npy_uint64 *)src)); +# elif @is_swap@ == 2 + (*((npy_uint64 *)dst)) = _NPY_SWAP8(*((npy_uint64 *)src)); + (*((npy_uint64 *)dst + 1)) = _NPY_SWAP8(*((npy_uint64 *)src + 1)); +# endif +# endif + +#else + + /* unaligned copy and swap */ + memmove(dst, src, @elsize@); +# if @is_swap@ == 1 + @swap@@elsize@(dst); +# elif @is_swap@ == 2 + @swap@@elsize_half@(dst); + @swap@@elsize_half@(dst + @elsize_half@); +# endif + +#endif + +#if @dst_contig@ + dst += @elsize@; +#else + dst += dst_stride; +#endif + +#if @src_contig@ + src += @elsize@; +#else + src += src_stride; +#endif + + --N; + } +} +#endif + + +/* + * specialized copy and swap for source stride 0, + * interestingly unrolling here is like above is only marginally profitable for + * small types and detrimental for >= 8byte moves on x86 + * but it profits from vectorization enabled with -O3 + */ +#if (@src_contig@ == 0) && @is_aligned@ +static NPY_GCC_OPT_3 void +@prefix@_@oper@_size@elsize@_srcstride0(char *dst, + npy_intp dst_stride, + char *src, npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + NpyAuxData *NPY_UNUSED(data)) +{ +#if @elsize@ != 16 +# if !(@elsize@ == 1 && @dst_contig@) + @type@ temp; +# endif +#else + npy_uint64 temp0, temp1; +#endif + if (N == 0) { + return; + } +#if @is_aligned@ && @elsize@ != 16 + /* sanity check */ + assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@))); +#endif +#if @elsize@ == 1 && @dst_contig@ + memset(dst, *src, N); +#else + +# if @elsize@ != 16 + temp = @swap@@elsize@(*((@type@ *)src)); +# else +# if @is_swap@ == 0 + temp0 = (*((npy_uint64 *)src)); + temp1 = (*((npy_uint64 *)src + 1)); +# elif @is_swap@ == 1 + temp0 = _NPY_SWAP8(*((npy_uint64 *)src + 1)); + temp1 = _NPY_SWAP8(*((npy_uint64 *)src)); +# elif @is_swap@ == 2 + temp0 = _NPY_SWAP8(*((npy_uint64 *)src)); + temp1 = _NPY_SWAP8(*((npy_uint64 *)src + 1)); +# endif +# endif + + while (N > 0) { +# if @elsize@ != 16 + *((@type@ *)dst) = temp; +# else + *((npy_uint64 *)dst) = temp0; + *((npy_uint64 *)dst + 1) = temp1; +# endif +# if @dst_contig@ + dst += @elsize@; +# else + dst += dst_stride; +# endif + --N; + } +#endif/* @elsize == 1 && @dst_contig@ -- else */ +} +#endif/* (@src_contig@ == 0) && @is_aligned@ */ + +#endif/* @elsize@ >= @minelsize@ */ + +/**end repeat2**/ +/**end repeat1**/ +/**end repeat**/ + +static void +_strided_to_strided(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *NPY_UNUSED(data)) +{ + while (N > 0) { + memmove(dst, src, src_itemsize); + dst += dst_stride; + src += src_stride; + --N; + } +} + +static void +_swap_strided_to_strided(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *NPY_UNUSED(data)) +{ + char *a, *b, c; + + while (N > 0) { + memmove(dst, src, src_itemsize); + /* general in-place swap */ + a = dst; + b = dst + src_itemsize - 1; + while (a < b) { + c = *a; + *a = *b; + *b = c; + ++a; --b; + } + dst += dst_stride; + src += src_stride; + --N; + } +} + +static void +_swap_pair_strided_to_strided(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *NPY_UNUSED(data)) +{ + char *a, *b, c; + npy_intp itemsize_half = src_itemsize / 2; + + while (N > 0) { + memmove(dst, src, src_itemsize); + /* general in-place swap */ + a = dst; + b = 
dst + itemsize_half - 1; + while (a < b) { + c = *a; + *a = *b; + *b = c; + ++a; --b; + } + /* general in-place swap */ + a = dst + itemsize_half; + b = dst + 2*itemsize_half - 1; + while (a < b) { + c = *a; + *a = *b; + *b = c; + ++a; --b; + } + dst += dst_stride; + src += src_stride; + --N; + } +} + +static void +_contig_to_contig(char *dst, npy_intp NPY_UNUSED(dst_stride), + char *src, npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp src_itemsize, + NpyAuxData *NPY_UNUSED(data)) +{ + memmove(dst, src, src_itemsize*N); +} + + +NPY_NO_EXPORT PyArray_StridedUnaryOp * +PyArray_GetStridedCopyFn(int aligned, npy_intp src_stride, + npy_intp dst_stride, npy_intp itemsize) +{ +/* + * Skip the "unaligned" versions on CPUs which support unaligned + * memory accesses. + */ +#if !NPY_USE_UNALIGNED_ACCESS + if (aligned) { +#endif/*!NPY_USE_UNALIGNED_ACCESS*/ + + /* contiguous dst */ + if (itemsize != 0 && dst_stride == itemsize) { + /* constant src */ + if (src_stride == 0) { + switch (itemsize) { +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + */ + case @elsize@: + return + &_aligned_strided_to_contig_size@elsize@_srcstride0; +/**end repeat**/ + } + } + /* contiguous src */ + else if (src_stride == itemsize) { + return &_contig_to_contig; + } + /* general src */ + else { + switch (itemsize) { +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + */ + case @elsize@: + return &_aligned_strided_to_contig_size@elsize@; +/**end repeat**/ + } + } + + return &_strided_to_strided; + } + /* general dst */ + else { + /* constant src */ + if (src_stride == 0) { + switch (itemsize) { +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + */ + case @elsize@: + return + &_aligned_strided_to_strided_size@elsize@_srcstride0; +/**end repeat**/ + } + } + /* contiguous src */ + else if (src_stride == itemsize) { + switch (itemsize) { +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + */ + case @elsize@: + return &_aligned_contig_to_strided_size@elsize@; +/**end repeat**/ + } + + return &_strided_to_strided; + } + else { + switch (itemsize) { +/**begin repeat + * #elsize = 1, 2, 4, 8, 16# + */ + case @elsize@: + return &_aligned_strided_to_strided_size@elsize@; +/**end repeat**/ + } + } + } + +#if !NPY_USE_UNALIGNED_ACCESS + } + else { + /* contiguous dst */ + if (itemsize != 0 && dst_stride == itemsize) { + /* contiguous src */ + if (itemsize != 0 && src_stride == itemsize) { + return &_contig_to_contig; + } + /* general src */ + else { + switch (itemsize) { + case 1: + return &_aligned_strided_to_contig_size1; +/**begin repeat + * #elsize = 2, 4, 8, 16# + */ + case @elsize@: + return &_strided_to_contig_size@elsize@; +/**end repeat**/ + } + } + + return &_strided_to_strided; + } + /* general dst */ + else { + /* contiguous src */ + if (itemsize != 0 && src_stride == itemsize) { + switch (itemsize) { + case 1: + return &_aligned_contig_to_strided_size1; +/**begin repeat + * #elsize = 2, 4, 8, 16# + */ + case @elsize@: + return &_contig_to_strided_size@elsize@; +/**end repeat**/ + } + + return &_strided_to_strided; + } + /* general src */ + else { + switch (itemsize) { + case 1: + return &_aligned_strided_to_strided_size1; +/**begin repeat + * #elsize = 2, 4, 8, 16# + */ + case @elsize@: + return &_strided_to_strided_size@elsize@; +/**end repeat**/ + } + } + } + } +#endif/*!NPY_USE_UNALIGNED_ACCESS*/ + + return &_strided_to_strided; +} + +/* + * PyArray_GetStridedCopySwapFn and PyArray_GetStridedCopySwapPairFn are + * nearly identical, so can do a repeat for them. 
+ */ +/**begin repeat + * #function = PyArray_GetStridedCopySwapFn, PyArray_GetStridedCopySwapPairFn# + * #tag = , _pair# + * #not_pair = 1, 0# + */ + +NPY_NO_EXPORT PyArray_StridedUnaryOp * +@function@(int aligned, npy_intp src_stride, + npy_intp dst_stride, npy_intp itemsize) +{ +/* + * Skip the "unaligned" versions on CPUs which support unaligned + * memory accesses. + */ +#if !NPY_USE_UNALIGNED_ACCESS + if (aligned) { +#endif/*!NPY_USE_UNALIGNED_ACCESS*/ + + /* contiguous dst */ + if (itemsize != 0 && dst_stride == itemsize) { + /* constant src */ + if (src_stride == 0) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return + &_aligned_swap@tag@_strided_to_contig_size@elsize@_srcstride0; +#endif +/**end repeat1**/ + } + } + /* contiguous src */ + else if (src_stride == itemsize) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_aligned_swap@tag@_contig_to_contig_size@elsize@; +#endif +/**end repeat1**/ + } + } + /* general src */ + else { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_aligned_swap@tag@_strided_to_contig_size@elsize@; +#endif +/**end repeat1**/ + } + } + } + /* general dst */ + else { + /* constant src */ + if (src_stride == 0) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return + &_aligned_swap@tag@_strided_to_strided_size@elsize@_srcstride0; +#endif +/**end repeat1**/ + } + } + /* contiguous src */ + else if (src_stride == itemsize) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_aligned_swap@tag@_contig_to_strided_size@elsize@; +#endif +/**end repeat1**/ + } + + return &_swap@tag@_strided_to_strided; + } + else { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_aligned_swap@tag@_strided_to_strided_size@elsize@; +#endif +/**end repeat1**/ + } + } + } + +#if !NPY_USE_UNALIGNED_ACCESS + } + else { + /* contiguous dst */ + if (itemsize != 0 && dst_stride == itemsize) { + /* contiguous src */ + if (itemsize != 0 && src_stride == itemsize) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_swap@tag@_contig_to_contig_size@elsize@; +#endif +/**end repeat1**/ + } + } + /* general src */ + else { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_swap@tag@_strided_to_contig_size@elsize@; +#endif +/**end repeat1**/ + } + } + + return &_swap@tag@_strided_to_strided; + } + /* general dst */ + else { + /* contiguous src */ + if (itemsize != 0 && src_stride == itemsize) { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_swap@tag@_contig_to_strided_size@elsize@; +#endif +/**end repeat1**/ + } + + return &_swap@tag@_strided_to_strided; + } + /* general src */ + else { + switch (itemsize) { +/**begin repeat1 + * #elsize = 2, 4, 8, 16# + */ +#if @not_pair@ || @elsize@ > 2 + case @elsize@: + return &_swap@tag@_strided_to_strided_size@elsize@; +#endif +/**end repeat1**/ + } + } + } + } +#endif/*!NPY_USE_UNALIGNED_ACCESS*/ + + return 
&_swap@tag@_strided_to_strided; +} + +/**end repeat**/ + +/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/ + +/**begin repeat + * + * #NAME1 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name1 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type1 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #rtype1 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble# + * #is_bool1 = 1, 0*17# + * #is_half1 = 0*11, 1, 0*6# + * #is_float1 = 0*12, 1, 0, 0, 1, 0, 0# + * #is_double1 = 0*13, 1, 0, 0, 1, 0# + * #is_complex1 = 0*15, 1*3# + */ + +/**begin repeat1 + * + * #NAME2 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name2 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type2 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #rtype2 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble# + * #is_bool2 = 1, 0*17# + * #is_half2 = 0*11, 1, 0*6# + * #is_float2 = 0*12, 1, 0, 0, 1, 0, 0# + * #is_double2 = 0*13, 1, 0, 0, 1, 0# + * #is_complex2 = 0*15, 1*3# + */ + +/**begin repeat2 + * #prefix = _aligned,,_aligned_contig,_contig# + * #aligned = 1,0,1,0# + * #contig = 0,0,1,1# + */ + +#if !(NPY_USE_UNALIGNED_ACCESS && !@aligned@) + +/* For half types, don't use actual double/float types in conversion */ +#if @is_half1@ || @is_half2@ + +# if @is_float1@ +# define _TYPE1 npy_uint32 +# elif @is_double1@ +# define _TYPE1 npy_uint64 +# else +# define _TYPE1 @rtype1@ +# endif + +# if @is_float2@ +# define _TYPE2 npy_uint32 +# elif @is_double2@ +# define _TYPE2 npy_uint64 +# else +# define _TYPE2 @rtype2@ +# endif + +#else + +#define _TYPE1 @rtype1@ +#define _TYPE2 @rtype2@ + +#endif + +/* Determine an appropriate casting conversion function */ +#if @is_half1@ + +# if @is_float2@ +# define _CONVERT_FN(x) npy_halfbits_to_floatbits(x) +# elif @is_double2@ +# define _CONVERT_FN(x) npy_halfbits_to_doublebits(x) +# elif @is_half2@ +# define _CONVERT_FN(x) (x) +# elif @is_bool2@ +# define _CONVERT_FN(x) ((npy_bool)!npy_half_iszero(x)) +# else +# define _CONVERT_FN(x) ((_TYPE2)npy_half_to_float(x)) +# endif + +#elif @is_half2@ + +# if @is_float1@ +# define _CONVERT_FN(x) npy_floatbits_to_halfbits(x) +# elif @is_double1@ +# define _CONVERT_FN(x) npy_doublebits_to_halfbits(x) +# else +# define _CONVERT_FN(x) npy_float_to_half((float)x) +# endif + +#else + +# if @is_bool2@ || @is_bool1@ +# define _CONVERT_FN(x) ((npy_bool)(x != 0)) +# else +# 
define _CONVERT_FN(x) ((_TYPE2)x) +# endif + +#endif + +static NPY_GCC_OPT_3 void +@prefix@_cast_@name1@_to_@name2@( + char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + NpyAuxData *NPY_UNUSED(data)) +{ +#if @is_complex1@ + _TYPE1 src_value[2]; +#elif !@aligned@ + _TYPE1 src_value; +#endif +#if @is_complex2@ + _TYPE2 dst_value[2]; +#elif !@aligned@ + _TYPE2 dst_value; +#endif + +#if @aligned@ + /* sanity check */ + assert(N == 0 || npy_is_aligned(src, _ALIGN(_TYPE1))); + assert(N == 0 || npy_is_aligned(dst, _ALIGN(_TYPE2))); +#endif + + /*printf("@prefix@_cast_@name1@_to_@name2@\n");*/ + + while (N--) { +#if @aligned@ +# if @is_complex1@ + src_value[0] = ((_TYPE1 *)src)[0]; + src_value[1] = ((_TYPE1 *)src)[1]; +# endif +#else + memmove(&src_value, src, sizeof(src_value)); +#endif + +/* Do the cast */ +#if @is_complex1@ +# if @is_complex2@ + dst_value[0] = _CONVERT_FN(src_value[0]); + dst_value[1] = _CONVERT_FN(src_value[1]); +# elif !@aligned@ +# if @is_bool2@ + dst_value = _CONVERT_FN(src_value[0]) || _CONVERT_FN(src_value[1]); +# else + dst_value = _CONVERT_FN(src_value[0]); +# endif +# else +# if @is_bool2@ + *(_TYPE2 *)dst = _CONVERT_FN(src_value[0]) || _CONVERT_FN(src_value[1]); +# else + *(_TYPE2 *)dst = _CONVERT_FN(src_value[0]); +# endif +# endif +#else +# if @is_complex2@ +# if !@aligned@ + dst_value[0] = _CONVERT_FN(src_value); +# else + dst_value[0] = _CONVERT_FN(*(_TYPE1 *)src); +# endif + dst_value[1] = 0; +# elif !@aligned@ + dst_value = _CONVERT_FN(src_value); +# else + *(_TYPE2 *)dst = _CONVERT_FN(*(_TYPE1 *)src); +# endif +#endif + +#if @aligned@ +# if @is_complex2@ + ((_TYPE2 *)dst)[0] = dst_value[0]; + ((_TYPE2 *)dst)[1] = dst_value[1]; +# endif +#else + memmove(dst, &dst_value, sizeof(dst_value)); +#endif + +#if @contig@ + dst += sizeof(@type2@); + src += sizeof(@type1@); +#else + dst += dst_stride; + src += src_stride; +#endif + } +} + +#undef _CONVERT_FN +#undef _TYPE2 +#undef _TYPE1 + +#endif + +/**end repeat2**/ + +/**end repeat1**/ + +/**end repeat**/ + +NPY_NO_EXPORT PyArray_StridedUnaryOp * +PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride, + npy_intp dst_stride, + int src_type_num, int dst_type_num) +{ + switch (src_type_num) { +/**begin repeat + * + * #NAME1 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name1 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type1 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + */ + + case NPY_@NAME1@: + /*printf("test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);*/ + switch (dst_type_num) { +/**begin repeat1 + * + * #NAME2 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name2 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type2 = npy_bool, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_half, npy_float, npy_double, 
npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + */ + + case NPY_@NAME2@: + /*printf("ret fn %d %d\n", NPY_@NAME1@, NPY_@NAME2@);*/ +# if NPY_USE_UNALIGNED_ACCESS + if (src_stride == sizeof(@type1@) && + dst_stride == sizeof(@type2@)) { + return &_aligned_contig_cast_@name1@_to_@name2@; + } + else { + return &_aligned_cast_@name1@_to_@name2@; + } +# else + if (src_stride == sizeof(@type1@) && + dst_stride == sizeof(@type2@)) { + return aligned ? + &_aligned_contig_cast_@name1@_to_@name2@ : + &_contig_cast_@name1@_to_@name2@; + } + else { + return aligned ? &_aligned_cast_@name1@_to_@name2@ : + &_cast_@name1@_to_@name2@; + } +# endif + +/**end repeat1**/ + } + /*printf("switched test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);*/ + +/**end repeat**/ + } + + return NULL; +} + + +/****************** PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS ******************/ + +/* See documentation of arguments in lowlevel_strided_loops.h */ +NPY_NO_EXPORT npy_intp +PyArray_TransferNDimToStrided(npy_intp ndim, + char *dst, npy_intp dst_stride, + char *src, npy_intp *src_strides, npy_intp src_strides_inc, + npy_intp *coords, npy_intp coords_inc, + npy_intp *shape, npy_intp shape_inc, + npy_intp count, npy_intp src_itemsize, + PyArray_StridedUnaryOp *stransfer, + NpyAuxData *data) +{ + npy_intp i, M, N, coord0, shape0, src_stride0, coord1, shape1, src_stride1; + + /* Finish off dimension 0 */ + coord0 = coords[0]; + shape0 = shape[0]; + src_stride0 = src_strides[0]; + N = shape0 - coord0; + if (N >= count) { + stransfer(dst, dst_stride, src, src_stride0, count, src_itemsize, data); + return 0; + } + stransfer(dst, dst_stride, src, src_stride0, N, src_itemsize, data); + count -= N; + + /* If it's 1-dimensional, there's no more to copy */ + if (ndim == 1) { + return count; + } + + /* Adjust the src and dst pointers */ + coord1 = (coords + coords_inc)[0]; + shape1 = (shape + shape_inc)[0]; + src_stride1 = (src_strides + src_strides_inc)[0]; + src = src - coord0*src_stride0 + src_stride1; + dst += N*dst_stride; + + /* Finish off dimension 1 */ + M = (shape1 - coord1 - 1); + N = shape0*M; + for (i = 0; i < M; ++i) { + if (shape0 >= count) { + stransfer(dst, dst_stride, src, src_stride0, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride, src, src_stride0, + shape0, src_itemsize, data); + } + count -= shape0; + src += src_stride1; + dst += shape0*dst_stride; + } + + /* If it's 2-dimensional, there's no more to copy */ + if (ndim == 2) { + return count; + } + + /* General-case loop for everything else */ + else { + /* Iteration structure for dimensions 2 and up */ + struct { + npy_intp coord, shape, src_stride; + } it[NPY_MAXDIMS]; + + /* Copy the coordinates and shape */ + coords += 2*coords_inc; + shape += 2*shape_inc; + src_strides += 2*src_strides_inc; + for (i = 0; i < ndim-2; ++i) { + it[i].coord = coords[0]; + it[i].shape = shape[0]; + it[i].src_stride = src_strides[0]; + coords += coords_inc; + shape += shape_inc; + src_strides += src_strides_inc; + } + + for (;;) { + /* Adjust the src pointer from the dimension 0 and 1 loop */ + src = src - shape1*src_stride1; + + /* Increment to the next coordinate */ + for (i = 0; i < ndim-2; ++i) { + src += it[i].src_stride; + if (++it[i].coord >= it[i].shape) { + it[i].coord = 0; + src -= it[i].src_stride*it[i].shape; + } + else { + break; + } + } + /* If the last dimension rolled over, we're done */ + if (i == ndim-2) { + return count; + } + + /* A loop for dimensions 0 and 1 */ + for (i = 0; i < shape1; ++i) { + if 
(shape0 >= count) { + stransfer(dst, dst_stride, src, src_stride0, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride, src, src_stride0, + shape0, src_itemsize, data); + } + count -= shape0; + src += src_stride1; + dst += shape0*dst_stride; + } + } + } +} + +/* See documentation of arguments in lowlevel_strided_loops.h */ +NPY_NO_EXPORT npy_intp +PyArray_TransferStridedToNDim(npy_intp ndim, + char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc, + char *src, npy_intp src_stride, + npy_intp *coords, npy_intp coords_inc, + npy_intp *shape, npy_intp shape_inc, + npy_intp count, npy_intp src_itemsize, + PyArray_StridedUnaryOp *stransfer, + NpyAuxData *data) +{ + npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1; + + /* Finish off dimension 0 */ + coord0 = coords[0]; + shape0 = shape[0]; + dst_stride0 = dst_strides[0]; + N = shape0 - coord0; + if (N >= count) { + stransfer(dst, dst_stride0, src, src_stride, count, src_itemsize, data); + return 0; + } + stransfer(dst, dst_stride0, src, src_stride, N, src_itemsize, data); + count -= N; + + /* If it's 1-dimensional, there's no more to copy */ + if (ndim == 1) { + return count; + } + + /* Adjust the src and dst pointers */ + coord1 = (coords + coords_inc)[0]; + shape1 = (shape + shape_inc)[0]; + dst_stride1 = (dst_strides + dst_strides_inc)[0]; + dst = dst - coord0*dst_stride0 + dst_stride1; + src += N*src_stride; + + /* Finish off dimension 1 */ + M = (shape1 - coord1 - 1); + N = shape0*M; + for (i = 0; i < M; ++i) { + if (shape0 >= count) { + stransfer(dst, dst_stride0, src, src_stride, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride0, src, src_stride, + shape0, src_itemsize, data); + } + count -= shape0; + dst += dst_stride1; + src += shape0*src_stride; + } + + /* If it's 2-dimensional, there's no more to copy */ + if (ndim == 2) { + return count; + } + + /* General-case loop for everything else */ + else { + /* Iteration structure for dimensions 2 and up */ + struct { + npy_intp coord, shape, dst_stride; + } it[NPY_MAXDIMS]; + + /* Copy the coordinates and shape */ + coords += 2*coords_inc; + shape += 2*shape_inc; + dst_strides += 2*dst_strides_inc; + for (i = 0; i < ndim-2; ++i) { + it[i].coord = coords[0]; + it[i].shape = shape[0]; + it[i].dst_stride = dst_strides[0]; + coords += coords_inc; + shape += shape_inc; + dst_strides += dst_strides_inc; + } + + for (;;) { + /* Adjust the dst pointer from the dimension 0 and 1 loop */ + dst = dst - shape1*dst_stride1; + + /* Increment to the next coordinate */ + for (i = 0; i < ndim-2; ++i) { + dst += it[i].dst_stride; + if (++it[i].coord >= it[i].shape) { + it[i].coord = 0; + dst -= it[i].dst_stride*it[i].shape; + } + else { + break; + } + } + /* If the last dimension rolled over, we're done */ + if (i == ndim-2) { + return count; + } + + /* A loop for dimensions 0 and 1 */ + for (i = 0; i < shape1; ++i) { + if (shape0 >= count) { + stransfer(dst, dst_stride0, src, src_stride, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride0, src, src_stride, + shape0, src_itemsize, data); + } + count -= shape0; + dst += dst_stride1; + src += shape0*src_stride; + } + } + } +} + +/* See documentation of arguments in lowlevel_strided_loops.h */ +NPY_NO_EXPORT npy_intp +PyArray_TransferMaskedStridedToNDim(npy_intp ndim, + char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc, + char *src, npy_intp src_stride, + npy_uint8 *mask, npy_intp mask_stride, + npy_intp *coords, npy_intp 
coords_inc, + npy_intp *shape, npy_intp shape_inc, + npy_intp count, npy_intp src_itemsize, + PyArray_MaskedStridedUnaryOp *stransfer, + NpyAuxData *data) +{ + npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1; + + /* Finish off dimension 0 */ + coord0 = coords[0]; + shape0 = shape[0]; + dst_stride0 = dst_strides[0]; + N = shape0 - coord0; + if (N >= count) { + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + count, src_itemsize, data); + return 0; + } + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + N, src_itemsize, data); + count -= N; + + /* If it's 1-dimensional, there's no more to copy */ + if (ndim == 1) { + return count; + } + + /* Adjust the src and dst pointers */ + coord1 = (coords + coords_inc)[0]; + shape1 = (shape + shape_inc)[0]; + dst_stride1 = (dst_strides + dst_strides_inc)[0]; + dst = dst - coord0*dst_stride0 + dst_stride1; + src += N*src_stride; + mask += N*mask_stride; + + /* Finish off dimension 1 */ + M = (shape1 - coord1 - 1); + N = shape0*M; + for (i = 0; i < M; ++i) { + if (shape0 >= count) { + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + shape0, src_itemsize, data); + } + count -= shape0; + dst += dst_stride1; + src += shape0*src_stride; + mask += shape0*mask_stride; + } + + /* If it's 2-dimensional, there's no more to copy */ + if (ndim == 2) { + return count; + } + + /* General-case loop for everything else */ + else { + /* Iteration structure for dimensions 2 and up */ + struct { + npy_intp coord, shape, dst_stride; + } it[NPY_MAXDIMS]; + + /* Copy the coordinates and shape */ + coords += 2*coords_inc; + shape += 2*shape_inc; + dst_strides += 2*dst_strides_inc; + for (i = 0; i < ndim-2; ++i) { + it[i].coord = coords[0]; + it[i].shape = shape[0]; + it[i].dst_stride = dst_strides[0]; + coords += coords_inc; + shape += shape_inc; + dst_strides += dst_strides_inc; + } + + for (;;) { + /* Adjust the dst pointer from the dimension 0 and 1 loop */ + dst = dst - shape1*dst_stride1; + + /* Increment to the next coordinate */ + for (i = 0; i < ndim-2; ++i) { + dst += it[i].dst_stride; + if (++it[i].coord >= it[i].shape) { + it[i].coord = 0; + dst -= it[i].dst_stride*it[i].shape; + } + else { + break; + } + } + /* If the last dimension rolled over, we're done */ + if (i == ndim-2) { + return count; + } + + /* A loop for dimensions 0 and 1 */ + for (i = 0; i < shape1; ++i) { + if (shape0 >= count) { + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + count, src_itemsize, data); + return 0; + } + else { + stransfer(dst, dst_stride0, + src, src_stride, + mask, mask_stride, + shape0, src_itemsize, data); + } + count -= shape0; + dst += dst_stride1; + src += shape0*src_stride; + mask += shape0*mask_stride; + } + } + } +} + + +/***************************************************************************/ +/****************** MapIter (Advanced indexing) Get/Set ********************/ +/***************************************************************************/ + +/**begin repeat + * #name = set, get# + * #isget = 0, 1# + */ + +/* + * Advanced indexing iteration of arrays when there is a single indexing + * array which has the same memory order as the value array and both + * can be trivially iterated (single stride, aligned, no casting necessary). 
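+ * E.g. a simple integer fancy-indexing expression such as res = arr[ind]
+ * (or arr[ind] = vals for the set variant) with a one-dimensional arr and
+ * a trivially iterable integer index array can typically take this path;
+ * anything more involved goes through the general (non-trivial) mapiter
+ * loop further below.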
+ */ +NPY_NO_EXPORT int +mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind, + PyArrayObject *result) +{ + char *base_ptr, *ind_ptr, *result_ptr; + npy_intp self_stride, ind_stride, result_stride; + npy_intp fancy_dim = PyArray_DIM(self, 0); + + npy_intp itersize; + + int is_aligned = IsUintAligned(self) && IsUintAligned(result); + int needs_api = PyDataType_REFCHK(PyArray_DESCR(self)); + + PyArray_CopySwapFunc *copyswap = PyArray_DESCR(self)->f->copyswap; + NPY_BEGIN_THREADS_DEF; + + base_ptr = PyArray_BYTES(self); + self_stride = PyArray_STRIDE(self, 0); + + PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(ind, result, itersize, + ind_ptr, result_ptr, + ind_stride, result_stride) + + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(PyArray_SIZE(ind)); + } +#if !@isget@ + /* Check the indices beforehand */ + while (itersize--) { + npy_intp indval = *((npy_intp*)ind_ptr); + if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) { + return -1; + } + ind_ptr += ind_stride; + } + + /* + * Reset the ind_ptr and itersize, due to broadcasting it is always + * the size of ind. + */ + ind_ptr = PyArray_BYTES(ind); + itersize = PyArray_SIZE(ind); +#endif + + /* Optimization for aligned types that do not need the api */ + switch ((is_aligned && !needs_api) ? PyArray_ITEMSIZE(self) : 0) { + +/**begin repeat1 + * #elsize = 1, 2, 4, 8, 0# + * #copytype = npy_uint8, npy_uint16, npy_uint32, npy_uint64, 0# + */ + +#if @elsize@ + case @elsize@: +#else + default: +#endif + while (itersize--) { + char * self_ptr; + npy_intp indval = *((npy_intp*)ind_ptr); + assert(npy_is_aligned(ind_ptr, _UINT_ALIGN(npy_intp))); +#if @isget@ + if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) { + return -1; + } +#else + if (indval < 0) { + indval += fancy_dim; + } +#endif + self_ptr = base_ptr + indval * self_stride; + +#if @isget@ +#if @elsize@ + assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@))); + *(@copytype@ *)result_ptr = *(@copytype@ *)self_ptr; +#else + copyswap(result_ptr, self_ptr, 0, self); +#endif + +#else /* !@isget@ */ +#if @elsize@ + assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@))); + *(@copytype@ *)self_ptr = *(@copytype@ *)result_ptr; +#else + copyswap(self_ptr, result_ptr, 0, self); +#endif +#endif + + ind_ptr += ind_stride; + result_ptr += result_stride; + } + break; + +/**end repeat1**/ + } + + NPY_END_THREADS; + + return 0; +} + + +/* + * General advanced indexing iteration. + */ +NPY_NO_EXPORT int +mapiter_@name@(PyArrayMapIterObject *mit) +{ + npy_intp *counter, count; + int i, is_aligned; + + /* Cached mit info */ + int numiter = mit->numiter; + int needs_api = mit->needs_api; + /* Constant information */ + npy_intp fancy_dims[NPY_MAXDIMS]; + npy_intp fancy_strides[NPY_MAXDIMS]; +#if @isget@ + int iteraxis; +#endif + + char *baseoffset = mit->baseoffset; + char **outer_ptrs = mit->outer_ptrs; + npy_intp *outer_strides = mit->outer_strides; + PyArrayObject *array= mit->array; + + /* Fill constant information */ +#if @isget@ + iteraxis = mit->iteraxes[0]; +#endif + for (i = 0; i < numiter; i++) { + fancy_dims[i] = mit->fancy_dims[i]; + fancy_strides[i] = mit->fancy_strides[i]; + } + + /* + * Alignment information (swapping is never needed, since we buffer), + * could also check extra_op is buffered, but it should rarely matter. 
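+ * Note that is_aligned only gates the fixed-size fast paths chosen below
+ * (and the dtype transfer function selection); unaligned operands simply
+ * fall back to the generic copyswap / strided transfer route.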
+ */ + + is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op); + + if (mit->size == 0) { + return 0; + } + + if (mit->subspace_iter == NULL) { + /* + * Item by item copy situation, the operand is buffered + * so use copyswap. + */ + PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap; + + /* We have only one iterator handling everything */ + counter = NpyIter_GetInnerLoopSizePtr(mit->outer); + + /************ Optimized inner loops without subspace *************/ + +/**begin repeat1 + * #one_iter = 1, 0# + * #numiter = 1, numiter# + */ + +#if @one_iter@ + if (numiter == 1) { +#else + else { +#endif + NPY_BEGIN_THREADS_DEF; + if (!needs_api) { + NPY_BEGIN_THREADS; + } + + /* Optimization for aligned types that do not need the api */ + switch ((is_aligned && !needs_api) ? PyArray_ITEMSIZE(array) : 0) { + +/**begin repeat2 + * #elsize = 1, 2, 4, 8, 0# + * #copytype = npy_uint8, npy_uint16, npy_uint32, npy_uint64, 0# + */ + +#if @elsize@ + case @elsize@: +#else + default: +#endif + /* Outer iteration (safe because mit->size != 0) */ + do { +#if !@isget@ + /* + * When the API is needed the casting might fail + * TODO: (only if buffering is enabled). + */ + if (needs_api && PyErr_Occurred()) { + return -1; + } +#endif + count = *counter; + while (count--) { + char * self_ptr = baseoffset; + for (i=0; i < @numiter@; i++) { + npy_intp indval = *((npy_intp*)outer_ptrs[i]); + assert(npy_is_aligned(outer_ptrs[i], + _UINT_ALIGN(npy_intp))); + +#if @isget@ && @one_iter@ + if (check_and_adjust_index(&indval, fancy_dims[i], + iteraxis, _save) < 0 ) { + return -1; + } +#else + if (indval < 0) { + indval += fancy_dims[i]; + } +#endif + self_ptr += indval * fancy_strides[i]; + + /* advance indexing arrays */ + outer_ptrs[i] += outer_strides[i]; + } + +#if @isget@ +#if @elsize@ + assert(npy_is_aligned(outer_ptrs[i], + _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, + _UINT_ALIGN(@copytype@))); + *(@copytype@ *)(outer_ptrs[i]) = *(@copytype@ *)self_ptr; +#else + copyswap(outer_ptrs[i], self_ptr, 0, array); +#endif +#else /* !@isget@ */ +#if @elsize@ + assert(npy_is_aligned(outer_ptrs[i], + _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, + _UINT_ALIGN(@copytype@))); + *(@copytype@ *)self_ptr = *(@copytype@ *)(outer_ptrs[i]); +#else + copyswap(self_ptr, outer_ptrs[i], 0, array); +#endif +#endif + /* advance extra operand */ + outer_ptrs[i] += outer_strides[i]; + } + } while (mit->outer_next(mit->outer)); + + break; + +/**end repeat2**/ + } + NPY_END_THREADS; + } +/**end repeat1**/ + } + + /******************* Nested Iteration Situation *******************/ + else { + char *subspace_baseptrs[2]; + char **subspace_ptrs = mit->subspace_ptrs; + npy_intp *subspace_strides = mit->subspace_strides; + int is_subiter_trivial = 0; /* has three states */ + npy_intp reset_offsets[2] = {0, 0}; + + /* Use strided transfer functions for the inner loop */ + PyArray_StridedUnaryOp *stransfer = NULL; + NpyAuxData *transferdata = NULL; + npy_intp fixed_strides[2]; + +#if @isget@ + npy_intp src_itemsize = PyArray_ITEMSIZE(array); +#else + npy_intp src_itemsize = PyArray_ITEMSIZE(mit->extra_op); +#endif + + /* + * Get a dtype transfer function, since there are no + * buffers, this is safe. 
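+ * The fixed inner strides queried from the subspace iterator below allow
+ * PyArray_GetDTypeTransferFunction to pick a contiguous or aligned
+ * specialization when one applies, rather than a fully general loop.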
+ */ + NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides); + + if (PyArray_GetDTypeTransferFunction(is_aligned, +#if @isget@ + fixed_strides[0], fixed_strides[1], + PyArray_DESCR(array), PyArray_DESCR(mit->extra_op), +#else + fixed_strides[1], fixed_strides[0], + PyArray_DESCR(mit->extra_op), PyArray_DESCR(array), +#endif + 0, + &stransfer, &transferdata, + &needs_api) != NPY_SUCCEED) { + return -1; + } + + counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter); + if (*counter == PyArray_SIZE(mit->subspace)) { + /* + * subspace is trivially iterable. + * manipulate pointers to avoid expensive resetting + */ + is_subiter_trivial = 1; + } +/**begin repeat1 + * #one_iter = 1, 0# + * #numiter = 1, numiter# + */ + +#if @one_iter@ + if (numiter == 1) { +#else + else { +#endif + NPY_BEGIN_THREADS_DEF; + if (!needs_api) { + NPY_BEGIN_THREADS; + } + + /* Outer iteration (safe because mit->size != 0) */ + do { + char * self_ptr = baseoffset; + for (i=0; i < @numiter@; i++) { + npy_intp indval = *((npy_intp*)outer_ptrs[i]); + +#if @isget@ && @one_iter@ + if (check_and_adjust_index(&indval, fancy_dims[i], + iteraxis, _save) < 0 ) { + NPY_AUXDATA_FREE(transferdata); + return -1; + } +#else + if (indval < 0) { + indval += fancy_dims[i]; + } +#endif + + self_ptr += indval * fancy_strides[i]; + } + + /* + * Resetting is slow, so try to avoid resetting + * if subspace iteration is trivial. + * Watch out: reset_offsets are kept outside of the loop, + * assuming the subspaces of different external iterations + * share the same structure. + */ + if (is_subiter_trivial <= 1) { + /* slower resetting: first iteration or non-trivial subspace */ + + char * errmsg = NULL; + subspace_baseptrs[0] = self_ptr; + subspace_baseptrs[1] = mit->extra_op_ptrs[0]; + + /* (can't really fail, since no buffering necessary) */ + if (!NpyIter_ResetBasePointers(mit->subspace_iter, + subspace_baseptrs, + &errmsg)) { + NPY_END_THREADS; + PyErr_SetString(PyExc_ValueError, errmsg); + NPY_AUXDATA_FREE(transferdata); + return -1; + } + if (is_subiter_trivial != 0) { + /* reset_offsets are nonzero for negative strides.*/ + reset_offsets[0] = subspace_ptrs[0] - self_ptr; + reset_offsets[1] = subspace_ptrs[1] - mit->extra_op_ptrs[0]; + + /* use the faster adjustment further on */ + is_subiter_trivial ++; + } + } + else { + /* + * faster resetting if the subspace iteration is trivial. + * reset_offsets are zero for positive strides, + * for negative strides this shifts the pointer to the last + * item. + */ + subspace_ptrs[0] = self_ptr + reset_offsets[0]; + subspace_ptrs[1] = mit->extra_op_ptrs[0] + reset_offsets[1]; + } + +#if !@isget@ + /* + * When the API is needed the casting might fail + * TODO: Could only check if casting is unsafe, or even just + * not at all... 
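+ * For now the code below conservatively polls PyErr_Occurred() once per
+ * outer iteration whenever needs_api is set, which should be cheap
+ * compared to the per-iteration transfer work.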
+ */ + if (needs_api && PyErr_Occurred()) { + NPY_AUXDATA_FREE(transferdata); + return -1; + } +#endif + + do { + +#if @isget@ + stransfer(subspace_ptrs[1], subspace_strides[1], + subspace_ptrs[0], subspace_strides[0], + *counter, src_itemsize, transferdata); +#else + stransfer(subspace_ptrs[0], subspace_strides[0], + subspace_ptrs[1], subspace_strides[1], + *counter, src_itemsize, transferdata); +#endif + } while (mit->subspace_next(mit->subspace_iter)); + + mit->extra_op_next(mit->extra_op_iter); + } while (mit->outer_next(mit->outer)); + NPY_END_THREADS; + } +/**end repeat1**/ + + NPY_AUXDATA_FREE(transferdata); + } + return 0; +} + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/nditer_templ.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/nditer_templ.c.src new file mode 100644 index 0000000000..0f0d599723 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/nditer_templ.c.src @@ -0,0 +1,615 @@ +/* + * This file implements the API functions for NumPy's nditer that + * are specialized using the templating system. + * + * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com) + * The University of British Columbia + * + * See LICENSE.txt for the license. + */ + +/* Indicate that this .c file is allowed to include the header */ +#define NPY_ITERATOR_IMPLEMENTATION_CODE +#include "nditer_impl.h" + +/* SPECIALIZED iternext functions that handle the non-buffering part */ + +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_EXLOOP, + * NPY_ITFLAG_RANGE, + * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# + * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# + */ +/**begin repeat1 + * #const_ndim = 1, 2, NPY_MAXDIMS# + * #tag_ndim = 1, 2, ANY# + */ +/**begin repeat2 + * #const_nop = 1, 2, NPY_MAXDIMS# + * #tag_nop = 1, 2, ANY# + */ + +/* Specialized iternext (@const_itflags@,@tag_ndim@,@tag_nop@) */ +static int +npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@( + NpyIter *iter) +{ +#if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) + const npy_uint32 itflags = @const_itflags@; +# if @const_ndim@ >= NPY_MAXDIMS + int idim, ndim = NIT_NDIM(iter); +# endif +# if @const_nop@ < NPY_MAXDIMS + const int nop = @const_nop@; +# else + int nop = NIT_NOP(iter); +# endif + + NpyIter_AxisData *axisdata0; + npy_intp istrides, nstrides = NAD_NSTRIDES(); +#endif +#if @const_ndim@ > 1 + NpyIter_AxisData *axisdata1; + npy_intp sizeof_axisdata; +#endif +#if @const_ndim@ > 2 + NpyIter_AxisData *axisdata2; +#endif + +#if (@const_itflags@&NPY_ITFLAG_RANGE) + /* When ranged iteration is enabled, use the iterindex */ + if (++NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + return 0; + } +#endif + +#if @const_ndim@ > 1 + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); +#endif + +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) || (@const_ndim@ > 1) + axisdata0 = NIT_AXISDATA(iter); +# endif +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) + /* Increment index 0 */ + NAD_INDEX(axisdata0)++; + /* Increment pointer 0 */ + for (istrides = 0; istrides < nstrides; ++istrides) { + NAD_PTRS(axisdata0)[istrides] += NAD_STRIDES(axisdata0)[istrides]; + } +# endif + +#if @const_ndim@ == 1 + +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) + /* Finished when the index equals the shape */ + return NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0); +# else + return 0; +# endif + +#else + +# if !(@const_itflags@&NPY_ITFLAG_EXLOOP) + if (NAD_INDEX(axisdata0) < NAD_SHAPE(axisdata0)) { + return 1; + } +# endif + + axisdata1 = 
NIT_INDEX_AXISDATA(axisdata0, 1);
+ /* Increment index 1 */
+ NAD_INDEX(axisdata1)++;
+ /* Increment pointer 1 */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata1)[istrides] += NAD_STRIDES(axisdata1)[istrides];
+ }
+
+ if (NAD_INDEX(axisdata1) < NAD_SHAPE(axisdata1)) {
+ /* Reset the 1st index to 0 */
+ NAD_INDEX(axisdata0) = 0;
+ /* Reset the 1st pointer to the value of the 2nd */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata1)[istrides];
+ }
+ return 1;
+ }
+
+# if @const_ndim@ == 2
+ return 0;
+# else
+
+ axisdata2 = NIT_INDEX_AXISDATA(axisdata1, 1);
+ /* Increment index 2 */
+ NAD_INDEX(axisdata2)++;
+ /* Increment pointer 2 */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides];
+ }
+
+ if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) {
+ /* Reset the 1st and 2nd indices to 0 */
+ NAD_INDEX(axisdata0) = 0;
+ NAD_INDEX(axisdata1) = 0;
+ /* Reset the 1st and 2nd pointers to the value of the 3rd */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata0)[istrides] = NAD_PTRS(axisdata2)[istrides];
+ NAD_PTRS(axisdata1)[istrides] = NAD_PTRS(axisdata2)[istrides];
+ }
+ return 1;
+ }
+
+ for (idim = 3; idim < ndim; ++idim) {
+ NIT_ADVANCE_AXISDATA(axisdata2, 1);
+ /* Increment the index */
+ NAD_INDEX(axisdata2)++;
+ /* Increment the pointer */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata2)[istrides] += NAD_STRIDES(axisdata2)[istrides];
+ }
+
+
+ if (NAD_INDEX(axisdata2) < NAD_SHAPE(axisdata2)) {
+ /* Reset the indices and pointers of all previous axisdatas */
+ axisdata1 = axisdata2;
+ do {
+ NIT_ADVANCE_AXISDATA(axisdata1, -1);
+ /* Reset the index to 0 */
+ NAD_INDEX(axisdata1) = 0;
+ /* Reset the pointer to the updated value */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ NAD_PTRS(axisdata1)[istrides] =
+ NAD_PTRS(axisdata2)[istrides];
+ }
+ } while (axisdata1 != axisdata0);
+
+ return 1;
+ }
+ }
+
+ return 0;
+
+# endif /* ndim != 2 */
+
+#endif /* ndim != 1 */
+}
+
+/**end repeat2**/
+/**end repeat1**/
+/**end repeat**/
+
+
+/**begin repeat
+ * #const_nop = 1, 2, 3, 4, NPY_MAXDIMS#
+ * #tag_nop = 1, 2, 3, 4, ANY#
+ */
+
+/*
+ * Iternext function that handles the reduction buffering part. This
+ * is done with a double loop to avoid frequent re-buffering.
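+ * The inner level walks the items already in the buffer, while the outer
+ * level (NBF_REDUCE_POS vs. NBF_REDUCE_OUTERSIZE) advances the reduce outer
+ * pointers and reuses the buffered data; the buffers are only written back
+ * and refilled once both levels are exhausted.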
+ */ +static int +npyiter_buffered_reduce_iternext_iters@tag_nop@(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ +#if @const_nop@ >= NPY_MAXDIMS + int nop = NIT_NOP(iter); +#else + const int nop = @const_nop@; +#endif + + int iop; + + NpyIter_AxisData *axisdata; + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + char **ptrs; + char *prev_dataptrs[NPY_MAXARGS]; + + ptrs = NBF_PTRS(bufferdata); + + /* + * If the iterator handles the inner loop, need to increment all + * the indices and pointers + */ + if (!(itflags&NPY_ITFLAG_EXLOOP)) { + /* Increment within the buffer */ + if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { + npy_intp *strides; + + strides = NBF_STRIDES(bufferdata); + for (iop = 0; iop < nop; ++iop) { + ptrs[iop] += strides[iop]; + } + return 1; + } + } + else { + NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); + } + + NPY_IT_DBG_PRINT1("Iterator: Finished iteration %d of outer reduce loop\n", + (int)NBF_REDUCE_POS(bufferdata)); + /* The outer increment for the reduce double loop */ + if (++NBF_REDUCE_POS(bufferdata) < NBF_REDUCE_OUTERSIZE(bufferdata)) { + npy_intp *reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); + char **reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata); + for (iop = 0; iop < nop; ++iop) { + char *ptr = reduce_outerptrs[iop] + reduce_outerstrides[iop]; + ptrs[iop] = ptr; + reduce_outerptrs[iop] = ptr; + } + NBF_BUFITEREND(bufferdata) = NIT_ITERINDEX(iter) + NBF_SIZE(bufferdata); + return 1; + } + + /* Save the previously used data pointers */ + axisdata = NIT_AXISDATA(iter); + memcpy(prev_dataptrs, NAD_PTRS(axisdata), NPY_SIZEOF_INTP*nop); + + /* Write back to the arrays */ + npyiter_copy_from_buffers(iter); + + /* Check if we're past the end */ + if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + NBF_SIZE(bufferdata) = 0; + return 0; + } + /* Increment to the next buffer */ + else { + npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); + } + + /* Prepare the next buffers and set iterend/size */ + npyiter_copy_to_buffers(iter, prev_dataptrs); + + return 1; +} + +/**end repeat**/ + +/* iternext function that handles the buffering part */ +static int +npyiter_buffered_iternext(NpyIter *iter) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + /*int ndim = NIT_NDIM(iter);*/ + int nop = NIT_NOP(iter); + + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + + /* + * If the iterator handles the inner loop, need to increment all + * the indices and pointers + */ + if (!(itflags&NPY_ITFLAG_EXLOOP)) { + /* Increment within the buffer */ + if (++NIT_ITERINDEX(iter) < NBF_BUFITEREND(bufferdata)) { + int iop; + npy_intp *strides; + char **ptrs; + + strides = NBF_STRIDES(bufferdata); + ptrs = NBF_PTRS(bufferdata); + for (iop = 0; iop < nop; ++iop) { + ptrs[iop] += strides[iop]; + } + return 1; + } + } + else { + NIT_ITERINDEX(iter) += NBF_SIZE(bufferdata); + } + + /* Write back to the arrays */ + npyiter_copy_from_buffers(iter); + + /* Check if we're past the end */ + if (NIT_ITERINDEX(iter) >= NIT_ITEREND(iter)) { + NBF_SIZE(bufferdata) = 0; + return 0; + } + /* Increment to the next buffer */ + else { + npyiter_goto_iterindex(iter, NIT_ITERINDEX(iter)); + } + + /* Prepare the next buffers and set iterend/size */ + npyiter_copy_to_buffers(iter, NULL); + + return 1; +} + +/**end repeat2**/ +/**end repeat1**/ +/**end repeat**/ + +/* Specialization of iternext for when the iteration size is 1 */ +static int +npyiter_iternext_sizeone(NpyIter *iter) +{ + return 0; +} + +/*NUMPY_API + * Compute the 
specialized iteration function for an iterator + * + * If errmsg is non-NULL, it should point to a variable which will + * receive the error message, and no Python exception will be set. + * This is so that the function can be called from code not holding + * the GIL. + */ +NPY_NO_EXPORT NpyIter_IterNextFunc * +NpyIter_GetIterNext(NpyIter *iter, char **errmsg) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + if (NIT_ITERSIZE(iter) < 0) { + if (errmsg == NULL) { + PyErr_SetString(PyExc_ValueError, "iterator is too large"); + } + else { + *errmsg = "iterator is too large"; + } + return NULL; + } + + /* + * When there is just one iteration and buffering is disabled + * the iternext function is very simple. + */ + if (itflags&NPY_ITFLAG_ONEITERATION) { + return &npyiter_iternext_sizeone; + } + + /* + * If buffering is enabled. + */ + if (itflags&NPY_ITFLAG_BUFFER) { + if (itflags&NPY_ITFLAG_REDUCE) { + switch (nop) { + case 1: + return &npyiter_buffered_reduce_iternext_iters1; + case 2: + return &npyiter_buffered_reduce_iternext_iters2; + case 3: + return &npyiter_buffered_reduce_iternext_iters3; + case 4: + return &npyiter_buffered_reduce_iternext_iters4; + default: + return &npyiter_buffered_reduce_iternext_itersANY; + } + } + else { + return &npyiter_buffered_iternext; + } + } + + /* + * Ignore all the flags that don't affect the iterator memory + * layout or the iternext function. Currently only HASINDEX, + * EXLOOP, and RANGE affect them here. + */ + itflags &= (NPY_ITFLAG_HASINDEX|NPY_ITFLAG_EXLOOP|NPY_ITFLAG_RANGE); + + /* Switch statements let the compiler optimize this most effectively */ + switch (itflags) { + /* + * The combinations HASINDEX|EXLOOP and RANGE|EXLOOP are excluded + * by the New functions + */ +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_EXLOOP, + * NPY_ITFLAG_RANGE, + * NPY_ITFLAG_RANGE|NPY_ITFLAG_HASINDEX# + * #tag_itflags = 0, IND, NOINN, RNG, RNGuIND# + */ + case @const_itflags@: + switch (ndim) { +/**begin repeat1 + * #const_ndim = 1, 2# + * #tag_ndim = 1, 2# + */ + case @const_ndim@: + switch (nop) { +/**begin repeat2 + * #const_nop = 1, 2# + * #tag_nop = 1, 2# + */ + case @const_nop@: + return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_iters@tag_nop@; +/**end repeat2**/ + /* Not specialized on nop */ + default: + return &npyiter_iternext_itflags@tag_itflags@_dims@tag_ndim@_itersANY; + } +/**end repeat1**/ + /* Not specialized on ndim */ + default: + switch (nop) { +/**begin repeat1 + * #const_nop = 1, 2# + * #tag_nop = 1, 2# + */ + case @const_nop@: + return &npyiter_iternext_itflags@tag_itflags@_dimsANY_iters@tag_nop@; +/**end repeat1**/ + /* Not specialized on nop */ + default: + return &npyiter_iternext_itflags@tag_itflags@_dimsANY_itersANY; + } + } +/**end repeat**/ + } + /* The switch above should have caught all the possibilities. 
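+ * Reaching this point means an itflags/ndim/nop combination showed up that
+ * has no specialization generated above, so report it as an internal error
+ * rather than returning a bogus function pointer.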
*/ + if (errmsg == NULL) { + PyErr_Format(PyExc_ValueError, + "GetIterNext internal iterator error - unexpected " + "itflags/ndim/nop combination (%04x/%d/%d)", + (int)itflags, (int)ndim, (int)nop); + } + else { + *errmsg = "GetIterNext internal iterator error - unexpected " + "itflags/ndim/nop combination"; + } + return NULL; +} + + +/* SPECIALIZED getindex functions */ + +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_IDENTPERM, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, + * NPY_ITFLAG_NEGPERM, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, + * NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# + * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, + * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# + */ +static void +npyiter_get_multi_index_itflags@tag_itflags@( + NpyIter *iter, npy_intp *out_multi_index) +{ + const npy_uint32 itflags = @const_itflags@; + int idim, ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + npy_intp sizeof_axisdata; + NpyIter_AxisData *axisdata; +#if !((@const_itflags@)&NPY_ITFLAG_IDENTPERM) + npy_int8 *perm = NIT_PERM(iter); +#endif + + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); +#if ((@const_itflags@)&NPY_ITFLAG_IDENTPERM) + out_multi_index += ndim-1; + for(idim = 0; idim < ndim; ++idim, --out_multi_index, + NIT_ADVANCE_AXISDATA(axisdata, 1)) { + *out_multi_index = NAD_INDEX(axisdata); + } +#elif !((@const_itflags@)&NPY_ITFLAG_NEGPERM) + for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p = perm[idim]; + out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); + } +#else + for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + npy_int8 p = perm[idim]; + if (p < 0) { + /* If the perm entry is negative, reverse the index */ + out_multi_index[ndim+p] = NAD_SHAPE(axisdata) - NAD_INDEX(axisdata) - 1; + } + else { + out_multi_index[ndim-p-1] = NAD_INDEX(axisdata); + } + } +#endif /* not ident perm */ +} +/**end repeat**/ + +/*NUMPY_API + * Compute a specialized get_multi_index function for the iterator + * + * If errmsg is non-NULL, it should point to a variable which will + * receive the error message, and no Python exception will be set. + * This is so that the function can be called from code not holding + * the GIL. 
+ */ +NPY_NO_EXPORT NpyIter_GetMultiIndexFunc * +NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + int ndim = NIT_NDIM(iter); + int nop = NIT_NOP(iter); + + /* These flags must be correct */ + if ((itflags&(NPY_ITFLAG_HASMULTIINDEX|NPY_ITFLAG_DELAYBUF)) != + NPY_ITFLAG_HASMULTIINDEX) { + if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { + if (errmsg == NULL) { + PyErr_SetString(PyExc_ValueError, + "Cannot retrieve a GetMultiIndex function for an " + "iterator that doesn't track a multi-index."); + } + else { + *errmsg = "Cannot retrieve a GetMultiIndex function for an " + "iterator that doesn't track a multi-index."; + } + return NULL; + } + else { + if (errmsg == NULL) { + PyErr_SetString(PyExc_ValueError, + "Cannot retrieve a GetMultiIndex function for an " + "iterator that used DELAY_BUFALLOC before a Reset call"); + } + else { + *errmsg = "Cannot retrieve a GetMultiIndex function for an " + "iterator that used DELAY_BUFALLOC before a " + "Reset call"; + } + return NULL; + } + } + + /* + * Only these flags affect the iterator memory layout or + * the get_multi_index behavior. IDENTPERM and NEGPERM are mutually + * exclusive, so that reduces the number of cases slightly. + */ + itflags &= (NPY_ITFLAG_HASINDEX | + NPY_ITFLAG_IDENTPERM | + NPY_ITFLAG_NEGPERM | + NPY_ITFLAG_BUFFER); + + switch (itflags) { +/**begin repeat + * #const_itflags = 0, + * NPY_ITFLAG_HASINDEX, + * NPY_ITFLAG_IDENTPERM, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM, + * NPY_ITFLAG_NEGPERM, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM, + * NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER, + * NPY_ITFLAG_HASINDEX|NPY_ITFLAG_NEGPERM|NPY_ITFLAG_BUFFER# + * #tag_itflags = 0, IND, IDP, INDuIDP, NEGP, INDuNEGP, + * BUF, INDuBUF, IDPuBUF, INDuIDPuBUF, NEGPuBUF, INDuNEGPuBUF# + */ + case @const_itflags@: + return npyiter_get_multi_index_itflags@tag_itflags@; +/**end repeat**/ + } + /* The switch above should have caught all the possibilities. 
*/ + if (errmsg == NULL) { + PyErr_Format(PyExc_ValueError, + "GetGetMultiIndex internal iterator error - unexpected " + "itflags/ndim/nop combination (%04x/%d/%d)", + (int)itflags, (int)ndim, (int)nop); + } + else { + *errmsg = "GetGetMultiIndex internal iterator error - unexpected " + "itflags/ndim/nop combination"; + } + return NULL; + +} + +#undef NPY_ITERATOR_IMPLEMENTATION_CODE diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/refcount.h b/contrib/python/numpy/py2/numpy/core/src/multiarray/refcount.h new file mode 100644 index 0000000000..761d53dd0d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/refcount.h @@ -0,0 +1,19 @@ +#ifndef _NPY_PRIVATE_REFCOUNT_H_ +#define _NPY_PRIVATE_REFCOUNT_H_ + +NPY_NO_EXPORT void +PyArray_Item_INCREF(char *data, PyArray_Descr *descr); + +NPY_NO_EXPORT void +PyArray_Item_XDECREF(char *data, PyArray_Descr *descr); + +NPY_NO_EXPORT int +PyArray_INCREF(PyArrayObject *mp); + +NPY_NO_EXPORT int +PyArray_XDECREF(PyArrayObject *mp); + +NPY_NO_EXPORT void +PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj); + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/multiarray/scalartypes.c.src b/contrib/python/numpy/py2/numpy/core/src/multiarray/scalartypes.c.src new file mode 100644 index 0000000000..52de31289d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/multiarray/scalartypes.c.src @@ -0,0 +1,4496 @@ +/* -*- c -*- */ +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#ifndef _MULTIARRAYMODULE +#define _MULTIARRAYMODULE +#endif + +#include "numpy/arrayobject.h" +#include "numpy/npy_math.h" +#include "numpy/halffloat.h" +#include "numpy/arrayscalars.h" + +#include "npy_pycompat.h" + +#include "npy_config.h" +#include "mapping.h" +#include "ctors.h" +#include "usertypes.h" +#include "numpyos.h" +#include "common.h" +#include "scalartypes.h" +#include "_datetime.h" +#include "datetime_strings.h" +#include "alloc.h" +#include "npy_import.h" +#include "dragon4.h" +#include "npy_longdouble.h" +#include "buffer.h" + +#include <stdlib.h> + +#include "binop_override.h" + +NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[] = { + {PyObject_HEAD_INIT(&PyBoolArrType_Type) 0}, + {PyObject_HEAD_INIT(&PyBoolArrType_Type) 1}, +}; + +/* TimeInteger is deleted, but still here to fill the API slot */ +NPY_NO_EXPORT PyTypeObject PyTimeIntegerArrType_Type; + +/* + * Inheritance is established later when tp_bases is set (or tp_base for + * single inheritance) + */ + +/**begin repeat + * #name = number, integer, signedinteger, unsignedinteger, inexact, + * floating, complexfloating, flexible, character# + * #NAME = Number, Integer, SignedInteger, UnsignedInteger, Inexact, + * Floating, ComplexFloating, Flexible, Character# + */ +NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "numpy.@name@", /* tp_name*/ + sizeof(PyObject), /* tp_basicsize*/ + 0, /* tp_itemsize */ + /* methods */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* 
tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; +/**end repeat**/ + +static PyObject * +gentype_alloc(PyTypeObject *type, Py_ssize_t nitems) +{ + PyObject *obj; + const size_t size = _PyObject_VAR_SIZE(type, nitems + 1); + + obj = (PyObject *)PyObject_Malloc(size); + /* + * Fixme. Need to check for no memory. + * If we don't need to zero memory, we could use + * PyObject_{New, NewVar} for this whole function. + */ + memset(obj, 0, size); + if (type->tp_itemsize == 0) { + PyObject_Init(obj, type); + } + else { + (void) PyObject_InitVar((PyVarObject *)obj, type, nitems); + } + return obj; +} + +static void +gentype_dealloc(PyObject *v) +{ + _dealloc_cached_buffer_info(v); + Py_TYPE(v)->tp_free(v); +} + +static void +gentype_free(PyObject *v) +{ + /* + * have an explicit tp_free to enforce inheritance from it. + * PyObject_Free is also the tp_free of PyBaseObject so python does not + * COPYSLOT it, instead it takes the next parent PyInt which has a + * different allocator + */ + PyObject_Free(v); +} + + +static PyObject * +gentype_power(PyObject *m1, PyObject *m2, PyObject *modulo) +{ + if (modulo != Py_None) { + /* modular exponentiation is not implemented (gh-8804) */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_power, gentype_power); + return PyArray_Type.tp_as_number->nb_power(m1, m2, Py_None); +} + +static PyObject * +gentype_generic_method(PyObject *self, PyObject *args, PyObject *kwds, + char *str) +{ + PyObject *arr, *meth, *ret; + + arr = PyArray_FromScalar(self, NULL); + if (arr == NULL) { + return NULL; + } + meth = PyObject_GetAttrString(arr, str); + if (meth == NULL) { + Py_DECREF(arr); + return NULL; + } + if (kwds == NULL) { + ret = PyObject_CallObject(meth, args); + } + else { + ret = PyObject_Call(meth, args, kwds); + } + Py_DECREF(meth); + Py_DECREF(arr); + if (ret && PyArray_Check(ret)) { + return PyArray_Return((PyArrayObject *)ret); + } + else { + return ret; + } +} + +static PyObject * +gentype_add(PyObject *m1, PyObject* m2) +{ + /* special case str.__radd__, which should not call array_add */ + if (PyString_Check(m1) || PyUnicode_Check(m1)) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_add, gentype_add); + return PyArray_Type.tp_as_number->nb_add(m1, m2); +} + +/**begin repeat + * + * #name = subtract, remainder, divmod, lshift, rshift, + * and, xor, or, floor_divide, true_divide# + */ +static PyObject * +gentype_@name@(PyObject *m1, PyObject *m2) +{ + BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_@name@, gentype_@name@); + return PyArray_Type.tp_as_number->nb_@name@(m1, m2); +} + +/**end repeat**/ + +#if !defined(NPY_PY3K) +/**begin repeat + * + * #name = divide# + */ +static PyObject * +gentype_@name@(PyObject *m1, PyObject *m2) +{ + BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_@name@, gentype_@name@); + return PyArray_Type.tp_as_number->nb_@name@(m1, m2); +} +/**end repeat**/ +#endif + +/* Get a nested slot, or NULL if absent */ +#define GET_NESTED_SLOT(type, group, slot) \ 
+ ((type)->group == NULL ? NULL : (type)->group->slot) + +static PyObject * +gentype_multiply(PyObject *m1, PyObject *m2) +{ + /* + * If the other object supports sequence repeat and not number multiply + * we fall back on the python builtin to invoke the sequence repeat, rather + * than promoting both arguments to ndarray. + * This covers a list repeat by numpy scalars. + * A python defined class will always only have the nb_multiply slot and + * some classes may have neither defined. For the latter we want need + * to give the normal case a chance to convert the object to ndarray. + * Probably no class has both defined, but if they do, prefer number. + */ + if (!PyArray_IsScalar(m1, Generic) && + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + if (!PyArray_IsScalar(m2, Generic) && + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + /* All normal cases are handled by PyArray's multiply */ + BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_multiply, gentype_multiply); + return PyArray_Type.tp_as_number->nb_multiply(m1, m2); +} + +/**begin repeat + * + * #name = positive, negative, absolute, invert, int, float# + */ +static PyObject * +gentype_@name@(PyObject *m1) +{ + PyObject *arr, *ret; + + arr = PyArray_FromScalar(m1, NULL); + if (arr == NULL) { + return NULL; + } + ret = Py_TYPE(arr)->tp_as_number->nb_@name@(arr); + Py_DECREF(arr); + return ret; +} +/**end repeat**/ + +#if !defined(NPY_PY3K) +/**begin repeat + * + * #name = long, oct, hex# + */ +static PyObject * +gentype_@name@(PyObject *m1) +{ + PyObject *arr, *ret; + + arr = PyArray_FromScalar(m1, NULL); + if (arr == NULL) { + return NULL; + } + ret = Py_TYPE(arr)->tp_as_number->nb_@name@(arr); + Py_DECREF(arr); + return ret; +} +/**end repeat**/ +#endif + +static int +gentype_nonzero_number(PyObject *m1) +{ + PyObject *arr; + int ret; + + arr = PyArray_FromScalar(m1, NULL); + if (arr == NULL) { + return -1; + } +#if defined(NPY_PY3K) + ret = Py_TYPE(arr)->tp_as_number->nb_bool(arr); +#else + ret = Py_TYPE(arr)->tp_as_number->nb_nonzero(arr); +#endif + Py_DECREF(arr); + return ret; +} + +static PyObject * +genint_type_str(PyObject *self) +{ + PyObject *item, *item_str; + item = gentype_generic_method(self, NULL, NULL, "item"); + if (item == NULL) { + return NULL; + } + + item_str = PyObject_Str(item); + Py_DECREF(item); + return item_str; +} + +/* + * The __format__ method for PEP 3101. + */ +static PyObject * +gentype_format(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + PyObject *obj, *ret; + +#if defined(NPY_PY3K) + if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) { + return NULL; + } +#else + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) { + return NULL; + } + + if (!PyUnicode_Check(format_spec) && !PyString_Check(format_spec)) { + PyErr_SetString(PyExc_TypeError, + "format must be a string"); + return NULL; + } +#endif + + /* + * Convert to an appropriate Python type and call its format. + * TODO: For some types, like long double, this isn't right, + * because it throws away precision. 
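+ * Going through nb_float below is what narrows a longdouble to a C double
+ * (a Python float only stores a double); types without a dedicated branch
+ * simply fall back to str() before formatting.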
+ */ + if (Py_TYPE(self) == &PyBoolArrType_Type) { + obj = PyBool_FromLong(((PyBoolScalarObject *)self)->obval); + } + else if (PyArray_IsScalar(self, Integer)) { +#if defined(NPY_PY3K) + obj = Py_TYPE(self)->tp_as_number->nb_int(self); +#else + obj = Py_TYPE(self)->tp_as_number->nb_long(self); +#endif + } + else if (PyArray_IsScalar(self, Floating)) { + obj = Py_TYPE(self)->tp_as_number->nb_float(self); + } + else if (PyArray_IsScalar(self, ComplexFloating)) { + double val[2]; + PyArray_Descr *dtype = PyArray_DescrFromScalar(self); + + if (dtype == NULL) { + return NULL; + } + if (PyArray_CastScalarDirect(self, dtype, &val[0], NPY_CDOUBLE) < 0) { + Py_DECREF(dtype); + return NULL; + } + obj = PyComplex_FromDoubles(val[0], val[1]); + Py_DECREF(dtype); + } + else { + obj = PyObject_Str(self); + } + + if (obj == NULL) { + return NULL; + } + + ret = PyObject_Format(obj, format_spec); + Py_DECREF(obj); + return ret; +} + +#ifdef FORCE_NO_LONG_DOUBLE_FORMATTING +#undef NPY_LONGDOUBLE_FMT +#define NPY_LONGDOUBLE_FMT NPY_DOUBLE_FMT +#endif + +/**begin repeat + * #name = half, float, double, longdouble# + * #Name = Half, Float, Double, LongDouble# + * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_half, npy_float, npy_double, npy_longdouble# + * #suff = h, f, d, l# + */ + +NPY_NO_EXPORT PyObject * +format_@name@(@type@ val, npy_bool scientific, + int precision, int sign, TrimMode trim, + int pad_left, int pad_right, int exp_digits) +{ + if (scientific) { + return Dragon4_Scientific_@Name@(&val, + DigitMode_Unique, precision, + sign, trim, pad_left, exp_digits); + } + else { + return Dragon4_Positional_@Name@(&val, + DigitMode_Unique, CutoffMode_TotalLength, precision, + sign, trim, pad_left, pad_right); + } +} + + +/**end repeat**/ + +/* + * over-ride repr and str of array-scalar strings and unicode to + * remove NULL bytes and then call the corresponding functions + * of string and unicode. + */ + +/**begin repeat + * #name = string*2,unicode*2# + * #form = (repr,str)*2# + * #Name = String*2,Unicode*2# + * #NAME = STRING*2,UNICODE*2# + * #extra = AndSize*2,,# + * #type = npy_char*2, Py_UNICODE*2# + */ +static PyObject * +@name@type_@form@(PyObject *self) +{ + const @type@ *dptr, *ip; + int len; + PyObject *new; + PyObject *ret; + + ip = dptr = Py@Name@_AS_@NAME@(self); + len = Py@Name@_GET_SIZE(self); + dptr += len-1; + while(len > 0 && *dptr-- == 0) { + len--; + } + new = Py@Name@_From@Name@@extra@(ip, len); + if (new == NULL) { + return PyUString_FromString(""); + } + ret = Py@Name@_Type.tp_@form@(new); + Py_DECREF(new); + return ret; +} +/**end repeat**/ + + +/* + * Convert array of bytes to a string representation much like bytes.__repr__, + * but convert all bytes (including ASCII) to the `\x00` notation with + * uppercase hex codes (FF not ff). 
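+ *
+ * As a sketch of the Python-level result (byte values are just
+ * examples): repr(np.void(b'\x00\xab')) should come out roughly as
+ * void(b'\x00\xAB') and str() as b'\x00\xAB', i.e. every byte is
+ * escaped and the hex digits are upper-cased.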
+ * + * Largely copied from _Py_strhex_impl in CPython implementation + */ +static NPY_INLINE PyObject * +_void_to_hex(const char* argbuf, const Py_ssize_t arglen, + const char *schars, const char *bprefix, const char *echars) +{ + PyObject *retval; + int extrachars, slen; + char *retbuf; + Py_ssize_t i, j; + char const *hexdigits = "0123456789ABCDEF"; + + extrachars = strlen(schars) + strlen(echars); + slen = extrachars + arglen*(2 + strlen(bprefix)); + + if (arglen > (PY_SSIZE_T_MAX / 2) - extrachars) { + return PyErr_NoMemory(); + } + + retbuf = (char *)PyMem_Malloc(slen); + if (!retbuf) { + return PyErr_NoMemory(); + } + + memcpy(retbuf, schars, strlen(schars)); + j = strlen(schars); + + for (i = 0; i < arglen; i++) { + unsigned char c; + memcpy(&retbuf[j], bprefix, strlen(bprefix)); + j += strlen(bprefix); + c = (argbuf[i] >> 4) & 0xf; + retbuf[j++] = hexdigits[c]; + c = argbuf[i] & 0xf; + retbuf[j++] = hexdigits[c]; + } + memcpy(&retbuf[j], echars, strlen(echars)); + + retval = PyUString_FromStringAndSize(retbuf, slen); + PyMem_Free(retbuf); + + return retval; +} + +static PyObject * +_void_scalar_repr(PyObject *obj) { + static PyObject *reprfunc = NULL; + npy_cache_import("numpy.core.arrayprint", + "_void_scalar_repr", &reprfunc); + if (reprfunc == NULL) { + return NULL; + } + return PyObject_CallFunction(reprfunc, "O", obj); +} + +static PyObject * +voidtype_repr(PyObject *self) +{ + PyVoidScalarObject *s = (PyVoidScalarObject*) self; + if (PyDataType_HASFIELDS(s->descr)) { + return _void_scalar_repr(self); + } + return _void_to_hex(s->obval, s->descr->elsize, "void(b'", "\\x", "')"); +} + +static PyObject * +voidtype_str(PyObject *self) +{ + PyVoidScalarObject *s = (PyVoidScalarObject*) self; + if (PyDataType_HASFIELDS(s->descr)) { + return _void_scalar_repr(self); + } + return _void_to_hex(s->obval, s->descr->elsize, "b'", "\\x", "'"); +} + +static PyObject * +datetimetype_repr(PyObject *self) +{ + PyDatetimeScalarObject *scal; + npy_datetimestruct dts; + PyObject *ret; + char iso[NPY_DATETIME_MAX_ISO8601_STRLEN]; + NPY_DATETIMEUNIT unit; + + if (!PyArray_IsScalar(self, Datetime)) { + PyErr_SetString(PyExc_RuntimeError, + "Called NumPy datetime repr on a non-datetime type"); + return NULL; + } + + scal = (PyDatetimeScalarObject *)self; + + if (convert_datetime_to_datetimestruct(&scal->obmeta, + scal->obval, &dts) < 0) { + return NULL; + } + + unit = scal->obmeta.base; + if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 0, 0, + unit, -1, NPY_SAFE_CASTING) < 0) { + return NULL; + } + + /* + * For straight units or generic units, the unit will be deduced + * from the string, so it's not necessary to specify it. 
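+ *
+ * E.g. (illustrative values only) a day-unit datetime reprs as
+ * numpy.datetime64('2011-03-15'), the 'D' unit being implied by the
+ * string, whereas an hour-unit value keeps an explicit unit, as in
+ * numpy.datetime64('2011-03-15T12','h').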
+ */ + if ((scal->obmeta.num == 1 && scal->obmeta.base != NPY_FR_h) || + scal->obmeta.base == NPY_FR_GENERIC) { + ret = PyUString_FromString("numpy.datetime64('"); + PyUString_ConcatAndDel(&ret, + PyUString_FromString(iso)); + PyUString_ConcatAndDel(&ret, + PyUString_FromString("')")); + } + else { + ret = PyUString_FromString("numpy.datetime64('"); + PyUString_ConcatAndDel(&ret, + PyUString_FromString(iso)); + PyUString_ConcatAndDel(&ret, + PyUString_FromString("','")); + ret = append_metastr_to_string(&scal->obmeta, 1, ret); + PyUString_ConcatAndDel(&ret, + PyUString_FromString("')")); + } + + return ret; +} + +static PyObject * +timedeltatype_repr(PyObject *self) +{ + PyTimedeltaScalarObject *scal; + PyObject *ret; + + if (!PyArray_IsScalar(self, Timedelta)) { + PyErr_SetString(PyExc_RuntimeError, + "Called NumPy timedelta repr on a non-datetime type"); + return NULL; + } + + scal = (PyTimedeltaScalarObject *)self; + + /* The value */ + if (scal->obval == NPY_DATETIME_NAT) { + ret = PyUString_FromString("numpy.timedelta64('NaT'"); + } + else { + /* + * Can't use "%lld" if HAVE_LONG_LONG is not defined + */ +#if defined(HAVE_LONG_LONG) + ret = PyUString_FromFormat("numpy.timedelta64(%lld", + (long long)scal->obval); +#else + ret = PyUString_FromFormat("numpy.timedelta64(%ld", + (long)scal->obval); +#endif + } + /* The metadata unit */ + if (scal->obmeta.base == NPY_FR_GENERIC) { + PyUString_ConcatAndDel(&ret, + PyUString_FromString(")")); + } + else { + PyUString_ConcatAndDel(&ret, + PyUString_FromString(",'")); + ret = append_metastr_to_string(&scal->obmeta, 1, ret); + PyUString_ConcatAndDel(&ret, + PyUString_FromString("')")); + } + + return ret; +} + +static PyObject * +datetimetype_str(PyObject *self) +{ + PyDatetimeScalarObject *scal; + npy_datetimestruct dts; + char iso[NPY_DATETIME_MAX_ISO8601_STRLEN]; + NPY_DATETIMEUNIT unit; + + if (!PyArray_IsScalar(self, Datetime)) { + PyErr_SetString(PyExc_RuntimeError, + "Called NumPy datetime str on a non-datetime type"); + return NULL; + } + + scal = (PyDatetimeScalarObject *)self; + + if (convert_datetime_to_datetimestruct(&scal->obmeta, scal->obval, + &dts) < 0) { + return NULL; + } + + unit = scal->obmeta.base; + if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 0, 0, + unit, -1, NPY_SAFE_CASTING) < 0) { + return NULL; + } + + return PyUString_FromString(iso); +} + +static char *_datetime_verbose_strings[NPY_DATETIME_NUMUNITS] = { + "years", + "months", + "weeks", + "<invalid>", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + "picoseconds", + "femtoseconds", + "attoseconds", + "generic time units" +}; + +static PyObject * +timedeltatype_str(PyObject *self) +{ + PyTimedeltaScalarObject *scal; + PyObject *ret; + char *basestr = "invalid"; + + if (!PyArray_IsScalar(self, Timedelta)) { + PyErr_SetString(PyExc_RuntimeError, + "Called NumPy timedelta str on a non-datetime type"); + return NULL; + } + + scal = (PyTimedeltaScalarObject *)self; + + if (scal->obmeta.base >= 0 && scal->obmeta.base < NPY_DATETIME_NUMUNITS) { + basestr = _datetime_verbose_strings[scal->obmeta.base]; + } + else { + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted"); + return NULL; + } + + if (scal->obval == NPY_DATETIME_NAT) { + ret = PyUString_FromString("NaT"); + } + else { + /* + * Can't use "%lld" if HAVE_LONG_LONG is not defined + */ +#if defined(HAVE_LONG_LONG) + ret = PyUString_FromFormat("%lld ", + (long long)(scal->obval * scal->obmeta.num)); +#else + ret = 
PyUString_FromFormat("%ld ", + (long)(scal->obval * scal->obmeta.num)); +#endif + PyUString_ConcatAndDel(&ret, + PyUString_FromString(basestr)); + } + + return ret; +} + +/* + * float type str and repr + * + * These functions will return NULL if PyString creation fails. + */ + + +/* + * *** BEGIN LEGACY PRINTING MODE CODE *** + * + * This code is legacy code needed to reproduce the printing behavior of + * scalars in numpy 1.13. One day we hope to remove it. + */ + +/* determines if legacy mode is enabled, global set in multiarraymodule.c */ +extern int npy_legacy_print_mode; + +#define HALFPREC_REPR 5 +#define HALFPREC_STR 5 +#define FLOATPREC_REPR 8 +#define FLOATPREC_STR 6 +#define DOUBLEPREC_REPR 17 +#define DOUBLEPREC_STR 12 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE +#define LONGDOUBLEPREC_REPR DOUBLEPREC_REPR +#define LONGDOUBLEPREC_STR DOUBLEPREC_STR +#else /* More than probably needed on Intel FP */ +#define LONGDOUBLEPREC_REPR 20 +#define LONGDOUBLEPREC_STR 12 +#endif + +/**begin repeat + * #kind = str, repr# + * #KIND = STR, REPR# + */ + +/**begin repeat1 + * #name = cfloat, cdouble, clongdouble# + * #NAME = FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + * #suff = f, d, l# + */ + +#define _FMT1 "%%.%i" NPY_@NAME@_FMT +#define _FMT2 "%%+.%i" NPY_@NAME@_FMT + +static PyObject* +legacy_@name@_format@kind@(@type@ val) +{ + /* XXX: Find a correct size here for format string */ + char format[64], buf[100], *res; + + /* + * Ideally, we should handle this nan/inf stuff in NumpyOS_ascii_format* + */ + if (val.real == 0.0 && npy_signbit(val.real) == 0) { + PyOS_snprintf(format, sizeof(format), _FMT1, @NAME@PREC_@KIND@); + res = NumPyOS_ascii_format@suff@(buf, sizeof(buf) - 1, format, val.imag, 0); + if (res == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Error while formatting"); + return NULL; + } + if (!npy_isfinite(val.imag)) { + strncat(buf, "*", 1); + } + strncat(buf, "j", 1); + } + else { + char re[64], im[64]; + if (npy_isfinite(val.real)) { + PyOS_snprintf(format, sizeof(format), _FMT1, @NAME@PREC_@KIND@); + res = NumPyOS_ascii_format@suff@(re, sizeof(re), format, + val.real, 0); + if (res == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Error while formatting"); + return NULL; + } + } + else { + if (npy_isnan(val.real)) { + strcpy(re, "nan"); + } + else if (val.real > 0){ + strcpy(re, "inf"); + } + else { + strcpy(re, "-inf"); + } + } + + + if (npy_isfinite(val.imag)) { + PyOS_snprintf(format, sizeof(format), _FMT2, @NAME@PREC_@KIND@); + res = NumPyOS_ascii_format@suff@(im, sizeof(im), format, + val.imag, 0); + if (res == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Error while formatting"); + return NULL; + } + } + else { + if (npy_isnan(val.imag)) { + strcpy(im, "+nan"); + } + else if (val.imag > 0){ + strcpy(im, "+inf"); + } + else { + strcpy(im, "-inf"); + } + if (!npy_isfinite(val.imag)) { + strncat(im, "*", 1); + } + } + PyOS_snprintf(buf, sizeof(buf), "(%s%sj)", re, im); + } + + return PyUString_FromString(buf); +} + +#undef _FMT1 +#undef _FMT2 + +/**end repeat1**/ + +/**begin repeat1 + * #name = float, double, longdouble# + * #Name = Float, Double, LongDouble# + * #NAME = FLOAT, DOUBLE, LONGDOUBLE# + * #suff = f, d, l# + */ + +#define _FMT1 "%%.%i" NPY_@NAME@_FMT + +static PyObject * +legacy_@name@_format@kind@(npy_@name@ val){ + /* XXX: Find a correct size here for format string */ + char format[64], buf[100], *res; + size_t i, cnt; + + PyOS_snprintf(format, sizeof(format), _FMT1, @NAME@PREC_@KIND@); + res = 
NumPyOS_ascii_format@suff@(buf, sizeof(buf), format, val, 0); + if (res == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Error while formatting"); + return NULL; + } + + /* If nothing but digits after sign, append ".0" */ + cnt = strlen(buf); + for (i = (buf[0] == '-') ? 1 : 0; i < cnt; ++i) { + if (!isdigit(Py_CHARMASK(buf[i]))) { + break; + } + } + if (i == cnt && sizeof(buf) >= cnt + 3) { + strcpy(&buf[cnt],".0"); + } + + return PyUString_FromString(buf); +} + +#undef _FMT1 + +/**end repeat1**/ + +/**end repeat**/ + + +/* + * *** END LEGACY PRINTING MODE CODE *** + */ + + +/**begin repeat + * #kind = str, repr# + */ + +/**begin repeat1 + * #name = float, double, longdouble# + * #Name = Float, Double, LongDouble# + * #NAME = FLOAT, DOUBLE, LONGDOUBLE# + */ + +/* helper function choose scientific of fractional output, based on a cutoff */ +static PyObject * +@name@type_@kind@_either(npy_@name@ val, TrimMode trim_pos, TrimMode trim_sci, + npy_bool sign) +{ + npy_@name@ absval; + + if (npy_legacy_print_mode == 113) { + return legacy_@name@_format@kind@(val); + } + + absval = val < 0 ? -val : val; + + if (absval == 0 || (absval < 1.e16L && absval >= 1.e-4L) ) { + return format_@name@(val, 0, -1, sign, trim_pos, -1, -1, -1); + } + return format_@name@(val, 1, -1, sign, trim_sci, -1, -1, -1); +} + +static PyObject * +@name@type_@kind@(PyObject *self) +{ + return @name@type_@kind@_either(((Py@Name@ScalarObject *)self)->obval, + TrimMode_LeaveOneZero, TrimMode_DptZeros, 0); +} + +static PyObject * +c@name@type_@kind@(PyObject *self) +{ + PyObject *rstr, *istr, *ret; + npy_c@name@ val = ((PyC@Name@ScalarObject *)self)->obval; + TrimMode trim = TrimMode_DptZeros; + + if (npy_legacy_print_mode == 113) { + return legacy_c@name@_format@kind@(val); + } + + if (val.real == 0.0 && npy_signbit(val.real) == 0) { + istr = @name@type_@kind@_either(val.imag, trim, trim, 0); + if (istr == NULL) { + return NULL; + } + + PyUString_ConcatAndDel(&istr, PyUString_FromString("j")); + return istr; + } + + if (npy_isfinite(val.real)) { + rstr = @name@type_@kind@_either(val.real, trim, trim, 0); + if (rstr == NULL) { + return NULL; + } + } + else if (npy_isnan(val.real)) { + rstr = PyUString_FromString("nan"); + } + else if (val.real > 0){ + rstr = PyUString_FromString("inf"); + } + else { + rstr = PyUString_FromString("-inf"); + } + + if (npy_isfinite(val.imag)) { + istr = @name@type_@kind@_either(val.imag, trim, trim, 1); + if (istr == NULL) { + return NULL; + } + } + else if (npy_isnan(val.imag)) { + istr = PyUString_FromString("+nan"); + } + else if (val.imag > 0){ + istr = PyUString_FromString("+inf"); + } + else { + istr = PyUString_FromString("-inf"); + } + + ret = PyUString_FromString("("); + PyUString_ConcatAndDel(&ret, rstr); + PyUString_ConcatAndDel(&ret, istr); + PyUString_ConcatAndDel(&ret, PyUString_FromString("j)")); + return ret; +} + +#undef PREC + +/**end repeat1**/ + + +static PyObject * +halftype_@kind@(PyObject *self) +{ + npy_half val = ((PyHalfScalarObject *)self)->obval; + float floatval = npy_half_to_float(val); + float absval; + + if (npy_legacy_print_mode == 113) { + return legacy_float_format@kind@(floatval); + } + + absval = floatval < 0 ? 
-floatval : floatval; + + if (absval == 0 || (absval < 1.e16 && absval >= 1.e-4) ) { + return format_half(val, 0, -1, 0, TrimMode_LeaveOneZero, -1, -1, -1); + } + return format_half(val, 1, -1, 0, TrimMode_DptZeros, -1, -1, -1); +} + + +/**end repeat**/ + +/**begin repeat + * #char = ,c# + * #CHAR = ,C# + * #POST = ,.real# + */ +static PyObject * +@char@longdoubletype_float(PyObject *self) +{ + npy_longdouble val = PyArrayScalar_VAL(self, @CHAR@LongDouble)@POST@; + return PyFloat_FromDouble((double) val); +} + +static PyObject * +@char@longdoubletype_long(PyObject *self) +{ + npy_longdouble val = PyArrayScalar_VAL(self, @CHAR@LongDouble)@POST@; + return npy_longdouble_to_PyLong(val); +} + +#if !defined(NPY_PY3K) + +/**begin repeat1 + * #name = int, hex, oct# + */ +static PyObject * +@char@longdoubletype_@name@(PyObject *self) +{ + PyObject *ret; + PyObject *obj = @char@longdoubletype_long(self); + if (obj == NULL) { + return NULL; + } + ret = Py_TYPE(obj)->tp_as_number->nb_@name@(obj); + Py_DECREF(obj); + return ret; +} +/**end repeat1**/ + +#endif /* !defined(NPY_PY3K) */ + +/**end repeat**/ + +static PyNumberMethods gentype_as_number = { + (binaryfunc)gentype_add, /*nb_add*/ + (binaryfunc)gentype_subtract, /*nb_subtract*/ + (binaryfunc)gentype_multiply, /*nb_multiply*/ +#if !defined(NPY_PY3K) + (binaryfunc)gentype_divide, /*nb_divide*/ +#endif + (binaryfunc)gentype_remainder, /*nb_remainder*/ + (binaryfunc)gentype_divmod, /*nb_divmod*/ + (ternaryfunc)gentype_power, /*nb_power*/ + (unaryfunc)gentype_negative, + (unaryfunc)gentype_positive, /*nb_pos*/ + (unaryfunc)gentype_absolute, /*(unaryfunc)gentype_abs,*/ + (inquiry)gentype_nonzero_number, /*nb_nonzero*/ + (unaryfunc)gentype_invert, /*nb_invert*/ + (binaryfunc)gentype_lshift, /*nb_lshift*/ + (binaryfunc)gentype_rshift, /*nb_rshift*/ + (binaryfunc)gentype_and, /*nb_and*/ + (binaryfunc)gentype_xor, /*nb_xor*/ + (binaryfunc)gentype_or, /*nb_or*/ +#if !defined(NPY_PY3K) + 0, /*nb_coerce*/ +#endif + (unaryfunc)gentype_int, /*nb_int*/ +#if defined(NPY_PY3K) + 0, /*nb_reserved*/ +#else + (unaryfunc)gentype_long, /*nb_long*/ +#endif + (unaryfunc)gentype_float, /*nb_float*/ +#if !defined(NPY_PY3K) + (unaryfunc)gentype_oct, /*nb_oct*/ + (unaryfunc)gentype_hex, /*nb_hex*/ +#endif + 0, /*inplace_add*/ + 0, /*inplace_subtract*/ + 0, /*inplace_multiply*/ +#if !defined(NPY_PY3K) + 0, /*inplace_divide*/ +#endif + 0, /*inplace_remainder*/ + 0, /*inplace_power*/ + 0, /*inplace_lshift*/ + 0, /*inplace_rshift*/ + 0, /*inplace_and*/ + 0, /*inplace_xor*/ + 0, /*inplace_or*/ + (binaryfunc)gentype_floor_divide, /*nb_floor_divide*/ + (binaryfunc)gentype_true_divide, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + (unaryfunc)NULL, /*nb_index*/ +#if PY_VERSION_HEX >= 0x03050000 + 0, /*np_matmul*/ + 0, /*np_inplace_matmul*/ +#endif +}; + + +static PyObject * +gentype_richcompare(PyObject *self, PyObject *other, int cmp_op) +{ + PyObject *arr, *ret; + + /* + * If the other object is None, False is always right. This avoids + * the array None comparison, at least until deprecation it is fixed. + * After that, this may be removed and numpy false would be returned. + * + * NOTE: np.equal(NaT, None) evaluates to TRUE! This is an + * an inconsistency, which may has to be considered + * when the deprecation is finished. 
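+ *
+ * Concretely (a sketch of the intended behaviour, example values
+ * only): np.float64(1.0) == None should give False and != None True
+ * right here, instead of falling through to the 0-d array comparison
+ * below.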
+ */ + if (other == Py_None) { + if (cmp_op == Py_EQ) { + Py_RETURN_FALSE; + } + if (cmp_op == Py_NE) { + Py_RETURN_TRUE; + } + } + + arr = PyArray_FromScalar(self, NULL); + if (arr == NULL) { + return NULL; + } + /* + * Call via PyObject_RichCompare to ensure that other.__eq__ + * has a chance to run when necessary + */ + ret = PyObject_RichCompare(arr, other, cmp_op); + Py_DECREF(arr); + return ret; +} + +static PyObject * +gentype_ndim_get(PyObject *NPY_UNUSED(self)) +{ + return PyInt_FromLong(0); +} + +static PyObject * +gentype_flags_get(PyObject *NPY_UNUSED(self)) +{ + return PyArray_NewFlagsObject(NULL); +} + +static PyObject * +voidtype_flags_get(PyVoidScalarObject *self) +{ + PyObject *flagobj; + flagobj = PyArrayFlags_Type.tp_alloc(&PyArrayFlags_Type, 0); + if (flagobj == NULL) { + return NULL; + } + ((PyArrayFlagsObject *)flagobj)->arr = NULL; + ((PyArrayFlagsObject *)flagobj)->flags = self->flags; + return flagobj; +} + +static PyObject * +voidtype_dtypedescr_get(PyVoidScalarObject *self) +{ + Py_INCREF(self->descr); + return (PyObject *)self->descr; +} + + +static PyObject * +inttype_numerator_get(PyObject *self) +{ + Py_INCREF(self); + return self; +} + + +static PyObject * +inttype_denominator_get(PyObject *self) +{ + return PyInt_FromLong(1); +} + + +static PyObject * +gentype_data_get(PyObject *self) +{ +#if defined(NPY_PY3K) + return PyMemoryView_FromObject(self); +#else + return PyBuffer_FromObject(self, 0, Py_END_OF_BUFFER); +#endif +} + + +static PyObject * +gentype_itemsize_get(PyObject *self) +{ + PyArray_Descr *typecode; + PyObject *ret; + int elsize; + + typecode = PyArray_DescrFromScalar(self); + elsize = typecode->elsize; +#ifndef Py_UNICODE_WIDE + if (typecode->type_num == NPY_UNICODE) { + elsize >>= 1; + } +#endif + ret = PyInt_FromLong((long) elsize); + Py_DECREF(typecode); + return ret; +} + +static PyObject * +gentype_size_get(PyObject *NPY_UNUSED(self)) +{ + return PyInt_FromLong(1); +} + +static PyObject * +gentype_sizeof(PyObject *self) +{ + Py_ssize_t nbytes; + PyObject * isz = gentype_itemsize_get(self); + if (isz == NULL) { + return NULL; + } + nbytes = PyLong_AsLong(isz) + Py_TYPE(self)->tp_basicsize + + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize; + Py_DECREF(isz); + return PyLong_FromSsize_t(nbytes); +} + +#if PY_VERSION_HEX >= 0x03000000 +NPY_NO_EXPORT void +gentype_struct_free(PyObject *ptr) +{ + PyArrayInterface *arrif; + PyObject *context; + + arrif = (PyArrayInterface*)PyCapsule_GetPointer(ptr, NULL); + context = (PyObject *)PyCapsule_GetContext(ptr); + Py_DECREF(context); + Py_XDECREF(arrif->descr); + PyArray_free(arrif->shape); + PyArray_free(arrif); +} +#else +NPY_NO_EXPORT void +gentype_struct_free(void *ptr, void *arg) +{ + PyArrayInterface *arrif = (PyArrayInterface *)ptr; + Py_DECREF((PyObject *)arg); + Py_XDECREF(arrif->descr); + PyArray_free(arrif->shape); + PyArray_free(arrif); +} +#endif + +static PyObject * +gentype_struct_get(PyObject *self) +{ + PyArrayObject *arr; + PyArrayInterface *inter; + PyObject *ret; + + arr = (PyArrayObject *)PyArray_FromScalar(self, NULL); + inter = (PyArrayInterface *)PyArray_malloc(sizeof(PyArrayInterface)); + inter->two = 2; + inter->nd = 0; + inter->flags = PyArray_FLAGS(arr); + inter->flags &= ~(NPY_ARRAY_UPDATEIFCOPY | NPY_ARRAY_WRITEBACKIFCOPY | + NPY_ARRAY_OWNDATA); + inter->flags |= NPY_ARRAY_NOTSWAPPED; + inter->typekind = PyArray_DESCR(arr)->kind; + inter->itemsize = PyArray_DESCR(arr)->elsize; + inter->strides = NULL; + inter->shape = NULL; + inter->data = PyArray_DATA(arr); + inter->descr = 
NULL; + + ret = NpyCapsule_FromVoidPtrAndDesc(inter, arr, gentype_struct_free); + return ret; +} + +static PyObject * +gentype_priority_get(PyObject *NPY_UNUSED(self)) +{ + return PyFloat_FromDouble(NPY_SCALAR_PRIORITY); +} + +static PyObject * +gentype_shape_get(PyObject *NPY_UNUSED(self)) +{ + return PyTuple_New(0); +} + + +static PyObject * +gentype_interface_get(PyObject *self) +{ + PyArrayObject *arr; + PyObject *inter; + + arr = (PyArrayObject *)PyArray_FromScalar(self, NULL); + if (arr == NULL) { + return NULL; + } + inter = PyObject_GetAttrString((PyObject *)arr, "__array_interface__"); + if (inter != NULL) { + PyDict_SetItemString(inter, "__ref", (PyObject *)arr); + } + Py_DECREF(arr); + return inter; +} + + + +static PyObject * +gentype_typedescr_get(PyObject *self) +{ + return (PyObject *)PyArray_DescrFromScalar(self); +} + + +static PyObject * +gentype_base_get(PyObject *NPY_UNUSED(self)) +{ + Py_RETURN_NONE; +} + +static PyObject * +voidtype_base_get(PyVoidScalarObject *self) +{ + if (self->base == NULL) { + Py_RETURN_NONE; + } + else { + Py_INCREF(self->base); + return self->base; + } +} + + +static PyArray_Descr * +_realdescr_fromcomplexscalar(PyObject *self, int *typenum) +{ + if (PyArray_IsScalar(self, CDouble)) { + *typenum = NPY_CDOUBLE; + return PyArray_DescrFromType(NPY_DOUBLE); + } + if (PyArray_IsScalar(self, CFloat)) { + *typenum = NPY_CFLOAT; + return PyArray_DescrFromType(NPY_FLOAT); + } + if (PyArray_IsScalar(self, CLongDouble)) { + *typenum = NPY_CLONGDOUBLE; + return PyArray_DescrFromType(NPY_LONGDOUBLE); + } + return NULL; +} + +static PyObject * +gentype_real_get(PyObject *self) +{ + PyArray_Descr *typecode; + PyObject *ret; + int typenum; + + if (PyArray_IsScalar(self, ComplexFloating)) { + void *ptr; + typecode = _realdescr_fromcomplexscalar(self, &typenum); + ptr = scalar_value(self, NULL); + ret = PyArray_Scalar(ptr, typecode, NULL); + Py_DECREF(typecode); + return ret; + } + else if (PyArray_IsScalar(self, Object)) { + PyObject *obj = ((PyObjectScalarObject *)self)->obval; + ret = PyObject_GetAttrString(obj, "real"); + if (ret != NULL) { + return ret; + } + PyErr_Clear(); + } + Py_INCREF(self); + return (PyObject *)self; +} + +static PyObject * +gentype_imag_get(PyObject *self) +{ + PyArray_Descr *typecode=NULL; + PyObject *ret; + int typenum; + + if (PyArray_IsScalar(self, ComplexFloating)) { + char *ptr; + typecode = _realdescr_fromcomplexscalar(self, &typenum); + ptr = (char *)scalar_value(self, NULL); + ret = PyArray_Scalar(ptr + typecode->elsize, typecode, NULL); + } + else if (PyArray_IsScalar(self, Object)) { + PyObject *obj = ((PyObjectScalarObject *)self)->obval; + PyArray_Descr *newtype; + ret = PyObject_GetAttrString(obj, "imag"); + if (ret == NULL) { + PyErr_Clear(); + obj = PyInt_FromLong(0); + newtype = PyArray_DescrFromType(NPY_OBJECT); + ret = PyArray_Scalar((char *)&obj, newtype, NULL); + Py_DECREF(newtype); + Py_DECREF(obj); + } + } + else { + char *temp; + int elsize; + typecode = PyArray_DescrFromScalar(self); + elsize = typecode->elsize; + temp = npy_alloc_cache_zero(elsize); + ret = PyArray_Scalar(temp, typecode, NULL); + npy_free_cache(temp, elsize); + } + + Py_XDECREF(typecode); + return ret; +} + +static PyObject * +gentype_flat_get(PyObject *self) +{ + PyObject *ret, *arr; + + arr = PyArray_FromScalar(self, NULL); + if (arr == NULL) { + return NULL; + } + ret = PyArray_IterNew(arr); + Py_DECREF(arr); + return ret; +} + + +static PyObject * +gentype_transpose_get(PyObject *self) +{ + Py_INCREF(self); + return self; +} + + +static 
PyGetSetDef gentype_getsets[] = { + {"ndim", + (getter)gentype_ndim_get, + (setter) 0, + "number of array dimensions", + NULL}, + {"flags", + (getter)gentype_flags_get, + (setter)0, + "integer value of flags", + NULL}, + {"shape", + (getter)gentype_shape_get, + (setter)0, + "tuple of array dimensions", + NULL}, + {"strides", + (getter)gentype_shape_get, + (setter) 0, + "tuple of bytes steps in each dimension", + NULL}, + {"data", + (getter)gentype_data_get, + (setter) 0, + "pointer to start of data", + NULL}, + {"itemsize", + (getter)gentype_itemsize_get, + (setter)0, + "length of one element in bytes", + NULL}, + {"size", + (getter)gentype_size_get, + (setter)0, + "number of elements in the gentype", + NULL}, + {"nbytes", + (getter)gentype_itemsize_get, + (setter)0, + "length of item in bytes", + NULL}, + {"base", + (getter)gentype_base_get, + (setter)0, + "base object", + NULL}, + {"dtype", + (getter)gentype_typedescr_get, + NULL, + "get array data-descriptor", + NULL}, + {"real", + (getter)gentype_real_get, + (setter)0, + "real part of scalar", + NULL}, + {"imag", + (getter)gentype_imag_get, + (setter)0, + "imaginary part of scalar", + NULL}, + {"flat", + (getter)gentype_flat_get, + (setter)0, + "a 1-d view of scalar", + NULL}, + {"T", + (getter)gentype_transpose_get, + (setter)0, + "transpose", + NULL}, + {"__array_interface__", + (getter)gentype_interface_get, + NULL, + "Array protocol: Python side", + NULL}, + {"__array_struct__", + (getter)gentype_struct_get, + NULL, + "Array protocol: struct", + NULL}, + {"__array_priority__", + (getter)gentype_priority_get, + NULL, + "Array priority.", + NULL}, + {NULL, NULL, NULL, NULL, NULL} /* Sentinel */ +}; + + +/* 0-dim array from scalar object */ + +static char doc_getarray[] = "sc.__array__(dtype) return 0-dim array from " + "scalar with specified dtype"; + +static PyObject * +gentype_getarray(PyObject *scalar, PyObject *args) +{ + PyArray_Descr *outcode=NULL; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "|O&:__array__", &PyArray_DescrConverter, + &outcode)) { + Py_XDECREF(outcode); + return NULL; + } + ret = PyArray_FromScalar(scalar, outcode); + return ret; +} + +static char doc_sc_wraparray[] = "sc.__array_wrap__(obj) return scalar from array"; + +static PyObject * +gentype_wraparray(PyObject *NPY_UNUSED(scalar), PyObject *args) +{ + PyObject *obj; + PyArrayObject *arr; + + if (PyTuple_Size(args) < 1) { + PyErr_SetString(PyExc_TypeError, + "only accepts 1 argument."); + return NULL; + } + obj = PyTuple_GET_ITEM(args, 0); + if (!PyArray_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "can only be called with ndarray object"); + return NULL; + } + arr = (PyArrayObject *)obj; + + return PyArray_Scalar(PyArray_DATA(arr), + PyArray_DESCR(arr), (PyObject *)arr); +} + +/* + * These gentype_* functions do not take keyword arguments. + * The proper flag is METH_VARARGS. 
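+ *
+ * Each of these just round-trips through a temporary 0-d array via
+ * gentype_generic_method, so e.g. np.float64(1.5).item() should hand
+ * back the plain Python float 1.5 (illustrative example only).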
+ */ +/**begin repeat + * + * #name = tolist, item, __deepcopy__, __copy__, + * swapaxes, conj, conjugate, nonzero, + * fill, transpose, newbyteorder# + */ +static PyObject * +gentype_@name@(PyObject *self, PyObject *args) +{ + return gentype_generic_method(self, args, NULL, "@name@"); +} +/**end repeat**/ + +static PyObject * +gentype_itemset(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) +{ + PyErr_SetString(PyExc_ValueError, "array-scalars are immutable"); + return NULL; +} + +static Py_ssize_t +gentype_getreadbuf(PyObject *, Py_ssize_t, void **); + +static PyObject * +gentype_byteswap(PyObject *self, PyObject *args, PyObject *kwds) +{ + npy_bool inplace = NPY_FALSE; + static char *kwlist[] = {"inplace", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&:byteswap", kwlist, + PyArray_BoolConverter, &inplace)) { + return NULL; + } + if (inplace) { + PyErr_SetString(PyExc_ValueError, + "cannot byteswap a scalar in-place"); + return NULL; + } + else { + /* get the data, copyswap it and pass it to a new Array scalar */ + char *data; + PyArray_Descr *descr; + PyObject *new; + char *newmem; + + gentype_getreadbuf(self, 0, (void **)&data); + descr = PyArray_DescrFromScalar(self); + newmem = PyObject_Malloc(descr->elsize); + if (newmem == NULL) { + Py_DECREF(descr); + return PyErr_NoMemory(); + } + else { + descr->f->copyswap(newmem, data, 1, NULL); + } + new = PyArray_Scalar(newmem, descr, NULL); + PyObject_Free(newmem); + Py_DECREF(descr); + return new; + } +} + + +/* + * These gentype_* functions take keyword arguments. + * The proper flag is METH_VARARGS | METH_KEYWORDS. + */ +/**begin repeat + * + * #name = take, getfield, put, repeat, tofile, mean, trace, diagonal, clip, + * std, var, sum, cumsum, prod, cumprod, compress, sort, argsort, + * round, argmax, argmin, max, min, ptp, any, all, astype, resize, + * reshape, choose, tostring, tobytes, copy, searchsorted, view, + * flatten, ravel, squeeze# + */ +static PyObject * +gentype_@name@(PyObject *self, PyObject *args, PyObject *kwds) +{ + return gentype_generic_method(self, args, kwds, "@name@"); +} +/**end repeat**/ + +static PyObject * +voidtype_getfield(PyVoidScalarObject *self, PyObject *args, PyObject *kwds) +{ + /* Use ndarray's getfield to obtain the field safely */ + return gentype_generic_method((PyObject *)self, args, kwds, "getfield"); +} + +static PyObject * +gentype_setfield(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args), + PyObject *NPY_UNUSED(kwds)) +{ + PyErr_SetString(PyExc_TypeError, + "Can't set fields in a non-void array scalar."); + return NULL; +} + +static PyObject * +voidtype_setfield(PyVoidScalarObject *self, PyObject *args, PyObject *kwds) +{ + /* + * We would like to use ndarray's setfield because it performs safety + * checks on the field datatypes and because it broadcasts properly. + * However, as a special case, void-scalar assignment broadcasts + * differently from ndarrays when assigning to an object field: Assignment + * to an ndarray object field broadcasts, but assignment to a void-scalar + * object-field should not, in order to allow nested ndarrays. + * These lines should then behave identically: + * + * b = np.zeros(1, dtype=[('x', 'O')]) + * b[0]['x'] = arange(3) # uses voidtype_setfield + * b['x'][0] = arange(3) # uses ndarray setitem + * + * Ndarray's setfield would try to broadcast the lhs. Instead we use + * ndarray getfield to get the field safely, then setitem with an empty + * tuple to set the value without broadcast. 
Note we also want subarrays to + * be set properly, ie + * + * a = np.zeros(1, dtype=[('x', 'i', 5)]) + * a[0]['x'] = 1 + * + * sets all values to 1. "getfield + setitem with empty tuple" takes + * care of both object arrays and subarrays. + */ + PyObject *getfield_args, *value, *arr, *meth, *arr_field, *emptytuple; + + value = PyTuple_GetItem(args, 0); + if (value == NULL) { + return NULL; + } + getfield_args = PyTuple_GetSlice(args, 1, 3); + if (getfield_args == NULL) { + return NULL; + } + + /* 1. Convert to 0-d array and use getfield */ + arr = PyArray_FromScalar((PyObject*)self, NULL); + if (arr == NULL) { + Py_DECREF(getfield_args); + return NULL; + } + meth = PyObject_GetAttrString(arr, "getfield"); + if (meth == NULL) { + Py_DECREF(getfield_args); + Py_DECREF(arr); + return NULL; + } + if (kwds == NULL) { + arr_field = PyObject_CallObject(meth, getfield_args); + } + else { + arr_field = PyObject_Call(meth, getfield_args, kwds); + } + Py_DECREF(getfield_args); + Py_DECREF(meth); + Py_DECREF(arr); + + if(arr_field == NULL){ + return NULL; + } + + /* 2. Assign the value using setitem with empty tuple. */ + emptytuple = PyTuple_New(0); + if (PyObject_SetItem(arr_field, emptytuple, value) < 0) { + Py_DECREF(arr_field); + Py_DECREF(emptytuple); + return NULL; + } + Py_DECREF(emptytuple); + Py_DECREF(arr_field); + + Py_RETURN_NONE; +} + + +static PyObject * +gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args)) +{ + PyObject *ret = NULL, *obj = NULL, *mod = NULL; +#if defined(NPY_PY3K) + Py_buffer view; +#endif + const char *buffer; + Py_ssize_t buflen; + + /* Return a tuple of (callable object, arguments) */ + ret = PyTuple_New(2); + if (ret == NULL) { + return NULL; + } + +#if defined(NPY_PY3K) + if (PyArray_IsScalar(self, Unicode)) { + /* Unicode on Python 3 does not expose the buffer interface */ + buffer = PyUnicode_AS_DATA(self); + buflen = PyUnicode_GET_DATA_SIZE(self); + } + else if (PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) >= 0) { + buffer = view.buf; + buflen = view.len; + /* + * In Python 3 both of the deprecated functions PyObject_AsWriteBuffer and + * PyObject_AsReadBuffer that this code replaces release the buffer. It is + * up to the object that supplies the buffer to guarantee that the buffer + * sticks around after the release. + */ + PyBuffer_Release(&view); + _dealloc_cached_buffer_info(self); + } + else { + Py_DECREF(ret); + return NULL; + } +#else + if (PyObject_AsReadBuffer(self, (const void **)&buffer, &buflen)<0) { + Py_DECREF(ret); + return NULL; + } +#endif + + mod = PyImport_ImportModule("numpy.core._multiarray_umath"); + if (mod == NULL) { + return NULL; + } + obj = PyObject_GetAttrString(mod, "scalar"); + Py_DECREF(mod); + if (obj == NULL) { + return NULL; + } + PyTuple_SET_ITEM(ret, 0, obj); + obj = PyObject_GetAttrString((PyObject *)self, "dtype"); + if (PyArray_IsScalar(self, Object)) { + mod = ((PyObjectScalarObject *)self)->obval; + PyTuple_SET_ITEM(ret, 1, Py_BuildValue("NO", obj, mod)); + } + else { +#ifndef Py_UNICODE_WIDE + /* + * We need to expand the buffer so that we always write + * UCS4 to disk for pickle of unicode scalars. + * + * This could be in a unicode_reduce function, but + * that would require re-factoring. 
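+ *
+ * Roughly: on a narrow (UCS2) build a 3-character unicode scalar has
+ * a 6-byte buffer, which is widened here to 12 bytes of UCS4 so the
+ * pickle payload does not depend on the build (sizes are examples;
+ * surrogate pairs collapse to fewer UCS4 code points).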
+ */ + int alloc = 0; + char *tmp; + int newlen; + + if (PyArray_IsScalar(self, Unicode)) { + tmp = PyArray_malloc(buflen*2); + if (tmp == NULL) { + Py_DECREF(ret); + return PyErr_NoMemory(); + } + alloc = 1; + newlen = PyUCS2Buffer_AsUCS4((Py_UNICODE *)buffer, + (npy_ucs4 *)tmp, + buflen / 2, buflen / 2); + buflen = newlen*4; + buffer = tmp; + } +#endif + mod = PyBytes_FromStringAndSize(buffer, buflen); + if (mod == NULL) { + Py_DECREF(ret); +#ifndef Py_UNICODE_WIDE + ret = NULL; + goto fail; +#else + return NULL; +#endif + } + PyTuple_SET_ITEM(ret, 1, + Py_BuildValue("NN", obj, mod)); +#ifndef Py_UNICODE_WIDE +fail: + if (alloc) PyArray_free((char *)buffer); +#endif + } + return ret; +} + +/* ignores everything */ +static PyObject * +gentype_setstate(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) +{ + Py_RETURN_NONE; +} + +static PyObject * +gentype_dump(PyObject *self, PyObject *args) +{ + PyObject *file = NULL; + int ret; + + if (!PyArg_ParseTuple(args, "O:dump", &file)) { + return NULL; + } + ret = PyArray_Dump(self, file, 2); + if (ret < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +gentype_dumps(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) { + return NULL; + } + return PyArray_Dumps(self, 2); +} + + +/* setting flags cannot be done for scalars */ +static PyObject * +gentype_setflags(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args), + PyObject *NPY_UNUSED(kwds)) +{ + Py_RETURN_NONE; +} + +/* + * casting complex numbers (that don't inherit from Python complex) + * to Python complex + */ + +/**begin repeat + * #name = cfloat, clongdouble# + * #Name = CFloat, CLongDouble# + */ +static PyObject * +@name@_complex(PyObject *self, PyObject *NPY_UNUSED(args), + PyObject *NPY_UNUSED(kwds)) +{ + return PyComplex_FromDoubles(PyArrayScalar_VAL(self, @Name@).real, + PyArrayScalar_VAL(self, @Name@).imag); +} +/**end repeat**/ + +/* + * need to fill in doc-strings for these methods on import -- copy from + * array docstrings + */ +static PyMethodDef gentype_methods[] = { + {"tolist", + (PyCFunction)gentype_tolist, + METH_VARARGS, NULL}, + {"item", + (PyCFunction)gentype_item, + METH_VARARGS, NULL}, + {"itemset", + (PyCFunction)gentype_itemset, + METH_VARARGS, NULL}, + {"tobytes", + (PyCFunction)gentype_tobytes, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"tofile", + (PyCFunction)gentype_tofile, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"tostring", + (PyCFunction)gentype_tostring, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"byteswap", + (PyCFunction)gentype_byteswap, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"astype", + (PyCFunction)gentype_astype, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"getfield", + (PyCFunction)gentype_getfield, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"setfield", + (PyCFunction)gentype_setfield, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"copy", + (PyCFunction)gentype_copy, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"resize", + (PyCFunction)gentype_resize, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"__array__", + (PyCFunction)gentype_getarray, + METH_VARARGS, doc_getarray}, + {"__array_wrap__", + (PyCFunction)gentype_wraparray, + METH_VARARGS, doc_sc_wraparray}, + + /* for the sys module */ + {"__sizeof__", + (PyCFunction)gentype_sizeof, + METH_NOARGS, NULL}, + + /* for the copy module */ + {"__copy__", + (PyCFunction)gentype___copy__, + METH_VARARGS, NULL}, + {"__deepcopy__", + (PyCFunction)gentype___deepcopy__, + METH_VARARGS, NULL}, + + {"__reduce__", + (PyCFunction) gentype_reduce, + METH_VARARGS, NULL}, + /* For 
consistency does nothing */ + {"__setstate__", + (PyCFunction) gentype_setstate, + METH_VARARGS, NULL}, + + {"dumps", + (PyCFunction) gentype_dumps, + METH_VARARGS, NULL}, + {"dump", + (PyCFunction) gentype_dump, + METH_VARARGS, NULL}, + + /* Methods for array */ + {"fill", + (PyCFunction)gentype_fill, + METH_VARARGS, NULL}, + {"transpose", + (PyCFunction)gentype_transpose, + METH_VARARGS, NULL}, + {"take", + (PyCFunction)gentype_take, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"put", + (PyCFunction)gentype_put, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"repeat", + (PyCFunction)gentype_repeat, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"choose", + (PyCFunction)gentype_choose, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"sort", + (PyCFunction)gentype_sort, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"argsort", + (PyCFunction)gentype_argsort, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"searchsorted", + (PyCFunction)gentype_searchsorted, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"argmax", + (PyCFunction)gentype_argmax, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"argmin", + (PyCFunction)gentype_argmin, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"reshape", + (PyCFunction)gentype_reshape, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"squeeze", + (PyCFunction)gentype_squeeze, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"view", + (PyCFunction)gentype_view, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"swapaxes", + (PyCFunction)gentype_swapaxes, + METH_VARARGS, NULL}, + {"max", + (PyCFunction)gentype_max, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"min", + (PyCFunction)gentype_min, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"ptp", + (PyCFunction)gentype_ptp, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"mean", + (PyCFunction)gentype_mean, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"trace", + (PyCFunction)gentype_trace, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"diagonal", + (PyCFunction)gentype_diagonal, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"clip", + (PyCFunction)gentype_clip, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"conj", + (PyCFunction)gentype_conj, + METH_VARARGS, NULL}, + {"conjugate", + (PyCFunction)gentype_conjugate, + METH_VARARGS, NULL}, + {"nonzero", + (PyCFunction)gentype_nonzero, + METH_VARARGS, NULL}, + {"std", + (PyCFunction)gentype_std, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"var", + (PyCFunction)gentype_var, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"sum", + (PyCFunction)gentype_sum, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"cumsum", + (PyCFunction)gentype_cumsum, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"prod", + (PyCFunction)gentype_prod, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"cumprod", + (PyCFunction)gentype_cumprod, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"all", + (PyCFunction)gentype_all, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"any", + (PyCFunction)gentype_any, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"compress", + (PyCFunction)gentype_compress, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"flatten", + (PyCFunction)gentype_flatten, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"ravel", + (PyCFunction)gentype_ravel, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"round", + (PyCFunction)gentype_round, + METH_VARARGS | METH_KEYWORDS, NULL}, +#if defined(NPY_PY3K) + /* Hook for the round() builtin */ + {"__round__", + (PyCFunction)gentype_round, + METH_VARARGS | METH_KEYWORDS, NULL}, +#endif + /* For the format function */ + {"__format__", + gentype_format, + METH_VARARGS, + "NumPy array scalar formatter"}, + {"setflags", + (PyCFunction)gentype_setflags, + METH_VARARGS | METH_KEYWORDS, NULL}, + 
{"newbyteorder", + (PyCFunction)gentype_newbyteorder, + METH_VARARGS, NULL}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; + + +static PyGetSetDef voidtype_getsets[] = { + {"flags", + (getter)voidtype_flags_get, + (setter)0, + "integer value of flags", + NULL}, + {"dtype", + (getter)voidtype_dtypedescr_get, + (setter)0, + "dtype object", + NULL}, + {"base", + (getter)voidtype_base_get, + (setter)0, + "base object", + NULL}, + {NULL, NULL, NULL, NULL, NULL} +}; + +static PyMethodDef voidtype_methods[] = { + {"getfield", + (PyCFunction)voidtype_getfield, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"setfield", + (PyCFunction)voidtype_setfield, + METH_VARARGS | METH_KEYWORDS, NULL}, + {NULL, NULL, 0, NULL} +}; + +static PyGetSetDef inttype_getsets[] = { + {"numerator", + (getter)inttype_numerator_get, + (setter)0, + "numerator of value (the value itself)", + NULL}, + {"denominator", + (getter)inttype_denominator_get, + (setter)0, + "denominator of value (1)", + NULL}, + {NULL, NULL, NULL, NULL, NULL} +}; + +/**begin repeat + * #name = cfloat,clongdouble# + */ +static PyMethodDef @name@type_methods[] = { + {"__complex__", + (PyCFunction)@name@_complex, + METH_VARARGS | METH_KEYWORDS, NULL}, + {NULL, NULL, 0, NULL} +}; +/**end repeat**/ + +/************* As_mapping functions for void array scalar ************/ + +static Py_ssize_t +voidtype_length(PyVoidScalarObject *self) +{ + if (!PyDataType_HASFIELDS(self->descr)) { + return 0; + } + else { + /* return the number of fields */ + return (Py_ssize_t) PyTuple_GET_SIZE(self->descr->names); + } +} + +static PyObject * +voidtype_subscript(PyVoidScalarObject *self, PyObject *ind); + +static PyObject * +voidtype_item(PyVoidScalarObject *self, Py_ssize_t n) +{ + npy_intp m; + PyObject *flist=NULL; + + if (!(PyDataType_HASFIELDS(self->descr))) { + PyErr_SetString(PyExc_IndexError, + "can't index void scalar without fields"); + return NULL; + } + flist = self->descr->names; + m = PyTuple_GET_SIZE(flist); + if (n < 0) { + n += m; + } + if (n < 0 || n >= m) { + PyErr_Format(PyExc_IndexError, "invalid index (%d)", (int) n); + return NULL; + } + + return voidtype_subscript(self, PyTuple_GetItem(flist, n)); +} + +/* get field by name or number */ +static PyObject * +voidtype_subscript(PyVoidScalarObject *self, PyObject *ind) +{ + npy_intp n; + PyObject *ret, *res; + + /* structured voids will accept an integer index */ + if (PyDataType_HASFIELDS(self->descr)) { + n = PyArray_PyIntAsIntp(ind); + if (!error_converting(n)) { + return voidtype_item(self, (Py_ssize_t)n); + } + PyErr_Clear(); + } + + res = PyArray_FromScalar((PyObject*)self, NULL); + + /* ellipsis should return 0d array */ + if(ind == Py_Ellipsis){ + return res; + } + + /* + * other cases (field names, empty tuple) will return either + * scalar or non-0d array. Compute this using ndarray subscript. 
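+ *
+ * E.g. for a structured void scalar v with dtype
+ * [('x', 'i4'), ('y', 'f8')], both v['y'] and v[1] should yield the
+ * 'y' field (illustrative example only).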
+ */ + ret = array_subscript((PyArrayObject *)res, ind); + Py_DECREF(res); + return PyArray_Return((PyArrayObject*)ret); +} + +static int +voidtype_ass_subscript(PyVoidScalarObject *self, PyObject *ind, PyObject *val); + +static int +voidtype_ass_item(PyVoidScalarObject *self, Py_ssize_t n, PyObject *val) +{ + npy_intp m; + PyObject *flist=NULL; + + if (!(PyDataType_HASFIELDS(self->descr))) { + PyErr_SetString(PyExc_IndexError, + "can't index void scalar without fields"); + return -1; + } + + flist = self->descr->names; + m = PyTuple_GET_SIZE(flist); + if (n < 0) { + n += m; + } + if (n < 0 || n >= m) { + PyErr_Format(PyExc_IndexError, "invalid index (%d)", (int) n); + return -1; + } + + return voidtype_ass_subscript(self, PyTuple_GetItem(flist, n), val); +} + +static int +voidtype_ass_subscript(PyVoidScalarObject *self, PyObject *ind, PyObject *val) +{ + npy_intp n; + char *msg = "invalid index"; + PyObject *args; + + if (!PyDataType_HASFIELDS(self->descr)) { + PyErr_SetString(PyExc_IndexError, + "can't index void scalar without fields"); + return -1; + } + + if (!val) { + PyErr_SetString(PyExc_ValueError, + "cannot delete scalar field"); + return -1; + } + + if (PyBaseString_Check(ind)) { + /* + * Much like in voidtype_setfield, we cannot simply use ndarray's + * __setitem__ since assignment to void scalars should not broadcast + * the lhs. Instead we get a view through __getitem__ and then assign + * the value using setitem with an empty tuple (which treats both + * object arrays and subarrays properly). + * + * Also we do not want to use voidtype_setfield here, since we do + * not need to do the (slow) view safety checks, since we already + * know the dtype/offset are safe. + */ + + PyObject *arr, *arr_field, *meth, *emptytuple; + + /* 1. Convert to 0-d array and use getitem */ + arr = PyArray_FromScalar((PyObject*)self, NULL); + if (arr == NULL) { + return -1; + } + meth = PyObject_GetAttrString(arr, "__getitem__"); + if (meth == NULL) { + Py_DECREF(arr); + return -1; + } + args = Py_BuildValue("(O)", ind); + arr_field = PyObject_CallObject(meth, args); + Py_DECREF(meth); + Py_DECREF(arr); + Py_DECREF(args); + + if(arr_field == NULL){ + return -1; + } + + /* 2. Assign the value using setitem with empty tuple. 
*/ + emptytuple = PyTuple_New(0); + if (PyObject_SetItem(arr_field, emptytuple, val) < 0) { + Py_DECREF(arr_field); + Py_DECREF(emptytuple); + return -1; + } + Py_DECREF(emptytuple); + Py_DECREF(arr_field); + return 0; + } + + /* try to convert it to a number */ + n = PyArray_PyIntAsIntp(ind); + if (error_converting(n)) { + goto fail; + } + return voidtype_ass_item(self, (Py_ssize_t)n, val); + +fail: + PyErr_SetString(PyExc_IndexError, msg); + return -1; +} + +static PyMappingMethods voidtype_as_mapping = { + (lenfunc)voidtype_length, /*mp_length*/ + (binaryfunc)voidtype_subscript, /*mp_subscript*/ + (objobjargproc)voidtype_ass_subscript, /*mp_ass_subscript*/ +}; + + +static PySequenceMethods voidtype_as_sequence = { + (lenfunc)voidtype_length, /*sq_length*/ + 0, /*sq_concat*/ + 0, /*sq_repeat*/ + (ssizeargfunc)voidtype_item, /*sq_item*/ + 0, /*sq_slice*/ + (ssizeobjargproc)voidtype_ass_item, /*sq_ass_item*/ + 0, /* ssq_ass_slice */ + 0, /* sq_contains */ + 0, /* sq_inplace_concat */ + 0, /* sq_inplace_repeat */ +}; + + +static Py_ssize_t +gentype_getreadbuf(PyObject *self, Py_ssize_t segment, void **ptrptr) +{ + int numbytes; + PyArray_Descr *outcode; + + if (segment != 0) { + PyErr_SetString(PyExc_SystemError, + "Accessing non-existent array segment"); + return -1; + } + + outcode = PyArray_DescrFromScalar(self); + numbytes = outcode->elsize; + *ptrptr = (void *)scalar_value(self, outcode); + +#ifndef Py_UNICODE_WIDE + if (outcode->type_num == NPY_UNICODE) { + numbytes >>= 1; + } +#endif + Py_DECREF(outcode); + return numbytes; +} + +#if !defined(NPY_PY3K) +static Py_ssize_t +gentype_getsegcount(PyObject *self, Py_ssize_t *lenp) +{ + PyArray_Descr *outcode; + + outcode = PyArray_DescrFromScalar(self); + if (lenp) { + *lenp = outcode->elsize; +#ifndef Py_UNICODE_WIDE + if (outcode->type_num == NPY_UNICODE) { + *lenp >>= 1; + } +#endif + } + Py_DECREF(outcode); + return 1; +} + +static Py_ssize_t +gentype_getcharbuf(PyObject *self, Py_ssize_t segment, constchar **ptrptr) +{ + if (PyArray_IsScalar(self, String) || + PyArray_IsScalar(self, Unicode)) { + return gentype_getreadbuf(self, segment, (void **)ptrptr); + } + else { + PyErr_SetString(PyExc_TypeError, + "Non-character array cannot be interpreted "\ + "as character buffer."); + return -1; + } +} +#endif /* !defined(NPY_PY3K) */ + +static PyBufferProcs gentype_as_buffer = { +#if !defined(NPY_PY3K) + gentype_getreadbuf, /* bf_getreadbuffer*/ + NULL, /* bf_getwritebuffer*/ + gentype_getsegcount, /* bf_getsegcount*/ + gentype_getcharbuf, /* bf_getcharbuffer*/ +#endif + gentype_getbuffer, /* bf_getbuffer */ + NULL, /* bf_releasebuffer */ +}; + + +#if defined(NPY_PY3K) +#define BASEFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE +#define LEAFFLAGS Py_TPFLAGS_DEFAULT +#else +#define BASEFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES +#define LEAFFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES +#endif + +NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "numpy.generic", /* tp_name*/ + sizeof(PyObject), /* tp_basicsize*/ + 0, /* tp_itemsize */ + /* methods */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, 
/* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; + +static void +void_dealloc(PyVoidScalarObject *v) +{ + _dealloc_cached_buffer_info((PyObject *)v); + + if (v->flags & NPY_ARRAY_OWNDATA) { + npy_free_cache(v->obval, Py_SIZE(v)); + } + Py_XDECREF(v->descr); + Py_XDECREF(v->base); + Py_TYPE(v)->tp_free(v); +} + +static void +object_arrtype_dealloc(PyObject *v) +{ + Py_XDECREF(((PyObjectScalarObject *)v)->obval); + Py_TYPE(v)->tp_free(v); +} + +/* + * string and unicode inherit from Python Type first and so GET_ITEM + * is different to get to the Python Type. + * + * ok is a work-around for a bug in complex_new that doesn't allocate + * memory from the sub-types memory allocator. + */ + +#define _WORK(num) \ + if (type->tp_bases && (PyTuple_GET_SIZE(type->tp_bases)==2)) { \ + PyTypeObject *sup; \ + /* We are inheriting from a Python type as well so \ + give it first dibs on conversion */ \ + sup = (PyTypeObject *)PyTuple_GET_ITEM(type->tp_bases, num); \ + /* Prevent recursion */ \ + if (thisfunc != sup->tp_new) { \ + robj = sup->tp_new(type, args, kwds); \ + if (robj != NULL) goto finish; \ + if (PyTuple_GET_SIZE(args)!=1) return NULL; \ + PyErr_Clear(); \ + } \ + /* now do default conversion */ \ + } + +#define _WORK1 _WORK(1) +#define _WORKz _WORK(0) +#define _WORK0 + +/**begin repeat + * #name = byte, short, int, long, longlong, ubyte, ushort, uint, ulong, + * ulonglong, half, float, double, longdouble, cfloat, cdouble, + * clongdouble, string, unicode, object# + * #Name = Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, ULong, + * ULongLong, Half, Float, Double, LongDouble, CFloat, CDouble, + * CLongDouble, String, Unicode, Object# + * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG, UBYTE, USHORT, UINT, ULONG, + * ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, + * CLONGDOUBLE, STRING, UNICODE, OBJECT# + * #work = 0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,z,z,0# + * #default = 0*17,1*2,2# + */ + +#define _NPY_UNUSED2_1 +#define _NPY_UNUSED2_z +#define _NPY_UNUSED2_0 NPY_UNUSED +#define _NPY_UNUSED1_0 +#define _NPY_UNUSED1_1 +#define _NPY_UNUSED1_2 NPY_UNUSED + +static PyObject * +@name@_arrtype_new(PyTypeObject *_NPY_UNUSED1_@default@(type), PyObject *args, PyObject *_NPY_UNUSED2_@work@(kwds)) +{ + PyObject *obj = NULL; + PyObject *robj; + PyArrayObject *arr; + PyArray_Descr *typecode = NULL; +#if (@work@ != 0) || (@default@ == 1) + void *thisfunc = (void *)@name@_arrtype_new; +#endif +#if !(@default@ == 2) + int itemsize; + void *dest, *src; +#endif + + /* + * allow base-class (if any) to do conversion + * If successful, this will jump to finish: + */ + _WORK@work@ + + /* TODO: include type name in error message, which is not @name@ */ + if (!PyArg_ParseTuple(args, "|O", &obj)) { + return NULL; + } + typecode = PyArray_DescrFromType(NPY_@TYPE@); + if (typecode == NULL) { + return NULL; + } + /* + * typecode is new reference and stolen by + * PyArray_FromAny but not PyArray_Scalar + */ + if (obj 
== NULL) { +#if @default@ == 0 + robj = PyArray_Scalar(NULL, typecode, NULL); + if (robj == NULL) { + Py_DECREF(typecode); + return NULL; + } + memset(&((Py@Name@ScalarObject *)robj)->obval, 0, sizeof(npy_@name@)); +#elif @default@ == 1 + robj = PyArray_Scalar(NULL, typecode, NULL); +#elif @default@ == 2 + Py_INCREF(Py_None); + robj = Py_None; +#endif + Py_DECREF(typecode); + goto finish; + } + + /* + * It is expected at this point that robj is a PyArrayScalar + * (even for Object Data Type) + */ + arr = (PyArrayObject *)PyArray_FromAny(obj, typecode, + 0, 0, NPY_ARRAY_FORCECAST, NULL); + if ((arr == NULL) || (PyArray_NDIM(arr) > 0)) { + return (PyObject *)arr; + } + /* 0-d array */ + robj = PyArray_ToScalar(PyArray_DATA(arr), arr); + Py_DECREF(arr); + +finish: + /* + * In OBJECT case, robj is no longer a + * PyArrayScalar at this point but the + * remaining code assumes it is + */ +#if @default@ == 2 + return robj; +#else + /* Normal return */ + if ((robj == NULL) || (Py_TYPE(robj) == type)) { + return robj; + } + + /* + * This return path occurs when the requested type is not created + * but another scalar object is created instead (i.e. when + * the base-class does the conversion in _WORK macro) + */ + + /* Need to allocate new type and copy data-area over */ + if (type->tp_itemsize) { + itemsize = PyBytes_GET_SIZE(robj); + } + else { + itemsize = 0; + } + obj = type->tp_alloc(type, itemsize); + if (obj == NULL) { + Py_DECREF(robj); + return NULL; + } + /* typecode will be NULL */ + typecode = PyArray_DescrFromType(NPY_@TYPE@); + dest = scalar_value(obj, typecode); + src = scalar_value(robj, typecode); + Py_DECREF(typecode); +#if @default@ == 0 + *((npy_@name@ *)dest) = *((npy_@name@ *)src); +#elif @default@ == 1 /* unicode and strings */ + if (itemsize == 0) { /* unicode */ +#if PY_VERSION_HEX >= 0x03030000 + itemsize = PyUnicode_GetLength(robj) * PyUnicode_KIND(robj); +#else + itemsize = ((PyUnicodeObject *)robj)->length * sizeof(Py_UNICODE); +#endif + } + memcpy(dest, src, itemsize); + /* @default@ == 2 won't get here */ +#endif + Py_DECREF(robj); + return obj; +#endif +} +/**end repeat**/ + +#undef _WORK1 +#undef _WORKz +#undef _WORK0 +#undef _WORK + +/**begin repeat + * #name = datetime, timedelta# + * #Name = Datetime, Timedelta# + * #NAME = DATETIME, TIMEDELTA# + * #is_datetime = 1, 0# + */ + +static PyObject * +@name@_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *obj = NULL, *meta_obj = NULL; + Py@Name@ScalarObject *ret; + + /* TODO: include type name in error message, which is not @name@ */ + if (!PyArg_ParseTuple(args, "|OO", &obj, &meta_obj)) { + return NULL; + } + + /* Allocate the return scalar */ + ret = (Py@Name@ScalarObject *)Py@Name@ArrType_Type.tp_alloc( + &Py@Name@ArrType_Type, 0); + if (ret == NULL) { + return NULL; + } + + /* Incorporate the metadata if its provided */ + if (meta_obj != NULL) { + /* Parse the provided metadata input */ + if (convert_pyobject_to_datetime_metadata(meta_obj, &ret->obmeta) + < 0) { + Py_DECREF(ret); + return NULL; + } + } + else { + /* + * A unit of -1 signals that convert_pyobject_to_datetime + * should populate. 
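+ *
+ * E.g. np.datetime64('2011-03') should infer a month unit from the
+ * string alone, whereas np.datetime64('2011-03', 'D') takes the unit
+ * from the explicitly supplied meta_obj (illustrative examples only).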
+ */ + ret->obmeta.base = -1; + } + + if (obj == NULL) { + if (ret->obmeta.base == -1) { + ret->obmeta.base = NPY_DATETIME_DEFAULTUNIT; + ret->obmeta.num = 1; + } + + /* Make datetime default to NaT, timedelta default to zero */ +#if @is_datetime@ + ret->obval = NPY_DATETIME_NAT; +#else + ret->obval = 0; +#endif + } + else if (convert_pyobject_to_@name@(&ret->obmeta, obj, + NPY_SAME_KIND_CASTING, &ret->obval) < 0) { + Py_DECREF(ret); + return NULL; + } + + return (PyObject *)ret; +} +/**end repeat**/ + +/* bool->tp_new only returns Py_True or Py_False */ +static PyObject * +bool_arrtype_new(PyTypeObject *NPY_UNUSED(type), PyObject *args, PyObject *NPY_UNUSED(kwds)) +{ + PyObject *obj = NULL; + PyArrayObject *arr; + + if (!PyArg_ParseTuple(args, "|O:bool_", &obj)) { + return NULL; + } + if (obj == NULL) { + PyArrayScalar_RETURN_FALSE; + } + if (obj == Py_False) { + PyArrayScalar_RETURN_FALSE; + } + if (obj == Py_True) { + PyArrayScalar_RETURN_TRUE; + } + arr = (PyArrayObject *)PyArray_FROM_OTF(obj, + NPY_BOOL, NPY_ARRAY_FORCECAST); + if (arr && 0 == PyArray_NDIM(arr)) { + npy_bool val = *((npy_bool *)PyArray_DATA(arr)); + Py_DECREF(arr); + PyArrayScalar_RETURN_BOOL_FROM_LONG(val); + } + return PyArray_Return((PyArrayObject *)arr); +} + +static PyObject * +bool_arrtype_and(PyObject *a, PyObject *b) +{ + if (PyArray_IsScalar(a, Bool) && PyArray_IsScalar(b, Bool)) { + PyArrayScalar_RETURN_BOOL_FROM_LONG + ((a == PyArrayScalar_True) & (b == PyArrayScalar_True)); + } + return PyGenericArrType_Type.tp_as_number->nb_and(a, b); +} + +static PyObject * +bool_arrtype_or(PyObject *a, PyObject *b) +{ + if (PyArray_IsScalar(a, Bool) && PyArray_IsScalar(b, Bool)) { + PyArrayScalar_RETURN_BOOL_FROM_LONG + ((a == PyArrayScalar_True)|(b == PyArrayScalar_True)); + } + return PyGenericArrType_Type.tp_as_number->nb_or(a, b); +} + +static PyObject * +bool_arrtype_xor(PyObject *a, PyObject *b) +{ + if (PyArray_IsScalar(a, Bool) && PyArray_IsScalar(b, Bool)) { + PyArrayScalar_RETURN_BOOL_FROM_LONG + ((a == PyArrayScalar_True)^(b == PyArrayScalar_True)); + } + return PyGenericArrType_Type.tp_as_number->nb_xor(a, b); +} + +static int +bool_arrtype_nonzero(PyObject *a) +{ + return a == PyArrayScalar_True; +} + +/**begin repeat + * #name = byte, short, int, long, ubyte, ushort, longlong, uint, + * ulong, ulonglong# + * #Name = Byte, Short, Int, Long, UByte, UShort, LongLong, UInt, + * ULong, ULongLong# + * #type = PyInt_FromLong*6, PyLong_FromLongLong*1, + * PyLong_FromUnsignedLong*2, PyLong_FromUnsignedLongLong# + */ +static PyNumberMethods @name@_arrtype_as_number; +static PyObject * +@name@_index(PyObject *self) +{ + return @type@(PyArrayScalar_VAL(self, @Name@)); +} +/**end repeat**/ + +/**begin repeat + * #name = half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #NAME = Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + */ +static PyNumberMethods @name@_arrtype_as_number; +/**end repeat**/ + +static PyObject * +bool_index(PyObject *a) +{ + if (DEPRECATE( + "In future, it will be an error for 'np.bool_' scalars to be " + "interpreted as an index") < 0) { + return NULL; + } + else { + return PyInt_FromLong(PyArrayScalar_VAL(a, Bool)); + } +} + +/* Arithmetic methods -- only so we can override &, |, ^. 
*/ +NPY_NO_EXPORT PyNumberMethods bool_arrtype_as_number = { + 0, /* nb_add */ + 0, /* nb_subtract */ + 0, /* nb_multiply */ +#if defined(NPY_PY3K) +#else + 0, /* nb_divide */ +#endif + 0, /* nb_remainder */ + 0, /* nb_divmod */ + 0, /* nb_power */ + 0, /* nb_negative */ + 0, /* nb_positive */ + 0, /* nb_absolute */ + (inquiry)bool_arrtype_nonzero, /* nb_nonzero / nb_bool */ + 0, /* nb_invert */ + 0, /* nb_lshift */ + 0, /* nb_rshift */ + (binaryfunc)bool_arrtype_and, /* nb_and */ + (binaryfunc)bool_arrtype_xor, /* nb_xor */ + (binaryfunc)bool_arrtype_or, /* nb_or */ +#if defined(NPY_PY3K) +#else + 0, /* nb_coerce */ +#endif + 0, /* nb_int */ +#if defined(NPY_PY3K) + 0, /* nb_reserved */ +#else + 0, /* nb_long */ +#endif + 0, /* nb_float */ +#if defined(NPY_PY3K) +#else + 0, /* nb_oct */ + 0, /* nb_hex */ +#endif + /* Added in release 2.0 */ + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ +#if defined(NPY_PY3K) +#else + 0, /* nb_inplace_divide */ +#endif + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + /* Added in release 2.2 */ + /* The following require the Py_TPFLAGS_HAVE_CLASS flag */ + 0, /* nb_floor_divide */ + 0, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + /* Added in release 2.5 */ + 0, /* nb_index */ +}; + +static PyObject * +void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *NPY_UNUSED(kwds)) +{ + PyObject *obj, *arr; + PyObject *new = NULL; + + if (!PyArg_ParseTuple(args, "O:void", &obj)) { + return NULL; + } + /* + * For a VOID scalar first see if obj is an integer or long + * and create new memory of that size (filled with 0) for the scalar + */ + if (PyLong_Check(obj) || PyInt_Check(obj) || + PyArray_IsScalar(obj, Integer) || + (PyArray_Check(obj) && + PyArray_NDIM((PyArrayObject *)obj)==0 && + PyArray_ISINTEGER((PyArrayObject *)obj))) { +#if defined(NPY_PY3K) + new = Py_TYPE(obj)->tp_as_number->nb_int(obj); +#else + new = Py_TYPE(obj)->tp_as_number->nb_long(obj); +#endif + } + if (new && PyLong_Check(new)) { + PyObject *ret; + char *destptr; + npy_ulonglong memu = PyLong_AsUnsignedLongLong(new); + Py_DECREF(new); + if (PyErr_Occurred() || (memu > NPY_MAX_INT)) { + PyErr_Clear(); + PyErr_Format(PyExc_OverflowError, + "size must be non-negative and not greater than %d", + (int) NPY_MAX_INT); + return NULL; + } + destptr = npy_alloc_cache_zero(memu); + if (destptr == NULL) { + return PyErr_NoMemory(); + } + ret = type->tp_alloc(type, 0); + if (ret == NULL) { + npy_free_cache(destptr, memu); + return PyErr_NoMemory(); + } + ((PyVoidScalarObject *)ret)->obval = destptr; + Py_SIZE((PyVoidScalarObject *)ret) = (int) memu; + ((PyVoidScalarObject *)ret)->descr = + PyArray_DescrNewFromType(NPY_VOID); + ((PyVoidScalarObject *)ret)->descr->elsize = (int) memu; + ((PyVoidScalarObject *)ret)->flags = NPY_ARRAY_BEHAVED | + NPY_ARRAY_OWNDATA; + ((PyVoidScalarObject *)ret)->base = NULL; + return ret; + } + + arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST); + return PyArray_Return((PyArrayObject *)arr); +} + + +/**************** Define Hash functions ********************/ + +/**begin repeat + * #lname = bool, ubyte, ushort# + * #name = Bool,UByte, UShort# + */ +static npy_hash_t +@lname@_arrtype_hash(PyObject *obj) +{ + return (npy_hash_t)(((Py@name@ScalarObject *)obj)->obval); +} +/**end repeat**/ + +/**begin repeat + * #lname = byte, 
short, uint# + * #name = Byte, Short, UInt# + */ +static npy_hash_t +@lname@_arrtype_hash(PyObject *obj) +{ + npy_hash_t x = (npy_hash_t)(((Py@name@ScalarObject *)obj)->obval); + if (x == -1) { + x = -2; + } + return x; +} +/**end repeat**/ + +static npy_hash_t +ulong_arrtype_hash(PyObject *obj) +{ + PyObject * l = PyLong_FromUnsignedLong(((PyULongScalarObject*)obj)->obval); + npy_hash_t x = PyObject_Hash(l); + Py_DECREF(l); + return x; +} + +#if (NPY_SIZEOF_INT != NPY_SIZEOF_LONG) || defined(NPY_PY3K) +static npy_hash_t +int_arrtype_hash(PyObject *obj) +{ + npy_hash_t x = (npy_hash_t)(((PyIntScalarObject *)obj)->obval); + if (x == -1) { + x = -2; + } + return x; +} +#endif + +#if defined(NPY_PY3K) +static npy_hash_t +long_arrtype_hash(PyObject *obj) +{ + PyObject * l = PyLong_FromLong(((PyLongScalarObject*)obj)->obval); + npy_hash_t x = PyObject_Hash(l); + Py_DECREF(l); + return x; +} +#endif + +/**begin repeat + * #char = ,u# + * #Char = ,U# + * #Word = ,Unsigned# + */ +static NPY_INLINE npy_hash_t +@char@longlong_arrtype_hash(PyObject *obj) +{ + PyObject * l = PyLong_From@Word@LongLong( + ((Py@Char@LongLongScalarObject*)obj)->obval); + npy_hash_t x = PyObject_Hash(l); + Py_DECREF(l); + return x; +} +/**end repeat**/ + + +/**begin repeat + * #lname = datetime, timedelta# + * #name = Datetime, Timedelta# + */ +#if NPY_SIZEOF_HASH_T==NPY_SIZEOF_DATETIME +static npy_hash_t +@lname@_arrtype_hash(PyObject *obj) +{ + npy_hash_t x = (npy_hash_t)(((Py@name@ScalarObject *)obj)->obval); + if (x == -1) { + x = -2; + } + return x; +} +#elif NPY_SIZEOF_LONGLONG==NPY_SIZEOF_DATETIME +static npy_hash_t +@lname@_arrtype_hash(PyObject *obj) +{ + npy_hash_t y; + npy_longlong x = (((Py@name@ScalarObject *)obj)->obval); + + if ((x <= LONG_MAX)) { + y = (npy_hash_t) x; + } + else { + union Mask { + long hashvals[2]; + npy_longlong v; + } both; + + both.v = x; + y = both.hashvals[0] + (1000003)*both.hashvals[1]; + } + if (y == -1) { + y = -2; + } + return y; +} +#endif +/**end repeat**/ + + + +/* Wrong thing to do for longdouble, but....*/ + +/**begin repeat + * #lname = float, longdouble# + * #name = Float, LongDouble# + */ +static npy_hash_t +@lname@_arrtype_hash(PyObject *obj) +{ + return _Py_HashDouble((double) ((Py@name@ScalarObject *)obj)->obval); +} + +/* borrowed from complex_hash */ +static npy_hash_t +c@lname@_arrtype_hash(PyObject *obj) +{ + npy_hash_t hashreal, hashimag, combined; + hashreal = _Py_HashDouble((double) + (((PyC@name@ScalarObject *)obj)->obval).real); + + if (hashreal == -1) { + return -1; + } + hashimag = _Py_HashDouble((double) + (((PyC@name@ScalarObject *)obj)->obval).imag); + if (hashimag == -1) { + return -1; + } + combined = hashreal + 1000003 * hashimag; + if (combined == -1) { + combined = -2; + } + return combined; +} +/**end repeat**/ + +static npy_hash_t +half_arrtype_hash(PyObject *obj) +{ + return _Py_HashDouble(npy_half_to_double(((PyHalfScalarObject *)obj)->obval)); +} + +static npy_hash_t +object_arrtype_hash(PyObject *obj) +{ + return PyObject_Hash(((PyObjectScalarObject *)obj)->obval); +} + +/* we used to just hash the pointer */ +/* now use tuplehash algorithm using voidtype_item to get the object +*/ +static npy_hash_t +void_arrtype_hash(PyObject *obj) +{ + npy_hash_t x, y; + Py_ssize_t len, n; + PyVoidScalarObject *p; + PyObject *element; + npy_hash_t mult = 1000003L; + x = 0x345678L; + p = (PyVoidScalarObject *)obj; + /* Cannot hash mutable void scalars */ + if (p->flags & NPY_ARRAY_WRITEABLE) { + PyErr_SetString(PyExc_TypeError, "unhashable type: 'writeable 
void-scalar'"); + return -1; + } + len = voidtype_length(p); + for (n=0; n < len; n++) { + element = voidtype_item(p, n); + y = PyObject_Hash(element); + Py_DECREF(element); + if (y == -1) + return -1; + x = (x ^ y) * mult; + mult += (npy_hash_t)(82520L + len + len); + } + x += 97531L; + if (x == -1) + x = -2; + return x; +} + +/*object arrtype getattro and setattro */ +static PyObject * +object_arrtype_getattro(PyObjectScalarObject *obj, PyObject *attr) { + PyObject *res; + + /* first look in object and then hand off to generic type */ + + res = PyObject_GenericGetAttr(obj->obval, attr); + if (res) { + return res; + } + PyErr_Clear(); + return PyObject_GenericGetAttr((PyObject *)obj, attr); +} + +static int +object_arrtype_setattro(PyObjectScalarObject *obj, PyObject *attr, PyObject *val) { + int res; + /* first look in object and then hand off to generic type */ + + res = PyObject_GenericSetAttr(obj->obval, attr, val); + if (res >= 0) { + return res; + } + PyErr_Clear(); + return PyObject_GenericSetAttr((PyObject *)obj, attr, val); +} + +static PyObject * +object_arrtype_concat(PyObjectScalarObject *self, PyObject *other) +{ + return PySequence_Concat(self->obval, other); +} + +static Py_ssize_t +object_arrtype_length(PyObjectScalarObject *self) +{ + return PyObject_Length(self->obval); +} + +static PyObject * +object_arrtype_repeat(PyObjectScalarObject *self, Py_ssize_t count) +{ + return PySequence_Repeat(self->obval, count); +} + +static PyObject * +object_arrtype_subscript(PyObjectScalarObject *self, PyObject *key) +{ + return PyObject_GetItem(self->obval, key); +} + +static int +object_arrtype_ass_subscript(PyObjectScalarObject *self, PyObject *key, + PyObject *value) +{ + return PyObject_SetItem(self->obval, key, value); +} + +static int +object_arrtype_contains(PyObjectScalarObject *self, PyObject *ob) +{ + return PySequence_Contains(self->obval, ob); +} + +static PyObject * +object_arrtype_inplace_concat(PyObjectScalarObject *self, PyObject *o) +{ + return PySequence_InPlaceConcat(self->obval, o); +} + +static PyObject * +object_arrtype_inplace_repeat(PyObjectScalarObject *self, Py_ssize_t count) +{ + return PySequence_InPlaceRepeat(self->obval, count); +} + +static PySequenceMethods object_arrtype_as_sequence = { + (lenfunc)object_arrtype_length, /*sq_length*/ + (binaryfunc)object_arrtype_concat, /*sq_concat*/ + (ssizeargfunc)object_arrtype_repeat, /*sq_repeat*/ + 0, /*sq_item*/ + 0, /*sq_slice*/ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)object_arrtype_contains, /* sq_contains */ + (binaryfunc)object_arrtype_inplace_concat, /* sq_inplace_concat */ + (ssizeargfunc)object_arrtype_inplace_repeat, /* sq_inplace_repeat */ +}; + +static PyMappingMethods object_arrtype_as_mapping = { + (lenfunc)object_arrtype_length, + (binaryfunc)object_arrtype_subscript, + (objobjargproc)object_arrtype_ass_subscript, +}; + +#if !defined(NPY_PY3K) +static Py_ssize_t +object_arrtype_getsegcount(PyObjectScalarObject *self, Py_ssize_t *lenp) +{ + Py_ssize_t newlen; + int cnt; + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + + if (pb == NULL || + pb->bf_getsegcount == NULL || + (cnt = (*pb->bf_getsegcount)(self->obval, &newlen)) != 1) { + return 0; + } + if (lenp) { + *lenp = newlen; + } + return cnt; +} + +static Py_ssize_t +object_arrtype_getreadbuf(PyObjectScalarObject *self, Py_ssize_t segment, void **ptrptr) +{ + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + + if (pb == NULL || + pb->bf_getreadbuffer == NULL || + pb->bf_getsegcount == NULL) { + 
PyErr_SetString(PyExc_TypeError, + "expected a readable buffer object"); + return -1; + } + return (*pb->bf_getreadbuffer)(self->obval, segment, ptrptr); +} + +static Py_ssize_t +object_arrtype_getwritebuf(PyObjectScalarObject *self, Py_ssize_t segment, void **ptrptr) +{ + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + + if (pb == NULL || + pb->bf_getwritebuffer == NULL || + pb->bf_getsegcount == NULL) { + PyErr_SetString(PyExc_TypeError, + "expected a writeable buffer object"); + return -1; + } + return (*pb->bf_getwritebuffer)(self->obval, segment, ptrptr); +} + +static Py_ssize_t +object_arrtype_getcharbuf(PyObjectScalarObject *self, Py_ssize_t segment, + constchar **ptrptr) +{ + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + + if (pb == NULL || + pb->bf_getcharbuffer == NULL || + pb->bf_getsegcount == NULL) { + PyErr_SetString(PyExc_TypeError, + "expected a character buffer object"); + return -1; + } + return (*pb->bf_getcharbuffer)(self->obval, segment, ptrptr); +} +#endif + +static int +object_arrtype_getbuffer(PyObjectScalarObject *self, Py_buffer *view, int flags) +{ + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + if (pb == NULL || pb->bf_getbuffer == NULL) { + PyErr_SetString(PyExc_TypeError, + "expected a readable buffer object"); + return -1; + } + return (*pb->bf_getbuffer)(self->obval, view, flags); +} + +static void +object_arrtype_releasebuffer(PyObjectScalarObject *self, Py_buffer *view) +{ + PyBufferProcs *pb = Py_TYPE(self->obval)->tp_as_buffer; + if (pb == NULL) { + PyErr_SetString(PyExc_TypeError, + "expected a readable buffer object"); + return; + } + if (pb->bf_releasebuffer != NULL) { + (*pb->bf_releasebuffer)(self->obval, view); + } +} + +static PyBufferProcs object_arrtype_as_buffer = { +#if !defined(NPY_PY3K) + (readbufferproc)object_arrtype_getreadbuf, + (writebufferproc)object_arrtype_getwritebuf, + (segcountproc)object_arrtype_getsegcount, + (charbufferproc)object_arrtype_getcharbuf, +#endif + (getbufferproc)object_arrtype_getbuffer, + (releasebufferproc)object_arrtype_releasebuffer, +}; + +static PyObject * +object_arrtype_call(PyObjectScalarObject *obj, PyObject *args, PyObject *kwds) +{ + return PyObject_Call(obj->obval, args, kwds); +} + +NPY_NO_EXPORT PyTypeObject PyObjectArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "numpy.object_", /* tp_name*/ + sizeof(PyObjectScalarObject), /* tp_basicsize*/ + 0, /* tp_itemsize */ + (destructor)object_arrtype_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + &object_arrtype_as_sequence, /* tp_as_sequence */ + &object_arrtype_as_mapping, /* tp_as_mapping */ + 0, /* tp_hash */ + (ternaryfunc)object_arrtype_call, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)object_arrtype_getattro, /* tp_getattro */ + (setattrofunc)object_arrtype_setattro, /* tp_setattro */ + &object_arrtype_as_buffer, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, 
/* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; + +static PyObject * +gen_arrtype_subscript(PyObject *self, PyObject *key) +{ + /* + * Only [...], [...,<???>], [<???>, ...], + * is allowed for indexing a scalar + * + * These return a new N-d array with a copy of + * the data where N is the number of None's in <???>. + */ + PyObject *res, *ret; + + res = PyArray_FromScalar(self, NULL); + + ret = array_subscript((PyArrayObject *)res, key); + Py_DECREF(res); + if (ret == NULL) { + PyErr_SetString(PyExc_IndexError, + "invalid index to scalar variable."); + } + return ret; +} + + +#define NAME_bool "bool" +#define NAME_void "void" +#if defined(NPY_PY3K) +#define NAME_string "bytes" +#define NAME_unicode "str" +#else +#define NAME_string "string" +#define NAME_unicode "unicode" +#endif + +/**begin repeat + * #name = bool, string, unicode, void# + * #NAME = Bool, String, Unicode, Void# + * #ex = _,_,_,# + */ +NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "numpy." NAME_@name@ "@ex@", /* tp_name*/ + sizeof(Py@NAME@ScalarObject), /* tp_basicsize*/ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; +/**end repeat**/ + +#undef NAME_bool +#undef NAME_void +#undef NAME_string +#undef NAME_unicode + +/**begin repeat + * #NAME = Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, ULong, + * ULongLong, Half, Float, Double, LongDouble, Datetime, Timedelta# + * #name = int*5, uint*5, float*4, datetime, timedelta# + * #CNAME = (CHAR, SHORT, INT, LONG, LONGLONG)*2, HALF, FLOAT, DOUBLE, + * LONGDOUBLE, DATETIME, TIMEDELTA# + */ +#if NPY_BITSOF_@CNAME@ == 8 +#define _THIS_SIZE "8" +#elif NPY_BITSOF_@CNAME@ == 16 +#define _THIS_SIZE "16" +#elif NPY_BITSOF_@CNAME@ == 32 +#define _THIS_SIZE "32" +#elif NPY_BITSOF_@CNAME@ == 64 +#define _THIS_SIZE "64" +#elif NPY_BITSOF_@CNAME@ == 80 +#define _THIS_SIZE "80" +#elif NPY_BITSOF_@CNAME@ == 96 +#define _THIS_SIZE "96" +#elif NPY_BITSOF_@CNAME@ == 128 +#define _THIS_SIZE "128" +#elif NPY_BITSOF_@CNAME@ == 256 +#define _THIS_SIZE "256" +#endif +NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "numpy.@name@" _THIS_SIZE, /* tp_name*/ + sizeof(Py@NAME@ScalarObject), /* tp_basicsize*/ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* 
tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; + +#undef _THIS_SIZE +/**end repeat**/ + + +static PyMappingMethods gentype_as_mapping = { + NULL, + (binaryfunc)gen_arrtype_subscript, + NULL +}; + + +/**begin repeat + * #NAME = CFloat, CDouble, CLongDouble# + * #name = complex*3# + * #CNAME = FLOAT, DOUBLE, LONGDOUBLE# + */ +#if NPY_BITSOF_@CNAME@ == 16 +#define _THIS_SIZE "32" +#elif NPY_BITSOF_@CNAME@ == 32 +#define _THIS_SIZE "64" +#elif NPY_BITSOF_@CNAME@ == 64 +#define _THIS_SIZE "128" +#elif NPY_BITSOF_@CNAME@ == 80 +#define _THIS_SIZE "160" +#elif NPY_BITSOF_@CNAME@ == 96 +#define _THIS_SIZE "192" +#elif NPY_BITSOF_@CNAME@ == 128 +#define _THIS_SIZE "256" +#elif NPY_BITSOF_@CNAME@ == 256 +#define _THIS_SIZE "512" +#endif + +NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(0, 0) +#else + PyObject_HEAD_INIT(0) + 0, /* ob_size */ +#endif + "numpy.@name@" _THIS_SIZE, /* tp_name*/ + sizeof(Py@NAME@ScalarObject), /* tp_basicsize*/ + 0, /* tp_itemsize*/ + 0, /* tp_dealloc*/ + 0, /* tp_print*/ + 0, /* tp_getattr*/ + 0, /* tp_setattr*/ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr*/ + 0, /* tp_as_number*/ + 0, /* tp_as_sequence*/ + 0, /* tp_as_mapping*/ + 0, /* tp_hash */ + 0, /* tp_call*/ + 0, /* tp_str*/ + 0, /* tp_getattro*/ + 0, /* tp_setattro*/ + 0, /* tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /* tp_flags*/ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; +#undef _THIS_SIZE + +/**end repeat**/ + +/* + * This table maps the built-in type numbers to their scalar + * type numbers. Note that signed integers are mapped to INTNEG_SCALAR, + * which is different than what PyArray_ScalarKind returns. + */ +NPY_NO_EXPORT signed char +_npy_scalar_kinds_table[NPY_NTYPES]; + +/* + * This table maps a scalar kind (excluding NPY_NOSCALAR) + * to the smallest type number of that kind. + */ +NPY_NO_EXPORT signed char +_npy_smallest_type_of_kind_table[NPY_NSCALARKINDS]; + +/* + * This table gives the type of the same kind, but next in the sequence + * of sizes. 
+ */ +NPY_NO_EXPORT signed char +_npy_next_larger_type_table[NPY_NTYPES]; + +/* + * This table describes safe casting for small type numbers, + * and is used by PyArray_CanCastSafely. + */ +NPY_NO_EXPORT unsigned char +_npy_can_cast_safely_table[NPY_NTYPES][NPY_NTYPES]; + +/* + * This table gives the smallest-size and smallest-kind type to which + * the input types may be safely cast, according to _npy_can_cast_safely. + */ +NPY_NO_EXPORT signed char +_npy_type_promotion_table[NPY_NTYPES][NPY_NTYPES]; + +NPY_NO_EXPORT void +initialize_casting_tables(void) +{ + int i, j; + + _npy_smallest_type_of_kind_table[NPY_BOOL_SCALAR] = NPY_BOOL; + _npy_smallest_type_of_kind_table[NPY_INTPOS_SCALAR] = NPY_UBYTE; + _npy_smallest_type_of_kind_table[NPY_INTNEG_SCALAR] = NPY_BYTE; + _npy_smallest_type_of_kind_table[NPY_FLOAT_SCALAR] = NPY_HALF; + _npy_smallest_type_of_kind_table[NPY_COMPLEX_SCALAR] = NPY_CFLOAT; + _npy_smallest_type_of_kind_table[NPY_OBJECT_SCALAR] = NPY_OBJECT; + + /* Default for built-in types is object scalar */ + memset(_npy_scalar_kinds_table, NPY_OBJECT_SCALAR, + sizeof(_npy_scalar_kinds_table)); + /* Default for next largest type is -1, signalling no bigger */ + memset(_npy_next_larger_type_table, -1, + sizeof(_npy_next_larger_type_table)); + + /* Compile-time loop of scalar kinds */ + + /**begin repeat + * #NAME = BOOL, + * BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #BIGGERTYPE = -1, + * NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, NPY_LONG, NPY_ULONG, + * NPY_LONGLONG, NPY_ULONGLONG, -1, -1, + * NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE, -1, + * NPY_CDOUBLE, NPY_CLONGDOUBLE, -1# + * #SCKIND = BOOL, + * (INTNEG, INTPOS)*5, + * FLOAT*4, + * COMPLEX*3# + */ + + _npy_scalar_kinds_table[NPY_@NAME@] = NPY_@SCKIND@_SCALAR; + _npy_next_larger_type_table[NPY_@NAME@] = @BIGGERTYPE@; + + /**end repeat**/ + + memset(_npy_can_cast_safely_table, 0, sizeof(_npy_can_cast_safely_table)); + + for (i = 0; i < NPY_NTYPES; ++i) { + /* Identity */ + _npy_can_cast_safely_table[i][i] = 1; + if (i != NPY_DATETIME) { + /* + * Bool -> <Anything> except datetime (since + * it conceptually has no zero) + */ + _npy_can_cast_safely_table[NPY_BOOL][i] = 1; + } + /* <Anything> -> Object */ + _npy_can_cast_safely_table[i][NPY_OBJECT] = 1; + /* <Anything> -> Void */ + _npy_can_cast_safely_table[i][NPY_VOID] = 1; + } + + _npy_can_cast_safely_table[NPY_STRING][NPY_UNICODE] = 1; + _npy_can_cast_safely_table[NPY_BOOL][NPY_TIMEDELTA] = 1; + +#ifndef NPY_SIZEOF_BYTE +#define NPY_SIZEOF_BYTE 1 +#endif + + /* Compile-time loop of casting rules */ + + /**begin repeat + * #FROM_NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #FROM_BASENAME = BYTE, BYTE, SHORT, SHORT, INT, INT, + * LONG, LONG, LONGLONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * FLOAT, DOUBLE, LONGDOUBLE# + * #from_isint = 1, 0, 1, 0, 1, 0, 1, 0, + * 1, 0, 0, 0, 0, 0, + * 0, 0, 0# + * #from_isuint = 0, 1, 0, 1, 0, 1, 0, 1, + * 0, 1, 0, 0, 0, 0, + * 0, 0, 0# + * #from_isfloat = 0, 0, 0, 0, 0, 0, 0, 0, + * 0, 0, 1, 1, 1, 1, + * 0, 0, 0# + * #from_iscomplex = 0, 0, 0, 0, 0, 0, 0, 0, + * 0, 0, 0, 0, 0, 0, + * 1, 1, 1# + */ + +#define _FROM_BSIZE NPY_SIZEOF_@FROM_BASENAME@ +#define _FROM_NUM (NPY_@FROM_NAME@) + + _npy_can_cast_safely_table[_FROM_NUM][NPY_STRING] = 1; + _npy_can_cast_safely_table[_FROM_NUM][NPY_UNICODE] = 1; + + /* Allow casts 
from any integer to the TIMEDELTA type */ +#if @from_isint@ || @from_isuint@ + _npy_can_cast_safely_table[_FROM_NUM][NPY_TIMEDELTA] = 1; +#endif + + /**begin repeat1 + * #TO_NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #TO_BASENAME = BYTE, BYTE, SHORT, SHORT, INT, INT, + * LONG, LONG, LONGLONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * FLOAT, DOUBLE, LONGDOUBLE# + * #to_isint = 1, 0, 1, 0, 1, 0, 1, 0, + * 1, 0, 0, 0, 0, 0, + * 0, 0, 0# + * #to_isuint = 0, 1, 0, 1, 0, 1, 0, 1, + * 0, 1, 0, 0, 0, 0, + * 0, 0, 0# + * #to_isfloat = 0, 0, 0, 0, 0, 0, 0, 0, + * 0, 0, 1, 1, 1, 1, + * 0, 0, 0# + * #to_iscomplex = 0, 0, 0, 0, 0, 0, 0, 0, + * 0, 0, 0, 0, 0, 0, + * 1, 1, 1# + */ +#define _TO_BSIZE NPY_SIZEOF_@TO_BASENAME@ +#define _TO_NUM (NPY_@TO_NAME@) + + /* + * NOTE: _FROM_BSIZE and _TO_BSIZE are the sizes of the "base type" + * which is the same as the size of the type except for + * complex, where it is the size of the real type. + */ + +#if @from_isint@ + +# if @to_isint@ && (_TO_BSIZE >= _FROM_BSIZE) + /* int -> int */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_isfloat@ && (_FROM_BSIZE < 8) && (_TO_BSIZE > _FROM_BSIZE) + /* int -> float */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_isfloat@ && (_FROM_BSIZE >= 8) && (_TO_BSIZE >= _FROM_BSIZE) + /* int -> float */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_iscomplex@ && (_FROM_BSIZE < 8) && (_TO_BSIZE > _FROM_BSIZE) + /* int -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_iscomplex@ && (_FROM_BSIZE >= 8) && (_TO_BSIZE >= _FROM_BSIZE) + /* int -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# endif + +#elif @from_isuint@ + +# if @to_isint@ && (_TO_BSIZE > _FROM_BSIZE) + /* uint -> int */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_isuint@ && (_TO_BSIZE >= _FROM_BSIZE) + /* uint -> uint */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_isfloat@ && (_FROM_BSIZE < 8) && (_TO_BSIZE > _FROM_BSIZE) + /* uint -> float */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_isfloat@ && (_FROM_BSIZE >= 8) && (_TO_BSIZE >= _FROM_BSIZE) + /* uint -> float */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_iscomplex@ && (_FROM_BSIZE < 8) && (_TO_BSIZE > _FROM_BSIZE) + /* uint -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_iscomplex@ && (_FROM_BSIZE >= 8) && (_TO_BSIZE >= _FROM_BSIZE) + /* uint -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# endif + + +#elif @from_isfloat@ + +# if @to_isfloat@ && (_TO_BSIZE >= _FROM_BSIZE) + /* float -> float */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# elif @to_iscomplex@ && (_TO_BSIZE >= _FROM_BSIZE) + /* float -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# endif + +#elif @from_iscomplex@ + +# if @to_iscomplex@ && (_TO_BSIZE >= _FROM_BSIZE) + /* complex -> complex */ + _npy_can_cast_safely_table[_FROM_NUM][_TO_NUM] = 1; +# endif + +#endif + +#undef _TO_NUM +#undef _TO_BSIZE + +/**end repeat1**/ + +#undef _FROM_NUM +#undef _FROM_BSIZE + +/**end repeat**/ + + /* + * Now that the _can_cast_safely table is finished, we can + * use it to build the _type_promotion table + */ + for (i = 0; i < NPY_NTYPES; ++i) { + _npy_type_promotion_table[i][i] = i; + /* Don't let number promote to string/unicode/void/datetime/timedelta */ + 
if (i == NPY_STRING || i == NPY_UNICODE || i == NPY_VOID || + i == NPY_DATETIME || i == NPY_TIMEDELTA) { + /* Promoting these types requires examining their contents */ + _npy_type_promotion_table[i][i] = -1; + for (j = i + 1; j < NPY_NTYPES; ++j) { + _npy_type_promotion_table[i][j] = -1; + _npy_type_promotion_table[j][i] = -1; + } + /* Except they can convert to OBJECT */ + _npy_type_promotion_table[i][NPY_OBJECT] = NPY_OBJECT; + _npy_type_promotion_table[NPY_OBJECT][i] = NPY_OBJECT; + } + else { + for (j = i + 1; j < NPY_NTYPES; ++j) { + /* Don't let number promote to string/unicode/void */ + if (j == NPY_STRING || j == NPY_UNICODE || j == NPY_VOID) { + _npy_type_promotion_table[i][j] = -1; + _npy_type_promotion_table[j][i] = -1; + } + else if (_npy_can_cast_safely_table[i][j]) { + _npy_type_promotion_table[i][j] = j; + _npy_type_promotion_table[j][i] = j; + } + else if (_npy_can_cast_safely_table[j][i]) { + _npy_type_promotion_table[i][j] = i; + _npy_type_promotion_table[j][i] = i; + } + else { + int k, iskind, jskind, skind; + iskind = _npy_scalar_kinds_table[i]; + jskind = _npy_scalar_kinds_table[j]; + /* If there's no kind (void/string/etc) */ + if (iskind == NPY_NOSCALAR || jskind == NPY_NOSCALAR) { + k = -1; + } + else { + /* Start with the type of larger kind */ + if (iskind > jskind) { + skind = iskind; + k = i; + } + else { + skind = jskind; + k = j; + } + for (;;) { + /* Try the next larger type of this kind */ + k = _npy_next_larger_type_table[k]; + + /* If there is no larger, try a larger kind */ + if (k < 0) { + ++skind; + /* Use -1 to signal no promoted type found */ + if (skind < NPY_NSCALARKINDS) { + k = _npy_smallest_type_of_kind_table[skind]; + } + else { + k = -1; + break; + } + } + + if (_npy_can_cast_safely_table[i][k] && + _npy_can_cast_safely_table[j][k]) { + break; + } + } + } + _npy_type_promotion_table[i][j] = k; + _npy_type_promotion_table[j][i] = k; + } + } + } + } +} + +#ifndef NPY_PY3K +/* + * In python2, the `float` and `complex` types still implement the obsolete + * "tp_print" method, which uses CPython's float-printing routines to print the + * float. Numpy's float_/cfloat inherit from Python float/complex, but + * override its tp_repr and tp_str methods. In order to avoid an inconsistency + * with the inherited tp_print, we need to override it too. + * + * In python3 the tp_print method is reserved/unused. 
+ */ +static int +doubletype_print(PyObject *o, FILE *fp, int flags) +{ + int ret; + PyObject *to_print; + if (flags & Py_PRINT_RAW) { + to_print = PyObject_Str(o); + } + else { + to_print = PyObject_Repr(o); + } + + if (to_print == NULL) { + return -1; + } + + ret = PyObject_Print(to_print, fp, Py_PRINT_RAW); + Py_DECREF(to_print); + return ret; +} +#endif + +static PyNumberMethods longdoubletype_as_number; +static PyNumberMethods clongdoubletype_as_number; +static void init_basetypes(void); + + +NPY_NO_EXPORT void +initialize_numeric_types(void) +{ + init_basetypes(); + PyGenericArrType_Type.tp_dealloc = (destructor)gentype_dealloc; + PyGenericArrType_Type.tp_as_number = &gentype_as_number; + PyGenericArrType_Type.tp_as_buffer = &gentype_as_buffer; + PyGenericArrType_Type.tp_as_mapping = &gentype_as_mapping; + PyGenericArrType_Type.tp_flags = BASEFLAGS; + PyGenericArrType_Type.tp_methods = gentype_methods; + PyGenericArrType_Type.tp_getset = gentype_getsets; + PyGenericArrType_Type.tp_new = NULL; + PyGenericArrType_Type.tp_alloc = gentype_alloc; + PyGenericArrType_Type.tp_free = (freefunc)gentype_free; + PyGenericArrType_Type.tp_richcompare = gentype_richcompare; + + PyBoolArrType_Type.tp_as_number = &bool_arrtype_as_number; + /* + * need to add dummy versions with filled-in nb_index + * in-order for PyType_Ready to fill in .__index__() method + * also fill array_type_as_number struct with reasonable defaults + */ + + /**begin repeat + * #name = byte, short, int, long, longlong, ubyte, ushort, + * uint, ulong, ulonglong# + * #NAME = Byte, Short, Int, Long, LongLong, UByte, UShort, + * UInt, ULong, ULongLong# + */ + @name@_arrtype_as_number = gentype_as_number; + Py@NAME@ArrType_Type.tp_as_number = &@name@_arrtype_as_number; + Py@NAME@ArrType_Type.tp_as_number->nb_index = (unaryfunc)@name@_index; + + /**end repeat**/ + + /**begin repeat + * #name = half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #NAME = Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + */ + @name@_arrtype_as_number = gentype_as_number; + Py@NAME@ArrType_Type.tp_as_number = &@name@_arrtype_as_number; + + /**end repeat**/ + +#ifndef NPY_PY3K + PyDoubleArrType_Type.tp_print = &doubletype_print; + PyCDoubleArrType_Type.tp_print = &doubletype_print; +#endif + + + PyBoolArrType_Type.tp_as_number->nb_index = (unaryfunc)bool_index; + + PyStringArrType_Type.tp_alloc = NULL; + PyStringArrType_Type.tp_free = NULL; + + PyStringArrType_Type.tp_repr = stringtype_repr; + PyStringArrType_Type.tp_str = stringtype_str; + + PyUnicodeArrType_Type.tp_repr = unicodetype_repr; + PyUnicodeArrType_Type.tp_str = unicodetype_str; + + PyVoidArrType_Type.tp_methods = voidtype_methods; + PyVoidArrType_Type.tp_getset = voidtype_getsets; + PyVoidArrType_Type.tp_as_mapping = &voidtype_as_mapping; + PyVoidArrType_Type.tp_as_sequence = &voidtype_as_sequence; + PyVoidArrType_Type.tp_repr = voidtype_repr; + PyVoidArrType_Type.tp_str = voidtype_str; + + PyIntegerArrType_Type.tp_getset = inttype_getsets; + + /**begin repeat + * #NAME= Number, Integer, SignedInteger, UnsignedInteger, Inexact, + * Floating, ComplexFloating, Flexible, Character# + */ + + Py@NAME@ArrType_Type.tp_flags = BASEFLAGS; + + /**end repeat**/ + + /**begin repeat + * #name = bool, byte, short, int, long, longlong, ubyte, ushort, uint, + * ulong, ulonglong, half, float, double, longdouble, cfloat, + * cdouble, clongdouble, string, unicode, void, object, datetime, + * timedelta# + * #NAME = Bool, Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, 
+ * ULong, ULongLong, Half, Float, Double, LongDouble, CFloat, + * CDouble, CLongDouble, String, Unicode, Void, Object, Datetime, + * Timedelta# + */ + + Py@NAME@ArrType_Type.tp_flags = BASEFLAGS; + Py@NAME@ArrType_Type.tp_new = @name@_arrtype_new; + Py@NAME@ArrType_Type.tp_richcompare = gentype_richcompare; + + /**end repeat**/ + + /**begin repeat + * #name = bool, byte, short, ubyte, ushort, uint, ulong, ulonglong, + * half, float, longdouble, cfloat, clongdouble, void, object, + * datetime, timedelta# + * #NAME = Bool, Byte, Short, UByte, UShort, UInt, ULong, ULongLong, + * Half, Float, LongDouble, CFloat, CLongDouble, Void, Object, + * Datetime, Timedelta# + */ + + Py@NAME@ArrType_Type.tp_hash = @name@_arrtype_hash; + + /**end repeat**/ + + /**begin repeat + * #name = cfloat, clongdouble# + * #NAME = CFloat, CLongDouble# + */ + + Py@NAME@ArrType_Type.tp_methods = @name@type_methods; + + /**end repeat**/ + +#if (NPY_SIZEOF_INT != NPY_SIZEOF_LONG) || defined(NPY_PY3K) + /* We won't be inheriting from Python Int type. */ + PyIntArrType_Type.tp_hash = int_arrtype_hash; +#endif + +#if defined(NPY_PY3K) + /* We won't be inheriting from Python Int type. */ + PyLongArrType_Type.tp_hash = long_arrtype_hash; +#endif + +#if (NPY_SIZEOF_LONG != NPY_SIZEOF_LONGLONG) || defined(NPY_PY3K) + /* We won't be inheriting from Python Int type. */ + PyLongLongArrType_Type.tp_hash = longlong_arrtype_hash; +#endif + + /**begin repeat + * #name = repr, str# + */ + + PyHalfArrType_Type.tp_@name@ = halftype_@name@; + + PyFloatArrType_Type.tp_@name@ = floattype_@name@; + PyCFloatArrType_Type.tp_@name@ = cfloattype_@name@; + + PyDoubleArrType_Type.tp_@name@ = doubletype_@name@; + PyCDoubleArrType_Type.tp_@name@ = cdoubletype_@name@; + + PyDatetimeArrType_Type.tp_@name@ = datetimetype_@name@; + PyTimedeltaArrType_Type.tp_@name@ = timedeltatype_@name@; + + /**end repeat**/ + + + /**begin repeat + * #Type = Bool, Byte, UByte, Short, UShort, Int, UInt, Long, + * ULong, LongLong, ULongLong# + */ + + /* both str/repr use genint_type_str to avoid trailing "L" of longs */ + Py@Type@ArrType_Type.tp_str = genint_type_str; + Py@Type@ArrType_Type.tp_repr = genint_type_str; + + /**end repeat**/ + + + + /**begin repeat + * #char = ,c# + * #CHAR = ,C# + */ + + /* + * These need to be coded specially because longdouble/clongdouble getitem + * does not return a normal Python type + */ + @char@longdoubletype_as_number.nb_float = @char@longdoubletype_float; +#if defined(NPY_PY3K) + @char@longdoubletype_as_number.nb_int = @char@longdoubletype_long; +#else + @char@longdoubletype_as_number.nb_int = @char@longdoubletype_int; + @char@longdoubletype_as_number.nb_long = @char@longdoubletype_long; + @char@longdoubletype_as_number.nb_hex = @char@longdoubletype_hex; + @char@longdoubletype_as_number.nb_oct = @char@longdoubletype_oct; +#endif + + Py@CHAR@LongDoubleArrType_Type.tp_as_number = &@char@longdoubletype_as_number; + Py@CHAR@LongDoubleArrType_Type.tp_repr = @char@longdoubletype_repr; + Py@CHAR@LongDoubleArrType_Type.tp_str = @char@longdoubletype_str; + + /**end repeat**/ + + PyStringArrType_Type.tp_itemsize = sizeof(char); + PyVoidArrType_Type.tp_dealloc = (destructor) void_dealloc; + + PyArrayIter_Type.tp_iter = PyObject_SelfIter; + PyArrayMapIter_Type.tp_iter = PyObject_SelfIter; +} + +typedef struct { + PyTypeObject * type; + int typenum; +} scalar_type; + +static scalar_type typeobjects[] = { + {&PyBoolArrType_Type, NPY_BOOL}, + {&PyByteArrType_Type, NPY_BYTE}, + {&PyUByteArrType_Type, NPY_UBYTE}, + {&PyShortArrType_Type, 
NPY_SHORT}, + {&PyUShortArrType_Type, NPY_USHORT}, + {&PyIntArrType_Type, NPY_INT}, + {&PyUIntArrType_Type, NPY_UINT}, + {&PyLongArrType_Type, NPY_LONG}, + {&PyULongArrType_Type, NPY_ULONG}, + {&PyLongLongArrType_Type, NPY_LONGLONG}, + {&PyULongLongArrType_Type, NPY_ULONGLONG}, + {&PyFloatArrType_Type, NPY_FLOAT}, + {&PyDoubleArrType_Type, NPY_DOUBLE}, + {&PyLongDoubleArrType_Type, NPY_LONGDOUBLE}, + {&PyCFloatArrType_Type, NPY_CFLOAT}, + {&PyCDoubleArrType_Type, NPY_CDOUBLE}, + {&PyCLongDoubleArrType_Type, NPY_CLONGDOUBLE}, + {&PyObjectArrType_Type, NPY_OBJECT}, + {&PyStringArrType_Type, NPY_STRING}, + {&PyUnicodeArrType_Type, NPY_UNICODE}, + {&PyVoidArrType_Type, NPY_VOID}, + {&PyDatetimeArrType_Type, NPY_DATETIME}, + {&PyTimedeltaArrType_Type, NPY_TIMEDELTA}, + {&PyHalfArrType_Type, NPY_HALF} +}; + +static int compare_types(const void * a_, const void * b_) +{ + const PyTypeObject * a = ((const scalar_type *)a_)->type; + const PyTypeObject * b = ((const scalar_type *)b_)->type; + if (a < b) { + return -1; + } + else if (a > b) { + return 1; + } + return 0; +} + +static void init_basetypes(void) +{ + qsort(typeobjects, sizeof(typeobjects) / sizeof(typeobjects[0]), + sizeof(typeobjects[0]), + compare_types); +} + + +NPY_NO_EXPORT int +get_typeobj_idx(PyTypeObject * obj) +{ + npy_intp imin = 0, imax = sizeof(typeobjects) / sizeof(typeobjects[0]) - 1; + while (imax >= imin) + { + npy_intp imid = ((imax - imin) / 2) + imin; + if(typeobjects[imid].type == obj) { + return imid; + } + else if (typeobjects[imid].type < obj) { + imin = imid + 1; + } + else { + imax = imid - 1; + } + } + + return -1; +} + +NPY_NO_EXPORT int +is_anyscalar_exact(PyObject *obj) +{ + return get_typeobj_idx(Py_TYPE(obj)) >= 0; +} + +NPY_NO_EXPORT int +_typenum_fromtypeobj(PyObject *type, int user) +{ + int typenum, i; + + typenum = NPY_NOTYPE; + i = get_typeobj_idx((PyTypeObject*)type); + if (i >= 0) { + typenum = typeobjects[i].typenum; + } + + if (!user) { + return typenum; + } + /* Search any registered types */ + i = 0; + while (i < NPY_NUMUSERTYPES) { + if (type == (PyObject *)(userdescrs[i]->typeobj)) { + typenum = i + NPY_USERDEF; + break; + } + i++; + } + return typenum; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/npymath/ieee754.c.src b/contrib/python/numpy/py2/numpy/core/src/npymath/ieee754.c.src new file mode 100644 index 0000000000..d960838c8f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npymath/ieee754.c.src @@ -0,0 +1,841 @@ +/* -*- c -*- */ +/* + * vim:syntax=c + * + * Low-level routines related to IEEE-754 format + */ +#include "npy_math_common.h" +#include "npy_math_private.h" +#include "numpy/utils.h" + +#ifndef HAVE_COPYSIGN +double npy_copysign(double x, double y) +{ + npy_uint32 hx, hy; + GET_HIGH_WORD(hx, x); + GET_HIGH_WORD(hy, y); + SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000)); + return x; +} +#endif + +/* + The below code is provided for compilers which do not yet provide C11 compatibility (gcc 4.5 and older) + */ +#ifndef LDBL_TRUE_MIN +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +#endif + +#if !defined(HAVE_DECL_SIGNBIT) +#include "_signbit.c" + +int _npy_signbit_f(float x) +{ + return _npy_signbit_d((double) x); +} + +int _npy_signbit_ld(long double x) +{ + return _npy_signbit_d((double) x); +} +#endif + +/* + * FIXME: There is a lot of redundancy between _next* and npy_nextafter*. 
+ * refactor this at some point + * + * p >= 0, returnx x + nulp + * p < 0, returnx x - nulp + */ +static double _next(double x, int p) +{ + volatile double t; + npy_int32 hx, hy, ix; + npy_uint32 lx; + + EXTRACT_WORDS(hx, lx, x); + ix = hx & 0x7fffffff; /* |x| */ + + if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0)) /* x is nan */ + return x; + if ((ix | lx) == 0) { /* x == 0 */ + if (p >= 0) { + INSERT_WORDS(x, 0x0, 1); /* return +minsubnormal */ + } else { + INSERT_WORDS(x, 0x80000000, 1); /* return -minsubnormal */ + } + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (p < 0) { /* x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + hy = hx & 0x7ff00000; + if (hy >= 0x7ff00000) + return x + x; /* overflow */ + if (hy < 0x00100000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + INSERT_WORDS(x, hx, lx); + return x; + } + } + INSERT_WORDS(x, hx, lx); + return x; +} + +static float _nextf(float x, int p) +{ + volatile float t; + npy_int32 hx, hy, ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; /* |x| */ + + if ((ix > 0x7f800000)) /* x is nan */ + return x; + if (ix == 0) { /* x == 0 */ + if (p >= 0) { + SET_FLOAT_WORD(x, 0x0 | 1); /* return +minsubnormal */ + } else { + SET_FLOAT_WORD(x, 0x80000000 | 1); /* return -minsubnormal */ + } + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (p < 0) { /* x -= ulp */ + hx -= 1; + } else { /* x += ulp */ + hx += 1; + } + hy = hx & 0x7f800000; + if (hy >= 0x7f800000) + return x + x; /* overflow */ + if (hy < 0x00800000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + SET_FLOAT_WORD(x, hx); + return x; + } + } + SET_FLOAT_WORD(x, hx); + return x; +} + +#if defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) + +/* + * FIXME: this is ugly and untested. The asm part only works with gcc, and we + * should consolidate the GET_LDOUBLE* / SET_LDOUBLE macros + */ +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = x; __asm ("" : "+m" (__x)); __x; }) +#define math_force_eval(x) __asm __volatile ("" : : "m" (x)) + +/* only works for big endian */ +typedef union +{ + npy_longdouble value; + struct + { + npy_uint64 msw; + npy_uint64 lsw; + } parts64; + struct + { + npy_uint32 w0, w1, w2, w3; + } parts32; +} ieee854_long_double_shape_type; + +/* Get two 64 bit ints from a long double. */ + +#define GET_LDOUBLE_WORDS64(ix0,ix1,d) \ +do { \ + ieee854_long_double_shape_type qw_u; \ + qw_u.value = (d); \ + (ix0) = qw_u.parts64.msw; \ + (ix1) = qw_u.parts64.lsw; \ +} while (0) + +/* Set a long double from two 64 bit ints. 
*/ + +#define SET_LDOUBLE_WORDS64(d,ix0,ix1) \ +do { \ + ieee854_long_double_shape_type qw_u; \ + qw_u.parts64.msw = (ix0); \ + qw_u.parts64.lsw = (ix1); \ + (d) = qw_u.value; \ +} while (0) + +static npy_longdouble _nextl(npy_longdouble x, int p) +{ + npy_int64 hx,ihx,ilx; + npy_uint64 lx; + npy_longdouble u; + + GET_LDOUBLE_WORDS64(hx, lx, x); + ihx = hx & 0x7fffffffffffffffLL; /* |hx| */ + ilx = lx & 0x7fffffffffffffffLL; /* |lx| */ + + if(((ihx & 0x7ff0000000000000LL)==0x7ff0000000000000LL)&& + ((ihx & 0x000fffffffffffffLL)!=0)) { + return x; /* signal the nan */ + } + if(ihx == 0 && ilx == 0) { /* x == 0 */ + SET_LDOUBLE_WORDS64(x, p, 0ULL);/* return +-minsubnormal */ + u = x * x; + if (u == x) { + return u; + } else { + return x; /* raise underflow flag */ + } + } + + if(p < 0) { /* p < 0, x -= ulp */ + if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) + return x+x; /* overflow, return -inf */ + if (hx >= 0x7ff0000000000000LL) { + SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL); + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x -= LDBL_TRUE_MIN; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (npy_int64) lx <= 0) + || (hx < 0 && (npy_int64) lx > 1)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x - u; + } else { /* p >= 0, x += ulp */ + if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL)) + return x+x; /* overflow, return +inf */ + if ((npy_uint64) hx >= 0xfff0000000000000ULL) { + SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL); + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x += LDBL_TRUE_MIN; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (npy_int64) lx < 0 && lx != 0x8000000000000001LL) + || (hx < 0 && (npy_int64) lx >= 0)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + if (x == 0.0L) /* handle negative LDBL_TRUE_MIN case */ + x = -0.0L; + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x + u; + } +} +#else +static npy_longdouble _nextl(npy_longdouble x, int p) +{ + volatile npy_longdouble t; + union IEEEl2bitsrep ux; + + ux.e = x; + + if ((GET_LDOUBLE_EXP(ux) == 0x7fff && + ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0)) { + return ux.e; /* x is nan */ + } + if (ux.e == 0.0) { + SET_LDOUBLE_MANH(ux, 0); /* return +-minsubnormal */ + SET_LDOUBLE_MANL(ux, 1); + if (p >= 0) { + SET_LDOUBLE_SIGN(ux, 0); + } else { + SET_LDOUBLE_SIGN(ux, 1); + } + t = ux.e * ux.e; + if (t == ux.e) { + return t; + } else { + return ux.e; /* raise underflow flag */ + } + } + if (p < 0) { /* x -= ulp */ + if (GET_LDOUBLE_MANL(ux) == 0) { + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1); + } + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) - 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + } + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1); + } else { /* x += ulp */ + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1); + if (GET_LDOUBLE_MANL(ux) == 0) { + SET_LDOUBLE_MANH(ux, + 
(GET_LDOUBLE_MANH(ux) + 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1); + } + } + } + if (GET_LDOUBLE_EXP(ux) == 0x7fff) { + return ux.e + ux.e; /* overflow */ + } + if (GET_LDOUBLE_EXP(ux) == 0) { /* underflow */ + if (LDBL_NBIT) { + SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT); + } + t = ux.e * ux.e; + if (t != ux.e) { /* raise underflow flag */ + return ux.e; + } + } + + return ux.e; +} +#endif + +/* + * nextafter code taken from BSD math lib, the code contains the following + * notice: + * + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#ifndef HAVE_NEXTAFTER +double npy_nextafter(double x, double y) +{ + volatile double t; + npy_int32 hx, hy, ix, iy; + npy_uint32 lx, ly; + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + ix = hx & 0x7fffffff; /* |x| */ + iy = hy & 0x7fffffff; /* |y| */ + + if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0) || /* x is nan */ + ((iy >= 0x7ff00000) && ((iy - 0x7ff00000) | ly) != 0)) /* y is nan */ + return x + y; + if (x == y) + return y; /* x=y, return y */ + if ((ix | lx) == 0) { /* x == 0 */ + INSERT_WORDS(x, hy & 0x80000000, 1); /* return +-minsubnormal */ + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (hx >= 0) { /* x > 0 */ + if (hx > hy || ((hx == hy) && (lx > ly))) { /* x > y, x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x < y, x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + } else { /* x < 0 */ + if (hy >= 0 || hx > hy || ((hx == hy) && (lx > ly))) { /* x < y, x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x > y, x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + } + hy = hx & 0x7ff00000; + if (hy >= 0x7ff00000) + return x + x; /* overflow */ + if (hy < 0x00100000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + INSERT_WORDS(y, hx, lx); + return y; + } + } + INSERT_WORDS(x, hx, lx); + return x; +} +#endif + +#ifndef HAVE_NEXTAFTERF +float npy_nextafterf(float x, float y) +{ + volatile float t; + npy_int32 hx, hy, ix, iy; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + ix = hx & 0x7fffffff; /* |x| */ + iy = hy & 0x7fffffff; /* |y| */ + + if ((ix > 0x7f800000) || /* x is nan */ + (iy > 0x7f800000)) /* y is nan */ + return x + y; + if (x == y) + return y; /* x=y, return y */ + if (ix == 0) { /* x == 0 */ + SET_FLOAT_WORD(x, (hy & 0x80000000) | 1); /* return +-minsubnormal */ + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (hx >= 0) { /* x > 0 */ + if (hx > hy) { /* x > y, x -= ulp */ + hx -= 1; + } else { /* x < y, x += ulp */ + hx += 1; + } + } else { /* x < 0 */ + if (hy >= 0 || hx > hy) { /* x < y, x -= ulp */ + hx -= 1; + } else { /* x > y, x += ulp */ + hx += 1; + } + } + hy = hx & 0x7f800000; + if (hy >= 0x7f800000) + return x + x; /* overflow */ + if (hy < 0x00800000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + SET_FLOAT_WORD(y, hx); + return y; + } + } + SET_FLOAT_WORD(x, hx); + return x; +} +#endif + +#ifndef HAVE_NEXTAFTERL +npy_longdouble npy_nextafterl(npy_longdouble x, 
npy_longdouble y) +{ + volatile npy_longdouble t; + union IEEEl2bitsrep ux; + union IEEEl2bitsrep uy; + + ux.e = x; + uy.e = y; + + if ((GET_LDOUBLE_EXP(ux) == 0x7fff && + ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0) || + (GET_LDOUBLE_EXP(uy) == 0x7fff && + ((GET_LDOUBLE_MANH(uy) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(uy)) != 0)) { + return ux.e + uy.e; /* x or y is nan */ + } + if (ux.e == uy.e) { + return uy.e; /* x=y, return y */ + } + if (ux.e == 0.0) { + SET_LDOUBLE_MANH(ux, 0); /* return +-minsubnormal */ + SET_LDOUBLE_MANL(ux, 1); + SET_LDOUBLE_SIGN(ux, GET_LDOUBLE_SIGN(uy)); + t = ux.e * ux.e; + if (t == ux.e) { + return t; + } else { + return ux.e; /* raise underflow flag */ + } + } + if ((ux.e > 0.0) ^ (ux.e < uy.e)) { /* x -= ulp */ + if (GET_LDOUBLE_MANL(ux) == 0) { + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1); + } + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) - 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + } + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1); + } else { /* x += ulp */ + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1); + if (GET_LDOUBLE_MANL(ux) == 0) { + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) + 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1); + } + } + } + if (GET_LDOUBLE_EXP(ux) == 0x7fff) { + return ux.e + ux.e; /* overflow */ + } + if (GET_LDOUBLE_EXP(ux) == 0) { /* underflow */ + if (LDBL_NBIT) { + SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT); + } + t = ux.e * ux.e; + if (t != ux.e) { /* raise underflow flag */ + return ux.e; + } + } + + return ux.e; +} +#endif + +/**begin repeat + * #suff = f,,l# + * #SUFF = F,,L# + * #type = npy_float, npy_double, npy_longdouble# + */ +@type@ npy_spacing@suff@(@type@ x) +{ + /* XXX: npy isnan/isinf may be optimized by bit twiddling */ + if (npy_isinf(x)) { + return NPY_NAN@SUFF@; + } + + return _next@suff@(x, 1) - x; +} +/**end repeat**/ + +/* + * Decorate all the math functions which are available on the current platform + */ + +#ifdef HAVE_NEXTAFTERF +float npy_nextafterf(float x, float y) +{ + return nextafterf(x, y); +} +#endif + +#ifdef HAVE_NEXTAFTER +double npy_nextafter(double x, double y) +{ + return nextafter(x, y); +} +#endif + +#ifdef HAVE_NEXTAFTERL +npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) +{ + return nextafterl(x, y); +} +#endif + +int npy_clear_floatstatus() { + char x=0; + return npy_clear_floatstatus_barrier(&x); +} +int npy_get_floatstatus() { + char x=0; + return npy_get_floatstatus_barrier(&x); +} + +/* + * Functions to set the floating point status word. + */ + +#if (defined(__unix__) || defined(unix)) && !defined(USG) +#include <sys/param.h> +#endif + + +/* + * Define floating point status functions. We must define + * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier, + * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid} + * for all supported platforms. + */ + + +/* Solaris --------------------------------------------------------*/ +/* --------ignoring SunOS ieee_flags approach, someone else can +** deal with that! 
*/ +#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \ + (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \ + defined(__NetBSD__) +#include <ieeefp.h> + +int npy_get_floatstatus_barrier(char * param) +{ + int fpstatus = fpgetsticky(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + fpsetsticky(0); + + return fpstatus; +} + +void npy_set_floatstatus_divbyzero(void) +{ + fpsetsticky(FP_X_DZ); +} + +void npy_set_floatstatus_overflow(void) +{ + fpsetsticky(FP_X_OFL); +} + +void npy_set_floatstatus_underflow(void) +{ + fpsetsticky(FP_X_UFL); +} + +void npy_set_floatstatus_invalid(void) +{ + fpsetsticky(FP_X_INV); +} + +#elif defined(_AIX) +#include <float.h> +#include <fpxcp.h> + +int npy_get_floatstatus_barrier(char *param) +{ + int fpstatus = fp_read_flag(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + fp_swap_flag(0); + + return fpstatus; +} + +void npy_set_floatstatus_divbyzero(void) +{ + fp_raise_xcp(FP_DIV_BY_ZERO); +} + +void npy_set_floatstatus_overflow(void) +{ + fp_raise_xcp(FP_OVERFLOW); +} + +void npy_set_floatstatus_underflow(void) +{ + fp_raise_xcp(FP_UNDERFLOW); +} + +void npy_set_floatstatus_invalid(void) +{ + fp_raise_xcp(FP_INVALID); +} + +#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) + +/* + * By using a volatile floating point value, + * the compiler is forced to actually do the requested + * operations because of potential concurrency. + * + * We shouldn't write multiple values to a single + * global here, because that would cause + * a race condition. 
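Each platform branch above keeps the same calling convention: the caller passes a pointer into data it just produced, and the volatile read of that byte stops the compiler from hoisting the status query above the computation. A hedged usage sketch of that convention (divide_and_report is a hypothetical caller; the npy_* functions and NPY_FPE_* masks are the ones defined in this file):

/* hypothetical caller: divide two buffers and report accumulated IEEE flags */
static int divide_and_report(double *out, const double *a, const double *b, int n)
{
    int i, status;

    /* discard whatever was already sticky in the status word */
    npy_clear_floatstatus_barrier((char *)out);

    for (i = 0; i < n; i++) {
        out[i] = a[i] / b[i];
    }

    /* passing `out` keeps this read ordered after the loop above */
    status = npy_get_floatstatus_barrier((char *)out);
    if (status & NPY_FPE_DIVIDEBYZERO) {
        /* at least one division by zero occurred */
    }
    return status;
}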
+ */ +static volatile double _npy_floatstatus_x, + _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300, + _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf; + +void npy_set_floatstatus_divbyzero(void) +{ + _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero; +} + +void npy_set_floatstatus_overflow(void) +{ + _npy_floatstatus_x = _npy_floatstatus_big * 1e300; +} + +void npy_set_floatstatus_underflow(void) +{ + _npy_floatstatus_x = _npy_floatstatus_small * 1e-300; +} + +void npy_set_floatstatus_invalid(void) +{ + _npy_floatstatus_inf = NPY_INFINITY; + _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY; +} + +/* MS Windows -----------------------------------------------------*/ +#if defined(_MSC_VER) + +#include <float.h> + +int npy_get_floatstatus_barrier(char *param) +{ + /* + * By using a volatile, the compiler cannot reorder this call + */ +#if defined(_WIN64) + int fpstatus = _statusfp(); +#else + /* windows enables sse on 32 bit, so check both flags */ + int fpstatus, fpstatus2; + _statusfp2(&fpstatus, &fpstatus2); + fpstatus |= fpstatus2; +#endif + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char *param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + _clearfp(); + + return fpstatus; +} + +/* OSF/Alpha (Tru64) ---------------------------------------------*/ +#elif defined(__osf__) && defined(__alpha) + +#include <machine/fpu.h> + +int npy_get_floatstatus_barrier(char *param) +{ + unsigned long fpstatus = ieee_get_fp_control(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char *param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + /* clear status bits as well as disable exception mode if on */ + ieee_set_fp_control(0); + + return fpstatus; +} + +#endif +/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */ + +#else +/* General GCC code, should work on most platforms */ +# include <fenv.h> + +int npy_get_floatstatus_barrier(char* param) +{ + int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | + FE_UNDERFLOW | FE_INVALID); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + + return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FE_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FE_INVALID & fpstatus) ? 
NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + /* testing float status is 50-100 times faster than clearing on x86 */ + int fpstatus = npy_get_floatstatus_barrier(param); + if (fpstatus != 0) { + feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | + FE_UNDERFLOW | FE_INVALID); + } + + return fpstatus; +} + + +void npy_set_floatstatus_divbyzero(void) +{ + feraiseexcept(FE_DIVBYZERO); +} + +void npy_set_floatstatus_overflow(void) +{ + feraiseexcept(FE_OVERFLOW); +} + +void npy_set_floatstatus_underflow(void) +{ + feraiseexcept(FE_UNDERFLOW); +} + +void npy_set_floatstatus_invalid(void) +{ + feraiseexcept(FE_INVALID); +} + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_complex.c.src b/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_complex.c.src new file mode 100644 index 0000000000..cf427dad80 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_complex.c.src @@ -0,0 +1,1811 @@ +/* + * vim: syntax=c + * + * Implement some C99-compatible complex math functions + * + * Most of the code is taken from the msun library in FreeBSD (HEAD @ 4th + * October 2013), under the following license: + * + * Copyright (c) 2007, 2011 David Schultz <das@FreeBSD.ORG> + * Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "npy_math_common.h" +#include "npy_math_private.h" +#include <numpy/utils.h> + +/* + * Hack inherited from BSD, the intent is to set the FPU inexact + * flag in an efficient way. The flag is IEEE specific. 
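The hack this comment introduces adds a volatile float far below half an ulp of 1.0f to 1; the sum cannot be represented exactly, so the rounding raises the IEEE inexact flag without any library call. A minimal standalone check of that effect with C99 <fenv.h> (the test program is illustrative, not part of the source; it assumes IEEE-754 single precision):

#include <fenv.h>
#include <stdio.h>

static const volatile float tiny = 3.9443045e-31f;   /* same constant as below */

int main(void)
{
    volatile float junk;

    feclearexcept(FE_INEXACT);
    junk = 1 + tiny;              /* rounds back to 1.0f, raising inexact */
    (void)junk;
    printf("inexact raised: %d\n", fetestexcept(FE_INEXACT) != 0);
    return 0;
}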
See + * https://github.com/freebsd/freebsd/blob/4c6378299/lib/msun/src/catrig.c#L42 + */ +#define raise_inexact() do { \ + volatile npy_float NPY_UNUSED(junk) = 1 + tiny; \ +} while (0) + + +static const volatile npy_float tiny = 3.9443045e-31f; + + +/**begin repeat + * #type = npy_float, npy_double, npy_longdouble# + * #ctype = npy_cfloat,npy_cdouble,npy_clongdouble# + * #c = f, , l# + * #C = F, , L# + * #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX# + * #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN# + * #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG# + * #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON# + * #precision = 1, 2, 3# + */ + +/*========================================================== + * Constants + *=========================================================*/ +static const @ctype@ c_1@c@ = {1.0@C@, 0.0}; +static const @ctype@ c_half@c@ = {0.5@C@, 0.0}; +static const @ctype@ c_i@c@ = {0.0, 1.0@C@}; +static const @ctype@ c_ihalf@c@ = {0.0, 0.5@C@}; + +/*========================================================== + * Helper functions + * + * These are necessary because we do not count on using a + * C99 compiler. + *=========================================================*/ +static NPY_INLINE +@ctype@ +cadd@c@(@ctype@ a, @ctype@ b) +{ + return npy_cpack@c@(npy_creal@c@(a) + npy_creal@c@(b), + npy_cimag@c@(a) + npy_cimag@c@(b)); +} + +static NPY_INLINE +@ctype@ +csub@c@(@ctype@ a, @ctype@ b) +{ + return npy_cpack@c@(npy_creal@c@(a) - npy_creal@c@(b), + npy_cimag@c@(a) - npy_cimag@c@(b)); +} + +static NPY_INLINE +@ctype@ +cmul@c@(@ctype@ a, @ctype@ b) +{ + @type@ ar, ai, br, bi; + ar = npy_creal@c@(a); + ai = npy_cimag@c@(a); + br = npy_creal@c@(b); + bi = npy_cimag@c@(b); + return npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br); +} + +static NPY_INLINE +@ctype@ +cdiv@c@(@ctype@ a, @ctype@ b) +{ + @type@ ar, ai, br, bi, abs_br, abs_bi; + ar = npy_creal@c@(a); + ai = npy_cimag@c@(a); + br = npy_creal@c@(b); + bi = npy_cimag@c@(b); + abs_br = npy_fabs@c@(br); + abs_bi = npy_fabs@c@(bi); + + if (abs_br >= abs_bi) { + if (abs_br == 0 && abs_bi == 0) { + /* divide by zeros should yield a complex inf or nan */ + return npy_cpack@c@(ar/abs_br, ai/abs_bi); + } + else { + @type@ rat = bi/br; + @type@ scl = 1.0@C@/(br+bi*rat); + return npy_cpack@c@((ar + ai*rat)*scl, (ai - ar*rat)*scl); + } + } + else { + @type@ rat = br/bi; + @type@ scl = 1.0@C@/(bi + br*rat); + return npy_cpack@c@((ar*rat + ai)*scl, (ai*rat - ar)*scl); + } +} + +static NPY_INLINE +@ctype@ +cneg@c@(@ctype@ a) +{ + return npy_cpack@c@(-npy_creal@c@(a), -npy_cimag@c@(a)); +} + +static NPY_INLINE +@ctype@ +cmuli@c@(@ctype@ a) +{ + return npy_cpack@c@(-npy_cimag@c@(a), npy_creal@c@(a)); +} + +/*========================================================== + * Custom implementation of missing complex C99 functions + *=========================================================*/ + +#ifndef HAVE_CABS@C@ +@type@ +npy_cabs@c@(@ctype@ z) +{ + return npy_hypot@c@(npy_creal@c@(z), npy_cimag@c@(z)); +} +#endif + +#ifndef HAVE_CARG@C@ +@type@ +npy_carg@c@(@ctype@ z) +{ + return npy_atan2@c@(npy_cimag@c@(z), npy_creal@c@(z)); +} +#endif + +/* + * cexp and (ccos, csin)h functions need to calculate exp scaled by another + * number. This can be difficult if exp(x) overflows. By doing this way, we + * don't risk overflowing exp. This likely raises floating-point exceptions, + * if we decide that we care. + * + * This is only useful over a limited range, (see below) an expects that the + * input values are in this range. 
+ * + * This is based on the technique used in FreeBSD's __frexp_exp and + * __ldexp_(c)exp functions by David Schultz. + * + * SCALED_CEXP_LOWER = log(FLT_MAX) + * SCALED_CEXP_UPPER = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN), + * where FLT_TRUE_MIN is the smallest possible subnormal number. + */ + +#define SCALED_CEXP_LOWERF 88.722839f +#define SCALED_CEXP_UPPERF 192.69492f +#define SCALED_CEXP_LOWER 710.47586007394386 +#define SCALED_CEXP_UPPER 1454.9159319953251 +#define SCALED_CEXP_LOWERL 11357.216553474703895L +#define SCALED_CEXP_UPPERL 22756.021937783004509L + +#if !defined(HAVE_CSINH@C@) || \ + !defined(HAVE_CCOSH@C@) || \ + !defined(HAVE_CEXP@C@) + +static +@ctype@ +_npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) +{ +#if @precision@ == 1 + const npy_int k = 235; +#endif +#if @precision@ == 2 + const npy_int k = 1799; +#endif +#if @precision@ == 3 + const npy_int k = 19547; +#endif + const @type@ kln2 = k * NPY_LOGE2@c@; + @type@ mant, mantcos, mantsin; + npy_int ex, excos, exsin; + + mant = npy_frexp@c@(npy_exp@c@(x - kln2), &ex); + mantcos = npy_frexp@c@(npy_cos@c@(y), &excos); + mantsin = npy_frexp@c@(npy_sin@c@(y), &exsin); + + expt += ex + k; + return npy_cpack@c@( npy_ldexp@c@(mant * mantcos, expt + excos), + npy_ldexp@c@(mant * mantsin, expt + exsin)); +} + +#endif + +#ifndef HAVE_CEXP@C@ + +@ctype@ +npy_cexp@c@(@ctype@ z) +{ + @type@ x, c, s; + @type@ r, i; + @ctype@ ret; + + r = npy_creal@c@(z); + i = npy_cimag@c@(z); + + if (npy_isfinite(r)) { + if (r >= SCALED_CEXP_LOWER@C@ && r <= SCALED_CEXP_UPPER@C@) { + ret = _npy_scaled_cexp@c@(r, i, 0); + } + else { + x = npy_exp@c@(r); + + c = npy_cos@c@(i); + s = npy_sin@c@(i); + + if (npy_isfinite(i)) { + ret = npy_cpack@c@(x * c, x * s); + } + else { + ret = npy_cpack@c@(NPY_NAN@C@, npy_copysign@c@(NPY_NAN@C@, i)); + } + } + + } + else if (npy_isnan(r)) { + /* r is nan */ + if (i == 0) { + ret = z; + } + else { + ret = npy_cpack@c@(r, npy_copysign@c@(NPY_NAN@C@, i)); + } + } + else { + /* r is +- inf */ + if (r > 0) { + if (i == 0) { + ret = npy_cpack@c@(r, i); + } + else if (npy_isfinite(i)) { + c = npy_cos@c@(i); + s = npy_sin@c@(i); + + ret = npy_cpack@c@(r * c, r * s); + } + else { + /* x = +inf, y = +-inf | nan */ + npy_set_floatstatus_invalid(); + ret = npy_cpack@c@(r, NPY_NAN@C@); + } + } + else { + if (npy_isfinite(i)) { + x = npy_exp@c@(r); + c = npy_cos@c@(i); + s = npy_sin@c@(i); + + ret = npy_cpack@c@(x * c, x * s); + } + else { + /* x = -inf, y = nan | +i inf */ + ret = npy_cpack@c@(0, 0); + } + } + } + + return ret; +} +#endif + +#ifndef HAVE_CLOG@C@ +/* algorithm from cpython, rev. d86f5686cef9 + * + * The usual formula for the real part is log(hypot(z.real, z.imag)). + * There are four situations where this formula is potentially + * problematic: + * + * (1) the absolute value of z is subnormal. Then hypot is subnormal, + * so has fewer than the usual number of bits of accuracy, hence may + * have large relative error. This then gives a large absolute error + * in the log. This can be solved by rescaling z by a suitable power + * of 2. + * + * (2) the absolute value of z is greater than DBL_MAX (e.g. when both + * z.real and z.imag are within a factor of 1/sqrt(2) of DBL_MAX) + * Again, rescaling solves this. + * + * (3) the absolute value of z is close to 1. In this case it's + * difficult to achieve good accuracy, at least in part because a + * change of 1ulp in the real or imaginary part of z can result in a + * change of billions of ulps in the correctly rounded answer. + * + * (4) z = 0. 
The simplest thing to do here is to call the + * floating-point log with an argument of 0, and let its behaviour + * (returning -infinity, signaling a floating-point exception, setting + * errno, or whatever) determine that of c_log. So the usual formula + * is fine here. +*/ +@ctype@ +npy_clog@c@(@ctype@ z) +{ + @type@ ax = npy_fabs@c@(npy_creal@c@(z)); + @type@ ay = npy_fabs@c@(npy_cimag@c@(z)); + @type@ rr, ri; + + if (ax > @TMAX@/4 || ay > @TMAX@/4) { + rr = npy_log@c@(npy_hypot@c@(ax/2, ay/2)) + NPY_LOGE2@c@; + } + else if (ax < @TMIN@ && ay < @TMIN@) { + if (ax > 0 || ay > 0) { + /* catch cases where hypot(ax, ay) is subnormal */ + rr = npy_log@c@(npy_hypot@c@(npy_ldexp@c@(ax, @TMANT_DIG@), + npy_ldexp@c@(ay, @TMANT_DIG@))) - @TMANT_DIG@*NPY_LOGE2@c@; + } + else { + /* log(+/-0 +/- 0i) */ + /* raise divide-by-zero floating point exception */ + rr = -1.0@c@ / npy_creal@c@(z); + rr = npy_copysign@c@(rr, -1); + ri = npy_carg@c@(z); + return npy_cpack@c@(rr, ri); + } + } + else { + @type@ h = npy_hypot@c@(ax, ay); + if (0.71 <= h && h <= 1.73) { + @type@ am = ax > ay ? ax : ay; /* max(ax, ay) */ + @type@ an = ax > ay ? ay : ax; /* min(ax, ay) */ + rr = npy_log1p@c@((am-1)*(am+1)+an*an)/2; + } + else { + rr = npy_log@c@(h); + } + } + ri = npy_carg@c@(z); + + return npy_cpack@c@(rr, ri); +} +#endif + +#ifndef HAVE_CSQRT@C@ + +/* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ +#define THRESH (@TMAX@ / (1 + NPY_SQRT2@c@)) + +@ctype@ +npy_csqrt@c@(@ctype@ z) +{ + @ctype@ result; + @type@ a, b; + @type@ t; + int scale; + + a = npy_creal@c@(z); + b = npy_cimag@c@(z); + + /* Handle special cases. */ + if (a == 0 && b == 0) { + return (npy_cpack@c@(0, b)); + } + if (npy_isinf(b)) { + return (npy_cpack@c@(NPY_INFINITY@C@, b)); + } + if (npy_isnan(a)) { + t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ + return (npy_cpack@c@(a, t)); /* return NaN + NaN i */ + } + if (npy_isinf(a)) { + /* + * csqrt(inf + NaN i) = inf + NaN i + * csqrt(inf + y i) = inf + 0 i + * csqrt(-inf + NaN i) = NaN +- inf i + * csqrt(-inf + y i) = 0 + inf i + */ + if (npy_signbit(a)) { + return (npy_cpack@c@(npy_fabs@c@(b - b), npy_copysign@c@(a, b))); + } + else { + return (npy_cpack@c@(a, npy_copysign@c@(b - b, b))); + } + } + /* + * The remaining special case (b is NaN) is handled just fine by + * the normal code path below. + */ + + /* Scale to avoid overflow. */ + if (npy_fabs@c@(a) >= THRESH || npy_fabs@c@(b) >= THRESH) { + a *= 0.25; + b *= 0.25; + scale = 1; + } + else { + scale = 0; + } + + /* Algorithm 312, CACM vol 10, Oct 1967. */ + if (a >= 0) { + t = npy_sqrt@c@((a + npy_hypot@c@(a, b)) * 0.5@c@); + result = npy_cpack@c@(t, b / (2 * t)); + } + else { + t = npy_sqrt@c@((-a + npy_hypot@c@(a, b)) * 0.5@c@); + result = npy_cpack@c@(npy_fabs@c@(b) / (2 * t), npy_copysign@c@(t, b)); + } + + /* Rescale. */ + if (scale) { + return (npy_cpack@c@(npy_creal@c@(result) * 2, npy_cimag@c@(result))); + } + else { + return (result); + } +} +#undef THRESH +#endif + +/* + * Always use this function because of the multiplication for small + * integer powers, but in the body use cpow if it is available. 
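The 0.71 <= h <= 1.73 branch of npy_clog above rests on log(hypot(x, y)) = log1p(x*x + y*y - 1) / 2, with x*x + y*y - 1 evaluated as (am-1)*(am+1) + an*an so the one dangerous subtraction stays well conditioned. A short double-precision sketch of the difference (the test value is illustrative only):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 1.0, y = 1e-8;      /* |z| is extremely close to 1 */
    double am = fmax(fabs(x), fabs(y));
    double an = fmin(fabs(x), fabs(y));

    double naive  = log(hypot(x, y));
    double better = log1p((am - 1.0) * (am + 1.0) + an * an) / 2.0;

    /* exact answer is about 5e-17; the naive form typically collapses to 0 */
    printf("naive=%.17g  better=%.17g\n", naive, better);
    return 0;
}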
+ */ + +/* private function for use in npy_pow{f, ,l} */ +#ifdef HAVE_CPOW@C@ +static @ctype@ +sys_cpow@c@(@ctype@ x, @ctype@ y) +{ + __@ctype@_to_c99_cast xcast; + __@ctype@_to_c99_cast ycast; + __@ctype@_to_c99_cast ret; + xcast.npy_z = x; + ycast.npy_z = y; + ret.c99_z = cpow@c@(xcast.c99_z, ycast.c99_z); + return ret.npy_z; +} +#endif + + +@ctype@ +npy_cpow@c@ (@ctype@ a, @ctype@ b) +{ + npy_intp n; + @type@ ar = npy_creal@c@(a); + @type@ br = npy_creal@c@(b); + @type@ ai = npy_cimag@c@(a); + @type@ bi = npy_cimag@c@(b); + @ctype@ r; + + if (br == 0. && bi == 0.) { + return npy_cpack@c@(1., 0.); + } + if (ar == 0. && ai == 0.) { + if (br > 0 && bi == 0) { + return npy_cpack@c@(0., 0.); + } + else { + volatile @type@ tmp = NPY_INFINITY@C@; + /* + * NB: there are four complex zeros; c0 = (+-0, +-0), so that + * unlike for reals, c0**p, with `p` negative is in general + * ill-defined. + * + * c0**z with z complex is also ill-defined. + */ + r = npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + + /* Raise invalid */ + tmp -= NPY_INFINITY@C@; + ar = tmp; + return r; + } + } + if (bi == 0 && (n=(npy_intp)br) == br) { + if (n == 1) { + /* unroll: handle inf better */ + return npy_cpack@c@(ar, ai); + } + else if (n == 2) { + /* unroll: handle inf better */ + return cmul@c@(a, a); + } + else if (n == 3) { + /* unroll: handle inf better */ + return cmul@c@(a, cmul@c@(a, a)); + } + else if (n > -100 && n < 100) { + @ctype@ p, aa; + npy_intp mask = 1; + if (n < 0) { + n = -n; + } + aa = c_1@c@; + p = npy_cpack@c@(ar, ai); + while (1) { + if (n & mask) { + aa = cmul@c@(aa,p); + } + mask <<= 1; + if (n < mask || mask <= 0) { + break; + } + p = cmul@c@(p,p); + } + r = npy_cpack@c@(npy_creal@c@(aa), npy_cimag@c@(aa)); + if (br < 0) { + r = cdiv@c@(c_1@c@, r); + } + return r; + } + } + +#ifdef HAVE_CPOW@C@ + return sys_cpow@c@(a, b); + +#else + { + @ctype@ loga = npy_clog@c@(a); + + ar = npy_creal@c@(loga); + ai = npy_cimag@c@(loga); + return npy_cexp@c@(npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br)); + } + +#endif +} + + +#ifndef HAVE_CCOS@C@ +@ctype@ +npy_ccos@c@(@ctype@ z) +{ + /* ccos(z) = ccosh(I * z) */ + return npy_ccosh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); +} +#endif + +#ifndef HAVE_CSIN@C@ +@ctype@ +npy_csin@c@(@ctype@ z) +{ + /* csin(z) = -I * csinh(I * z) */ + z = npy_csinh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CTAN@C@ +@ctype@ +npy_ctan@c@(@ctype@ z) +{ + /* ctan(z) = -I * ctanh(I * z) */ + z = npy_ctanh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); + return (npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z))); +} +#endif + +#ifndef HAVE_CCOSH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226599. + * + * Hyperbolic cosine of a complex argument z = x + i y. + * + * cosh(z) = cosh(x+iy) + * = cosh(x) cos(y) + i sinh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. 
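For small integer exponents npy_cpow above uses square-and-multiply rather than exp/log, which is exact for low powers and behaves better with infinities. The same mask loop for a real double looks roughly like this (ipow is an illustrative name; as in the complex code, |n| is assumed to be small):

/* x**n by square-and-multiply, mirroring the mask loop in npy_cpow */
static double ipow(double x, long n)
{
    double aa = 1.0, p = x;
    long mask = 1;
    int negative = (n < 0);

    if (negative) {
        n = -n;
    }
    while (1) {
        if (n & mask) {
            aa *= p;          /* this bit of the exponent is set */
        }
        mask <<= 1;
        if (n < mask || mask <= 0) {
            break;            /* no higher bits left */
        }
        p *= p;               /* square for the next bit */
    }
    return negative ? 1.0 / aa : aa;
}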
+ * + * CCOSH_BIG is chosen such that + * spacing(0.5 * exp(CCOSH_BIG)) > 0.5*exp(-CCOSH_BIG) + * although the exact value assigned to CCOSH_BIG is not so important + */ +@ctype@ +npy_ccosh@c@(@ctype@ z) +{ +#if @precision@ == 1 + const npy_float CCOSH_BIG = 9.0f; + const npy_float CCOSH_HUGE = 1.70141183e+38f; +#endif +#if @precision@ == 2 + const npy_double CCOSH_BIG = 22.0; + const npy_double CCOSH_HUGE = 8.9884656743115795e+307; +#endif +#if @precision@ >= 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble CCOSH_BIG = 22.0L; + const npy_longdouble CCOSH_HUGE = 8.9884656743115795e+307L; +#else + const npy_longdouble CCOSH_BIG = 24.0L; + const npy_longdouble CCOSH_HUGE = 5.94865747678615882543e+4931L; +#endif +#endif + + @type@ x, y, h, absx; + npy_int xfinite, yfinite; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + + xfinite = npy_isfinite(x); + yfinite = npy_isfinite(y); + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (xfinite && yfinite) { + if (y == 0) { + return npy_cpack@c@(npy_cosh@c@(x), x * y); + } + absx = npy_fabs@c@(x); + if (absx < CCOSH_BIG) { + /* small x: normal case */ + return npy_cpack@c@(npy_cosh@c@(x) * npy_cos@c@(y), + npy_sinh@c@(x) * npy_sin@c@(y)); + } + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (absx < SCALED_CEXP_LOWER@C@) { + /* x < 710: exp(|x|) won't overflow */ + h = npy_exp@c@(absx) * 0.5@c@; + return npy_cpack@c@(h * npy_cos@c@(y), + npy_copysign@c@(h, x) * npy_sin@c@(y)); + } + else if (absx < SCALED_CEXP_UPPER@C@) { + /* x < 1455: scale to avoid overflow */ + z = _npy_scaled_cexp@c@(absx, y, -1); + return npy_cpack@c@(npy_creal@c@(z), + npy_cimag@c@(z) * npy_copysign@c@(1, x)); + } + else { + /* x >= 1455: the result always overflows */ + h = CCOSH_HUGE * x; + return npy_cpack@c@(h * h * npy_cos@c@(y), h * npy_sin@c@(y)); + } + } + + /* + * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if (x == 0 && !yfinite) { + return npy_cpack@c@(y - y, npy_copysign@c@(0, x * (y - y))); + } + + /* + * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. + * + * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. + * The sign of 0 in the result is unspecified. + */ + if (y == 0 && !xfinite) { + return npy_cpack@c@(x * x, npy_copysign@c@(0, x) * y); + } + + /* + * cosh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * cosh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (xfinite && !yfinite) { + return npy_cpack@c@(y - y, x * (y - y)); + } + + /* + * cosh(+-Inf + I NaN) = +Inf + I d(NaN). + * + * cosh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) + */ + if (npy_isinf(x)) { + if (!yfinite) { + return npy_cpack@c@(x * x, x * (y - y)); + } + return npy_cpack@c@((x * x) * npy_cos@c@(y), x * npy_sin@c@(y)); + } + + /* + * cosh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. 
+ * + * cosh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); +} +#undef CCOSH_BIG +#undef CCOSH_HUGE +#endif + +#ifndef HAVE_CSINH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226599. + * + * Hyperbolic sine of a complex argument z = x + i y. + * + * sinh(z) = sinh(x+iy) + * = sinh(x) cos(y) + i cosh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ +@ctype@ +npy_csinh@c@(@ctype@ z) +{ +#if @precision@ == 1 + const npy_float CSINH_BIG = 9.0f; + const npy_float CSINH_HUGE = 1.70141183e+38f; +#endif +#if @precision@ == 2 + const npy_double CSINH_BIG = 22.0; + const npy_double CSINH_HUGE = 8.9884656743115795e+307; +#endif +#if @precision@ >= 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble CSINH_BIG = 22.0L; + const npy_longdouble CSINH_HUGE = 8.9884656743115795e+307L; +#else + const npy_longdouble CSINH_BIG = 24.0L; + const npy_longdouble CSINH_HUGE = 5.94865747678615882543e+4931L; +#endif +#endif + + @type@ x, y, h, absx; + npy_int xfinite, yfinite; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + + xfinite = npy_isfinite(x); + yfinite = npy_isfinite(y); + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (xfinite && yfinite) { + if (y == 0) { + return npy_cpack@c@(npy_sinh@c@(x), y); + } + absx = npy_fabs@c@(x); + if (absx < CSINH_BIG) { + /* small x: normal case */ + return npy_cpack@c@(npy_sinh@c@(x) * npy_cos@c@(y), + npy_cosh@c@(x) * npy_sin@c@(y)); + } + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (absx < SCALED_CEXP_LOWER@C@) { + /* x < 710: exp(|x|) won't overflow */ + h = npy_exp@c@(npy_fabs@c@(x)) * 0.5@c@; + return npy_cpack@c@(npy_copysign@c@(h, x) * npy_cos@c@(y), + h * npy_sin@c@(y)); + } + else if (x < SCALED_CEXP_UPPER@C@) { + /* x < 1455: scale to avoid overflow */ + z = _npy_scaled_cexp@c@(absx, y, -1); + return npy_cpack@c@(npy_creal@c@(z) * npy_copysign@c@(1, x), + npy_cimag@c@(z)); + } + else { + /* x >= 1455: the result always overflows */ + h = CSINH_HUGE * x; + return npy_cpack@c@(h * npy_cos@c@(y), h * h * npy_sin@c@(y)); + } + } + + /* + * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if (x == 0 && !yfinite) { + return npy_cpack@c@(npy_copysign@c@(0, x * (y - y)), y - y); + } + + /* + * sinh(+-Inf +- I 0) = +-Inf + I +-0. + * + * sinh(NaN +- I 0) = d(NaN) + I +-0. + */ + if (y == 0 && !xfinite) { + if (npy_isnan(x)) { + return z; + } + return npy_cpack@c@(x, npy_copysign@c@(0, y)); + } + + /* + * sinh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * sinh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (xfinite && !yfinite) { + return npy_cpack@c@(y - y, x * (y - y)); + } + + /* + * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). + * The sign of Inf in the result is unspecified. Choice = normally + * the same as d(NaN). 
+ * + * sinh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) + */ + if (!xfinite && !npy_isnan(x)) { + if (!yfinite) { + return npy_cpack@c@(x * x, x * (y - y)); + } + return npy_cpack@c@(x * npy_cos@c@(y), + NPY_INFINITY@C@ * npy_sin@c@(y)); + } + + /* + * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * sinh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); +} +#undef CSINH_BIG +#undef CSINH_HUGE +#endif + +#ifndef HAVE_CTANH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226600. + * + * Hyperbolic tangent of a complex argument z = x + i y. + * + * The algorithm is from: + * + * W. Kahan. Branch Cuts for Complex Elementary Functions or Much + * Ado About Nothing's Sign Bit. In The State of the Art in + * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987. + * + * Method: + * + * Let t = tan(x) + * beta = 1/cos^2(y) + * s = sinh(x) + * rho = cosh(x) + * + * We have: + * + * tanh(z) = sinh(z) / cosh(z) + * + * sinh(x) cos(y) + i cosh(x) sin(y) + * = --------------------------------- + * cosh(x) cos(y) + i sinh(x) sin(y) + * + * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * = ------------------------------------- + * 1 + sinh^2(x) / cos^2(y) + * + * beta rho s + i t + * = ---------------- + * 1 + beta s^2 + * + * Modifications: + * + * I omitted the original algorithm's handling of overflow in tan(x) after + * verifying with nearpi.c that this can't happen in IEEE single or double + * precision. I also handle large x differently. + */ + +#define TANH_HUGE 22.0 +#define TANHF_HUGE 11.0F +#define TANHL_HUGE 42.0L + +@ctype@ +npy_ctanh@c@(@ctype@ z) +{ + @type@ x, y; + @type@ t, beta, s, rho, denom; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + + /* + * ctanh(NaN + i 0) = NaN + i 0 + * + * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * + * The imaginary part has the sign of x*sin(2*y), but there's no + * special effort to get this right. + * + * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * + * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * + * The imaginary part of the sign is unspecified. This special + * case is only needed to avoid a spurious invalid exception when + * y is infinite. + */ + if (!npy_isfinite(x)) { + if (npy_isnan(x)) { + return npy_cpack@c@(x, (y == 0 ? y : x * y)); + } + return npy_cpack@c@(npy_copysign@c@(1,x), + npy_copysign@c@(0, + npy_isinf(y) ? + y : npy_sin@c@(y) * npy_cos@c@(y))); + } + + /* + * ctanh(x + i NAN) = NaN + i NaN + * ctanh(x +- i Inf) = NaN + i NaN + */ + if (!npy_isfinite(y)) { + return (npy_cpack@c@(y - y, y - y)); + } + + /* + * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * approximation sinh^2(huge) ~= exp(2*huge) / 4. + * We use a modified formula to avoid spurious overflow. 
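Roughly, the large-|x| branch below follows from the Kahan formulation quoted above (a sketch of the reduction, not part of the original source):

$$
\tanh(x+iy)=\frac{\beta\rho s+i\,t}{1+\beta s^{2}},\qquad
t=\tan y,\ \beta=1+t^{2},\ s=\sinh x,\ \rho=\sqrt{1+s^{2}},
$$

and for |x| >= TANH_HUGE one has $s^{2}\approx e^{2|x|}/4\gg 1$, hence $\rho\approx|s|$ and

$$
\tanh(x+iy)\approx\frac{\rho}{s}+i\,\frac{t}{\beta s^{2}}
=\operatorname{sign}(x)+i\,4\sin y\,\cos y\,e^{-2|x|},
$$

since $t/\beta=\sin y\cos y$. This is the value the code computes with exp_mx = exp(-|x|).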
+ */ + if (npy_fabs@c@(x) >= TANH@C@_HUGE) { + @type@ exp_mx = npy_exp@c@(-npy_fabs@c@(x)); + return npy_cpack@c@(npy_copysign@c@(1, x), + 4 * npy_sin@c@(y) * npy_cos@c@(y) * + exp_mx * exp_mx); + } + + /* Kahan's algorithm */ + t = npy_tan@c@(y); + beta = 1 + t * t; /* = 1 / cos^2(y) */ + s = npy_sinh@c@(x); + rho = npy_sqrt@c@(1 + s * s); /* = cosh(x) */ + denom = 1 + beta * s * s; + return (npy_cpack@c@((beta * rho * s) / denom, t / denom)); +} +#undef TANH_HUGE +#undef TANHF_HUGE +#undef TANHL_HUGE +#endif + +#if !defined (HAVE_CACOS@C@) || !defined (HAVE_CASINH@C@) +/* + * Complex inverse trig functions taken from the msum library in FreeBSD + * revision 251404 + * + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * Throughout we use the convention z = x + I*y. + * + * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) + * where + * A = (|z+I| + |z-I|) / 2 + * B = (|z+I| - |z-I|) / 2 = y/A + * + * These formulas become numerically unstable: + * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that + * is, Re(casinh(z)) is close to 0); + * (b) for Im(casinh(z)) when z is close to either of the intervals + * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is + * close to PI/2). + * + * These numerical problems are overcome by defining + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 + * Then if A < A_crossover, we use + * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) + * A-1 = f(x, 1+y) + f(x, 1-y) + * and if B > B_crossover, we use + * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) + * A-y = f(x, y+1) + f(x, y-1) + * where without loss of generality we have assumed that x and y are + * non-negative. + * + * Much of the difficulty comes because the intermediate computations may + * produce overflows or underflows. This is dealt with in the paper by Hull + * et al by using exception handling. We do this by detecting when + * computations risk underflow or overflow. The hardest part is handling the + * underflows when computing f(a, b). + * + * Note that the function f(a, b) does not appear explicitly in the paper by + * Hull et al, but the idea may be found on pages 308 and 309. Introducing the + * function f(a, b) allows us to concentrate many of the clever tricks in this + * paper into one function. + */ + +/* + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. + * Pass hypot(a, b) as the third argument. + */ +static NPY_INLINE @type@ +_f@c@(@type@ a, @type@ b, @type@ hypot_a_b) +{ + if (b < 0) { + return ((hypot_a_b - b) / 2); + } + if (b == 0) { + return (a / 2); + } + return (a * a / (hypot_a_b + b) / 2); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. 
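The helper f(a, b) = (hypot(a, b) - b) / 2 is where most of the cancellation danger hides: when b > 0 and a is small, hypot(a, b) is only barely larger than b. The rewritten form uses hypot(a, b) - b = a*a / (hypot(a, b) + b), which is algebraically identical but subtraction-free. A small double-precision sketch of the effect (values illustrative only):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = 1e-9, b = 1.0;
    double h = hypot(a, b);

    double cancelling = (h - b) / 2;              /* subtractive cancellation */
    double stable     = a * a / (h + b) / 2;      /* same value, no cancellation */

    /* exact answer is 2.5e-19; the first form typically prints 0 */
    printf("%.17g\n%.17g\n", cancelling, stable);
    return 0;
}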
+ */ +static NPY_INLINE void +_do_hard_work@c@(@type@ x, @type@ y, @type@ *rx, + npy_int *B_is_usable, @type@ *B, @type@ *sqrt_A2my2, @type@ *new_y) +{ +#if @precision@ == 1 + const npy_float A_crossover = 10.0f; + const npy_float B_crossover = 0.6417f; + const npy_float FOUR_SQRT_MIN = 4.3368086899420177e-19f; +#endif +#if @precision@ == 2 + const npy_double A_crossover = 10.0; + const npy_double B_crossover = 0.6417; + const npy_double FOUR_SQRT_MIN = 5.9666725849601654e-154; +#endif +#if @precision@ == 3 + const npy_longdouble A_crossover = 10.0l; + const npy_longdouble B_crossover = 0.6417l; +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble FOUR_SQRT_MIN = 5.9666725849601654e-154; +#else + const npy_longdouble FOUR_SQRT_MIN = 7.3344154702193886625e-2466l; +#endif +#endif + @type@ R, S, A; /* A, B, R, and S are as in Hull et al. */ + @type@ Am1, Amy; /* A-1, A-y. */ + + R = npy_hypot@c@(x, y + 1); /* |z+I| */ + S = npy_hypot@c@(x, y - 1); /* |z-I| */ + + /* A = (|z+I| + |z-I|) / 2 */ + A = (R + S) / 2; + /* + * Mathematically A >= 1. There is a small chance that this will not + * be so because of rounding errors. So we will make certain it is + * so. + */ + if (A < 1) { + A = 1; + } + + if (A < A_crossover) { + /* + * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). + * rx = log1p(Am1 + sqrt(Am1*(A+1))) + */ + if (y == 1 && x < @TEPS@ * @TEPS@ / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *rx = npy_sqrt@c@(x); + } + else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN + */ + Am1 = _f@c@(x, 1 + y, R) + _f@c@(x, 1 - y, S); + *rx = npy_log1p@c@(Am1 + npy_sqrt@c@(Am1 * (A + 1))); + } + else if (y < 1) { + /* + * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and + * A = 1 (inexactly). + */ + *rx = x / npy_sqrt@c@((1 - y) * (1 + y)); + } + else { /* if (y > 1) */ + /* + * A-1 = y-1 (inexactly). + */ + *rx = npy_log1p@c@((y - 1) + npy_sqrt@c@((y - 1) * (y + 1))); + } + } + else { + *rx = npy_log@c@(A + npy_sqrt@c@(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + /* + * Avoid a possible underflow caused by y/A. For casinh this + * would be legitimate, but will be picked up by invoking atan2 + * later on. For cacos this would not be legitimate. + */ + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / @TEPS@); + *new_y = y * (2 / @TEPS@); + return; + } + + /* B = (|z+I| - |z-I|) / 2 = y/A */ + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + /* + * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). + * sqrt_A2my2 = sqrt(Amy*(A+y)) + */ + if (y == 1 && x < @TEPS@ / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *sqrt_A2my2 = npy_sqrt@c@(x) * npy_sqrt@c@((A + y) / 2); + } + else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN + * and + * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN + */ + Amy = _f@c@(x, y + 1, R) + _f@c@(x, y - 1, S); + *sqrt_A2my2 = npy_sqrt@c@(Amy * (A + y)); + } + else if (y > 1) { + /* + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and + * A = y (inexactly). + * + * y < RECIP_EPSILON. So the following + * scaling should avoid any underflow problems. + */ + *sqrt_A2my2 = x * (4 / @TEPS@ / @TEPS@) * y / + npy_sqrt@c@((y + 1) * (y - 1)); + *new_y = y * (4 / @TEPS@ / @TEPS@); + } + else { /* if (y < 1) */ + /* + * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and + * A = 1 (inexactly). 
+ */ + *sqrt_A2my2 = npy_sqrt@c@((1 - y) * (1 + y)); + } + } +} + +/* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. + */ +static NPY_INLINE void +_clog_for_large_values@c@(@type@ x, @type@ y, + @type@ *rr, @type@ *ri) +{ +#if @precision@ == 1 + const npy_float QUARTER_SQRT_MAX = 4.611685743549481e+18f; + const npy_float SQRT_MIN = 1.0842021724855044e-19f; + #endif +#if @precision@ == 2 + const npy_double QUARTER_SQRT_MAX = 3.3519519824856489e+153; + const npy_double SQRT_MIN = 1.4916681462400413e-154; + #endif +#if @precision@ == 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble QUARTER_SQRT_MAX = 3.3519519824856489e+153; + const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; +#else + const npy_longdouble QUARTER_SQRT_MAX = 2.7268703390485398235e+2465l; + const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; +#endif +#endif + @type@ ax, ay, t; + + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * Avoid overflow in hypot() when x and y are both very large. + * Divide x and y by E, and then add 1 to the logarithm. This depends + * on E being larger than sqrt(2). + * Dividing by E causes an insignificant loss of accuracy; however + * this method is still poor since it is uneccessarily slow. + */ + if (ax > @TMAX@ / 2) { + *rr = npy_log@c@(npy_hypot@c@(x / NPY_E@c@, y / NPY_E@c@)) + 1; + } + /* + * Avoid overflow when x or y is large. Avoid underflow when x or + * y is small. + */ + else if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) { + *rr = npy_log@c@(npy_hypot@c@(x, y)); + } + else { + *rr = npy_log@c@(ax * ax + ay * ay) / 2; + } + *ri = npy_atan2@c@(y, x); +} +#endif + +#ifndef HAVE_CACOS@C@ +@ctype@ +npy_cacos@c@(@ctype@ z) +{ +#if @precision@ == 1 + /* this is sqrt(6*EPS) */ + const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; + /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. */ + const volatile npy_float pio2_lo = 7.5497899549e-9f; +#endif +#if @precision@ == 2 + const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; + const volatile npy_double pio2_lo = 6.1232339957367659e-17; +#endif +#if @precision@ == 3 + const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; +#endif + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; + const @type@ pio2_hi = NPY_PI_2@c@; + @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2mx2, new_x; + npy_int sx, sy; + npy_int B_is_usable; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + sx = npy_signbit(x); + sy = npy_signbit(y); + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + + if (npy_isnan(x) || npy_isnan(y)) { + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ + if (npy_isinf(x)) { + return npy_cpack@c@(y + y, -NPY_INFINITY@C@); + } + /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ + if (npy_isinf(y)) { + return npy_cpack@c@(x + x, -y); + } + /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ + if (x == 0) { + return npy_cpack@c@(pio2_hi + pio2_lo, y + y); + } + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. 
*/ + _clog_for_large_values@c@(x, y, &wx, &wy); + rx = npy_fabs@c@(wy); + ry = wx + NPY_LOGE2@c@; + if (sy == 0) { + ry = -ry; + } + return npy_cpack@c@(rx, ry); + } + + /* Avoid spuriously raising inexact for z = 1. */ + if (x == 1 && y == 0) { + return npy_cpack@c@(0, -y); + } + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { + return npy_cpack@c@(pio2_hi - (x - pio2_lo), -y); + } + + _do_hard_work@c@(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) { + rx = npy_acos@c@(B); + } + else { + rx = npy_acos@c@(-B); + } + } + else { + if (sx == 0) { + rx = npy_atan2@c@(sqrt_A2mx2, new_x); + } + else { + rx = npy_atan2@c@(sqrt_A2mx2, -new_x); + } + } + if (sy == 0) { + ry = -ry; + } + return npy_cpack@c@(rx, ry); +} +#endif + +#ifndef HAVE_CASIN@C@ +@ctype@ +npy_casin@c@(@ctype@ z) +{ + /* casin(z) = I * conj( casinh(I * conj(z)) ) */ + z = npy_casinh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CATAN@C@ +@ctype@ +npy_catan@c@(@ctype@ z) +{ + /* catan(z) = I * conj( catanh(I * conj(z)) ) */ + z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CACOSH@C@ +@ctype@ +npy_cacosh@c@(@ctype@ z) +{ + /* + * cacosh(z) = I*cacos(z) or -I*cacos(z) + * where the sign is chosen so Re(cacosh(z)) >= 0. + */ + @ctype@ w; + @type@ rx, ry; + + w = npy_cacos@c@(z); + rx = npy_creal@c@(w); + ry = npy_cimag@c@(w); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (npy_isnan(rx) && npy_isnan(ry)) { + return npy_cpack@c@(ry, rx); + } + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (npy_isnan(rx)) { + return npy_cpack@c@(npy_fabs@c@(ry), rx); + } + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (npy_isnan(ry)) { + return npy_cpack@c@(ry, ry); + } + return npy_cpack@c@(npy_fabs@c@(ry), npy_copysign@c@(rx, npy_cimag@c@(z))); +} +#endif + +#ifndef HAVE_CASINH@C@ +/* + * casinh(z) = z + O(z^3) as z -> 0 + * + * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity + * The above formula works for the imaginary part as well, because + * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +@ctype@ +npy_casinh@c@(@ctype@ z) +{ +#if @precision@ == 1 + /* this is sqrt(6*EPS) */ + const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; + /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. 
*/ + const volatile npy_float pio2_lo = 7.5497899549e-9f; +#endif +#if @precision@ == 2 + const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; + const volatile npy_double pio2_lo = 6.1232339957367659e-17; +#endif +#if @precision@ == 3 + const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; +#endif + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; + const @type@ pio2_hi = NPY_PI_2@c@; + @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2my2, new_y; + npy_int B_is_usable; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + + if (npy_isnan(x) || npy_isnan(y)) { + /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ + if (npy_isinf(x)) { + return npy_cpack@c@(x, y + y); + } + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ + if (npy_isinf(y)) { + return npy_cpack@c@(y, x + x); + } + /* casinh(NaN + I*0) = NaN + I*0 */ + if (y == 0) { + return npy_cpack@c@(x + x, y); + } + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + if (npy_signbit(x) == 0) { + _clog_for_large_values@c@(x, y, &wx, &wy); + wx += NPY_LOGE2@c@; + } + else { + _clog_for_large_values@c@(-x, -y, &wx, &wy); + wx += NPY_LOGE2@c@; + } + return npy_cpack@c@(npy_copysign@c@(wx, x), npy_copysign@c@(wy, y)); + } + + /* Avoid spuriously raising inexact for z = 0. */ + if (x == 0 && y == 0) { + return (z); + } + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { + return (z); + } + + _do_hard_work@c@(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) { + ry = npy_asin@c@(B); + } + else { + ry = npy_atan2@c@(new_y, sqrt_A2my2); + } + return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); +} +#endif + +#ifndef HAVE_CATANH@C@ +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabs(x) >= DBL_EPSILON. + */ +static NPY_INLINE @type@ +_sum_squares@c@(@type@ x, @type@ y) +{ +#if @precision@ == 1 +const npy_float SQRT_MIN = 1.0842022e-19f; +#endif +#if @precision@ == 2 +const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ +#endif +#if @precision@ == 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE +const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ +#else +/* this is correct for 80 bit long doubles */ +const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; +#endif +#endif + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) { + return (x * x); + } + + return (x * x + y * y); +} + +/* + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). + * Assumes x and y are not NaN, and one of x and y is larger than + * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use + * the code creal(1/z), because the imaginary part may produce an unwanted + * underflow. + * This is only called in a context where inexact is always raised before + * the call, so no effort is made to avoid or force inexact. 
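Ignoring the exponent rescaling that the per-precision implementations below add, the quantity being computed is just Re(1/(x + i*y)) = x / (x*x + y*y). A minimal unscaled sketch (the function name is illustrative; it is only safe when x*x + y*y neither overflows nor underflows, which is what the real code guards against by rescaling x and y by a power of two derived from x's exponent):

/* Re(1/(x + i*y)) without the overflow/underflow guards of the real code */
static double real_part_reciprocal_naive(double x, double y)
{
    return x / (x * x + y * y);
}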
+ */ +#if @precision@ == 1 +#define BIAS (FLT_MAX_EXP - 1) +#define CUTOFF (FLT_MANT_DIG / 2 + 1) +static NPY_INLINE npy_float +_real_part_reciprocalf(npy_float x, npy_float y) +{ + npy_float scale; + npy_uint32 hx, hy; + npy_int32 ix, iy; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7f800000; + GET_FLOAT_WORD(hy, y); + iy = hy & 0x7f800000; + if (ix - iy >= CUTOFF << 23 || npy_isinf(x)) { + return (1 / x); + } + if (iy - ix >= CUTOFF << 23) { + return (x / y / y); + } + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) { + return (x / (x * x + y * y)); + } + SET_FLOAT_WORD(scale, 0x7f800000 - ix); + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} +#undef BIAS +#undef CUTOFF +#endif + +#if @precision@ == 2 +#define BIAS (DBL_MAX_EXP - 1) +/* more guard digits are useful iff there is extra precision. */ +#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ +static NPY_INLINE npy_double +_real_part_reciprocal(npy_double x, npy_double y) +{ + npy_double scale; + npy_uint32 hx, hy; + npy_int32 ix, iy; + + /* + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, + * example 2. + */ + GET_HIGH_WORD(hx, x); + ix = hx & 0x7ff00000; + GET_HIGH_WORD(hy, y); + iy = hy & 0x7ff00000; + if (ix - iy >= CUTOFF << 20 || npy_isinf(x)) { + /* +-Inf -> +-0 is special */ + return (1 / x); + } + if (iy - ix >= CUTOFF << 20) { + /* should avoid double div, but hard */ + return (x / y / y); + } + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) { + return (x / (x * x + y * y)); + } + scale = 1; + SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} +#undef BIAS +#undef CUTOFF +#endif + +#if @precision@ == 3 +#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ + !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) + +#define BIAS (LDBL_MAX_EXP - 1) +#define CUTOFF (LDBL_MANT_DIG / 2 + 1) +static NPY_INLINE npy_longdouble +_real_part_reciprocall(npy_longdouble x, + npy_longdouble y) +{ + npy_longdouble scale; + union IEEEl2bitsrep ux, uy, us; + npy_int32 ix, iy; + + ux.e = x; + ix = GET_LDOUBLE_EXP(ux); + uy.e = y; + iy = GET_LDOUBLE_EXP(uy); + if (ix - iy >= CUTOFF || npy_isinf(x)) { + return (1/x); + } + if (iy - ix >= CUTOFF) { + return (x/y/y); + } + if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF) { + return (x/(x*x + y*y)); + } + us.e = 1; + SET_LDOUBLE_EXP(us, 0x7fff - ix); + scale = us.e; + x *= scale; + y *= scale; + return (x/(x*x + y*y) * scale); +} +#undef BIAS +#undef CUTOFF + +#else + +static NPY_INLINE npy_longdouble +_real_part_reciprocall(npy_longdouble x, + npy_longdouble y) +{ + return x/(x*x + y*y); +} + +#endif +#endif + +@ctype@ +npy_catanh@c@(@ctype@ z) +{ +#if @precision@ == 1 + /* this is sqrt(3*EPS) */ + const npy_float SQRT_3_EPSILON = 5.9801995673e-4f; + /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. */ + const volatile npy_float pio2_lo = 7.5497899549e-9f; +#endif +#if @precision@ == 2 + const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8; + const volatile npy_double pio2_lo = 6.1232339957367659e-17; +#endif +#if @precision@ == 3 + const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10l; + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; +#endif + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; + const @type@ pio2_hi = NPY_PI_2@c@; + @type@ x, y, ax, ay, rx, ry; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + + /* This helps handle many cases. 
*/ + if (y == 0 && ax <= 1) { + return npy_cpack@c@(npy_atanh@c@(x), y); + } + + /* To ensure the same accuracy as atan(), and to filter out z = 0. */ + if (x == 0) { + return npy_cpack@c@(x, npy_atan@c@(y)); + } + + if (npy_isnan(x) || npy_isnan(y)) { + /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ + if (npy_isinf(x)) { + return npy_cpack@c@(npy_copysign@c@(0, x), y + y); + } + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ + if (npy_isinf(y)) { + return npy_cpack@c@(npy_copysign@c@(0, x), + npy_copysign@c@(pio2_hi + pio2_lo, y)); + } + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + return npy_cpack@c@(_real_part_reciprocal@c@(x, y), + npy_copysign@c@(pio2_hi + pio2_lo, y)); + } + + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + /* + * z = 0 was filtered out above. All other cases must raise + * inexact, but this is the only only that needs to do it + * explicitly. + */ + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < @TEPS@) { + rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2; + } + else { + rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4; + } + + if (ax == 1) { + ry = npy_atan2@c@(2, -ay) / 2; + } + else if (ay < @TEPS@) { + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2; + } + else { + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + } + + return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); +} +#endif +/**end repeat**/ + +/*========================================================== + * Decorate all the functions which are available natively + *=========================================================*/ + +/**begin repeat + * #type = npy_float, npy_double, npy_longdouble# + * #ctype = npy_cfloat, npy_cdouble, npy_clongdouble# + * #c = f, , l# + * #C = F, , L# + */ + +/**begin repeat1 + * #kind = cabs,carg# + * #KIND = CABS,CARG# + */ +#ifdef HAVE_@KIND@@C@ +@type@ +npy_@kind@@c@(@ctype@ z) +{ + __@ctype@_to_c99_cast z1; + z1.npy_z = z; + return @kind@@c@(z1.c99_z); +} +#endif +/**end repeat1**/ + +/**begin repeat1 + * #kind = cexp,clog,csqrt,ccos,csin,ctan,ccosh,csinh,ctanh, + * cacos,casin,catan,cacosh,casinh,catanh# + * #KIND = CEXP,CLOG,CSQRT,CCOS,CSIN,CTAN,CCOSH,CSINH,CTANH, + * CACOS,CASIN,CATAN,CACOSH,CASINH,CATANH# + */ +#ifdef HAVE_@KIND@@C@ +@ctype@ +npy_@kind@@c@(@ctype@ z) +{ + __@ctype@_to_c99_cast z1; + __@ctype@_to_c99_cast ret; + z1.npy_z = z; + ret.c99_z = @kind@@c@(z1.c99_z); + return ret.npy_z; +} +#endif +/**end repeat1**/ + + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_internal.h.src b/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_internal.h.src new file mode 100644 index 0000000000..fa820baac3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npymath/npy_math_internal.h.src @@ -0,0 +1,718 @@ +/* + * vim:syntax=c + * A small module to implement missing C99 math capabilities required by numpy + * + * Please keep this independent of python ! Only basic types (npy_longdouble) + * can be used, otherwise, pure C, without any use of Python facilities + * + * How to add a function to this section + * ------------------------------------- + * + * Say you want to add `foo`, these are the steps and the reasons for them. + * + * 1) Add foo to the appropriate list in the configuration system. 
The + * lists can be found in numpy/core/setup.py lines 63-105. Read the + * comments that come with them, they are very helpful. + * + * 2) The configuration system will define a macro HAVE_FOO if your function + * can be linked from the math library. The result can depend on the + * optimization flags as well as the compiler, so can't be known ahead of + * time. If the function can't be linked, then either it is absent, defined + * as a macro, or is an intrinsic (hardware) function. + * + * i) Undefine any possible macros: + * + * #ifdef foo + * #undef foo + * #endif + * + * ii) Avoid as much as possible to declare any function here. Declaring + * functions is not portable: some platforms define some function inline + * with a non standard identifier, for example, or may put another + * identifier which changes the calling convention of the function. If you + * really have to, ALWAYS declare it for the one platform you are dealing + * with: + * + * Not ok: + * double exp(double a); + * + * Ok: + * #ifdef SYMBOL_DEFINED_WEIRD_PLATFORM + * double exp(double); + * #endif + * + * Some of the code is taken from msun library in FreeBSD, with the following + * notice: + * + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +#include "npy_math_private.h" + +/* + ***************************************************************************** + ** BASIC MATH FUNCTIONS ** + ***************************************************************************** + */ + +/* Original code by Konrad Hinsen. */ +#ifndef HAVE_EXPM1 +NPY_INPLACE double npy_expm1(double x) +{ + if (npy_isinf(x) && x > 0) { + return x; + } + else { + const double u = npy_exp(x); + + if (u == 1.0) { + return x; + } else if (u - 1.0 == -1.0) { + return -1; + } else { + return (u - 1.0) * x/npy_log(u); + } + } +} +#endif + +#ifndef HAVE_LOG1P +NPY_INPLACE double npy_log1p(double x) +{ + if (npy_isinf(x) && x > 0) { + return x; + } + else { + const double u = 1. + x; + const double d = u - 1.; + + if (d == 0) { + return x; + } else { + return npy_log(u) * x / d; + } + } +} +#endif + +/* Taken from FreeBSD mlib, adapted for numpy + * + * XXX: we could be a bit faster by reusing high/low words for inf/nan + * classification instead of calling npy_isinf/npy_isnan: we should have some + * macros for this, though, instead of doing it manually + */ +#ifndef HAVE_ATAN2 +/* XXX: we should have this in npy_math.h */ +#define NPY_DBL_EPSILON 1.2246467991473531772E-16 +NPY_INPLACE double npy_atan2(double y, double x) +{ + npy_int32 k, m, iy, ix, hx, hy; + npy_uint32 lx,ly; + double z; + + EXTRACT_WORDS(hx, lx, x); + ix = hx & 0x7fffffff; + EXTRACT_WORDS(hy, ly, y); + iy = hy & 0x7fffffff; + + /* if x or y is nan, return nan */ + if (npy_isnan(x * y)) { + return x + y; + } + + if (x == 1.0) { + return npy_atan(y); + } + + m = 2 * (npy_signbit((x)) != 0) + (npy_signbit((y)) != 0); + if (y == 0.0) { + switch(m) { + case 0: + case 1: return y; /* atan(+-0,+anything)=+-0 */ + case 2: return NPY_PI;/* atan(+0,-anything) = pi */ + case 3: return -NPY_PI;/* atan(-0,-anything) =-pi */ + } + } + + if (x == 0.0) { + return y > 0 ? 
NPY_PI_2 : -NPY_PI_2; + } + + if (npy_isinf(x)) { + if (npy_isinf(y)) { + switch(m) { + case 0: return NPY_PI_4;/* atan(+INF,+INF) */ + case 1: return -NPY_PI_4;/* atan(-INF,+INF) */ + case 2: return 3.0*NPY_PI_4;/*atan(+INF,-INF)*/ + case 3: return -3.0*NPY_PI_4;/*atan(-INF,-INF)*/ + } + } else { + switch(m) { + case 0: return NPY_PZERO; /* atan(+...,+INF) */ + case 1: return NPY_NZERO; /* atan(-...,+INF) */ + case 2: return NPY_PI; /* atan(+...,-INF) */ + case 3: return -NPY_PI; /* atan(-...,-INF) */ + } + } + } + + if (npy_isinf(y)) { + return y > 0 ? NPY_PI_2 : -NPY_PI_2; + } + + /* compute y/x */ + k = (iy - ix) >> 20; + if (k > 60) { /* |y/x| > 2**60 */ + z = NPY_PI_2 + 0.5 * NPY_DBL_EPSILON; + m &= 1; + } else if (hx < 0 && k < -60) { + z = 0.0; /* 0 > |y|/x > -2**-60 */ + } else { + z = npy_atan(npy_fabs(y/x)); /* safe to do y/x */ + } + + switch (m) { + case 0: return z ; /* atan(+,+) */ + case 1: return -z ; /* atan(-,+) */ + case 2: return NPY_PI - (z - NPY_DBL_EPSILON);/* atan(+,-) */ + default: /* case 3 */ + return (z - NPY_DBL_EPSILON) - NPY_PI;/* atan(-,-) */ + } +} + +#endif + +#ifndef HAVE_HYPOT +NPY_INPLACE double npy_hypot(double x, double y) +{ + double yx; + + if (npy_isinf(x) || npy_isinf(y)) { + return NPY_INFINITY; + } + + if (npy_isnan(x) || npy_isnan(y)) { + return NPY_NAN; + } + + x = npy_fabs(x); + y = npy_fabs(y); + if (x < y) { + double temp = x; + x = y; + y = temp; + } + if (x == 0.) { + return 0.; + } + else { + yx = y/x; + return x*npy_sqrt(1.+yx*yx); + } +} +#endif + +#ifndef HAVE_ACOSH +NPY_INPLACE double npy_acosh(double x) +{ + if (x < 1.0) { + return NPY_NAN; + } + + if (npy_isfinite(x)) { + if (x > 1e8) { + return npy_log(x) + NPY_LOGE2; + } + else { + double u = x - 1.0; + return npy_log1p(u + npy_sqrt(2*u + u*u)); + } + } + return x; +} +#endif + +#ifndef HAVE_ASINH +NPY_INPLACE double npy_asinh(double xx) +{ + double x, d; + int sign; + if (xx < 0.0) { + sign = -1; + x = -xx; + } + else { + sign = 1; + x = xx; + } + if (x > 1e8) { + d = x; + } else { + d = npy_sqrt(x*x + 1); + } + return sign*npy_log1p(x*(1.0 + x/(d+1))); +} +#endif + +#ifndef HAVE_ATANH +NPY_INPLACE double npy_atanh(double x) +{ + if (x > 0) { + return -0.5*npy_log1p(-2.0*x/(1.0 + x)); + } + else { + return 0.5*npy_log1p(2.0*x/(1.0 - x)); + } +} +#endif + +#ifndef HAVE_RINT +#if defined(_MSC_VER) && (_MSC_VER == 1500) && !defined(_WIN64) +#pragma optimize("", off) +#endif +NPY_INPLACE double npy_rint(double x) +{ + double y, r; + + y = npy_floor(x); + r = x - y; + + if (r > 0.5) { + y += 1.0; + } + + /* Round to nearest even */ + if (r == 0.5) { + r = y - 2.0*npy_floor(0.5*y); + if (r == 1.0) { + y += 1.0; + } + } + return y; +} +#if defined(_MSC_VER) && (_MSC_VER == 1500) && !defined(_WIN64) +#pragma optimize("", on) +#endif +#endif + +#ifndef HAVE_TRUNC +NPY_INPLACE double npy_trunc(double x) +{ + return x < 0 ? npy_ceil(x) : npy_floor(x); +} +#endif + +#ifndef HAVE_EXP2 +NPY_INPLACE double npy_exp2(double x) +{ + return npy_exp(NPY_LOGE2*x); +} +#endif + +#ifndef HAVE_LOG2 +NPY_INPLACE double npy_log2(double x) +{ +#ifdef HAVE_FREXP + if (!npy_isfinite(x) || x <= 0.) { + /* special value result */ + return npy_log(x); + } + else { + /* + * fallback implementation copied from python3.4 math.log2 + * provides int(log(2**i)) == i for i 1-64 in default rounding mode. + * + * We want log2(m * 2**e) == log(m) / log(2) + e. 
Care is needed when + * x is just greater than 1.0: in that case e is 1, log(m) is negative, + * and we get significant cancellation error from the addition of + * log(m) / log(2) to e. The slight rewrite of the expression below + * avoids this problem. + */ + int e; + double m = frexp(x, &e); + if (x >= 1.0) { + return log(2.0 * m) / log(2.0) + (e - 1); + } + else { + return log(m) / log(2.0) + e; + } + } +#else + /* does not provide int(log(2**i)) == i */ + return NPY_LOG2E * npy_log(x); +#endif +} +#endif + +/* + * if C99 extensions not available then define dummy functions that use the + * double versions for + * + * sin, cos, tan + * sinh, cosh, tanh, + * fabs, floor, ceil, rint, trunc + * sqrt, log10, log, exp, expm1 + * asin, acos, atan, + * asinh, acosh, atanh + * + * hypot, atan2, pow, fmod, modf + * ldexp, frexp + * + * We assume the above are always available in their double versions. + * + * NOTE: some facilities may be available as macro only instead of functions. + * For simplicity, we define our own functions and undef the macros. We could + * instead test for the macro, but I am lazy to do that for now. + */ + +/**begin repeat + * #type = npy_longdouble, npy_float# + * #TYPE = NPY_LONGDOUBLE, FLOAT# + * #c = l,f# + * #C = L,F# + */ + +/**begin repeat1 + * #kind = sin,cos,tan,sinh,cosh,tanh,fabs,floor,ceil,rint,trunc,sqrt,log10, + * log,exp,expm1,asin,acos,atan,asinh,acosh,atanh,log1p,exp2,log2# + * #KIND = SIN,COS,TAN,SINH,COSH,TANH,FABS,FLOOR,CEIL,RINT,TRUNC,SQRT,LOG10, + * LOG,EXP,EXPM1,ASIN,ACOS,ATAN,ASINH,ACOSH,ATANH,LOG1P,EXP2,LOG2# + */ + +#ifdef @kind@@c@ +#undef @kind@@c@ +#endif +#ifndef HAVE_@KIND@@C@ +NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) +{ + return (@type@) npy_@kind@((double)x); +} +#endif + +/**end repeat1**/ + +/**begin repeat1 + * #kind = atan2,hypot,pow,fmod,copysign# + * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + */ +#ifdef @kind@@c@ +#undef @kind@@c@ +#endif +#ifndef HAVE_@KIND@@C@ +NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) +{ + return (@type@) npy_@kind@((double)x, (double) y); +} +#endif +/**end repeat1**/ + +#ifdef modf@c@ +#undef modf@c@ +#endif +#ifndef HAVE_MODF@C@ +NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr) +{ + double niptr; + double y = npy_modf((double)x, &niptr); + *iptr = (@type@) niptr; + return (@type@) y; +} +#endif + +#ifdef ldexp@c@ +#undef ldexp@c@ +#endif +#ifndef HAVE_LDEXP@C@ +NPY_INPLACE @type@ npy_ldexp@c@(@type@ x, int exp) +{ + return (@type@) npy_ldexp((double)x, exp); +} +#endif + +#ifdef frexp@c@ +#undef frexp@c@ +#endif +#ifndef HAVE_FREXP@C@ +NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp) +{ + return (@type@) npy_frexp(x, exp); +} +#endif + +/**end repeat**/ + + +/* + * Decorate all the math functions which are available on the current platform + */ + +/**begin repeat + * #type = npy_longdouble, npy_double, npy_float# + * #c = l,,f# + * #C = L,,F# + */ +/**begin repeat1 + * #kind = sin,cos,tan,sinh,cosh,tanh,fabs,floor,ceil,rint,trunc,sqrt,log10, + * log,exp,expm1,asin,acos,atan,asinh,acosh,atanh,log1p,exp2,log2# + * #KIND = SIN,COS,TAN,SINH,COSH,TANH,FABS,FLOOR,CEIL,RINT,TRUNC,SQRT,LOG10, + * LOG,EXP,EXPM1,ASIN,ACOS,ATAN,ASINH,ACOSH,ATANH,LOG1P,EXP2,LOG2# + */ +#ifdef HAVE_@KIND@@C@ +NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) +{ + return @kind@@c@(x); +} +#endif + +/**end repeat1**/ + +/**begin repeat1 + * #kind = atan2,hypot,pow,fmod,copysign# + * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + */ +#ifdef HAVE_@KIND@@C@ +NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) +{ + return @kind@@c@(x, 
y); +} +#endif +/**end repeat1**/ + +#ifdef HAVE_MODF@C@ +NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr) +{ + return modf@c@(x, iptr); +} +#endif + +#ifdef HAVE_LDEXP@C@ +NPY_INPLACE @type@ npy_ldexp@c@(@type@ x, int exp) +{ + return ldexp@c@(x, exp); +} +#endif + +#ifdef HAVE_FREXP@C@ +NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp) +{ + return frexp@c@(x, exp); +} +#endif + +/* C99 but not mandatory */ + +#ifndef HAVE_CBRT@C@ +NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) +{ + /* don't set invalid flag */ + if (npy_isnan(x)) { + return NPY_NAN; + } + else if (x < 0) { + return -npy_pow@c@(-x, 1. / 3.); + } + else { + return npy_pow@c@(x, 1. / 3.); + } +} +#else +NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) +{ + return cbrt@c@(x); +} +#endif + +/**end repeat**/ + + +/* + * Non standard functions + */ + +/**begin repeat + * #type = npy_float, npy_double, npy_longdouble# + * #c = f, ,l# + * #C = F, ,L# + */ + +@type@ npy_heaviside@c@(@type@ x, @type@ h0) +{ + if (npy_isnan(x)) { + return (@type@) NPY_NAN; + } + else if (x == 0) { + return h0; + } + else if (x < 0) { + return (@type@) 0.0; + } + else { + return (@type@) 1.0; + } +} + +#define LOGE2 NPY_LOGE2@c@ +#define LOG2E NPY_LOG2E@c@ +#define RAD2DEG (180.0@c@/NPY_PI@c@) +#define DEG2RAD (NPY_PI@c@/180.0@c@) + +NPY_INPLACE @type@ npy_rad2deg@c@(@type@ x) +{ + return x*RAD2DEG; +} + +NPY_INPLACE @type@ npy_deg2rad@c@(@type@ x) +{ + return x*DEG2RAD; +} + +NPY_INPLACE @type@ npy_log2_1p@c@(@type@ x) +{ + return LOG2E*npy_log1p@c@(x); +} + +NPY_INPLACE @type@ npy_exp2_m1@c@(@type@ x) +{ + return npy_expm1@c@(LOGE2*x); +} + +NPY_INPLACE @type@ npy_logaddexp@c@(@type@ x, @type@ y) +{ + if (x == y) { + /* Handles infinities of the same sign without warnings */ + return x + LOGE2; + } + else { + const @type@ tmp = x - y; + if (tmp > 0) { + return x + npy_log1p@c@(npy_exp@c@(-tmp)); + } + else if (tmp <= 0) { + return y + npy_log1p@c@(npy_exp@c@(tmp)); + } + else { + /* NaNs */ + return tmp; + } + } +} + +NPY_INPLACE @type@ npy_logaddexp2@c@(@type@ x, @type@ y) +{ + if (x == y) { + /* Handles infinities of the same sign without warnings */ + return x + 1; + } + else { + const @type@ tmp = x - y; + if (tmp > 0) { + return x + npy_log2_1p@c@(npy_exp2@c@(-tmp)); + } + else if (tmp <= 0) { + return y + npy_log2_1p@c@(npy_exp2@c@(tmp)); + } + else { + /* NaNs */ + return tmp; + } + } +} + +/* + * Python version of divmod. + * + * The implementation is mostly copied from cpython 3.5. + */ +NPY_INPLACE @type@ +npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus) +{ + @type@ div, mod, floordiv; + + mod = npy_fmod@c@(a, b); + + if (!b) { + /* If b == 0, return result of fmod. 
For IEEE is nan */ + *modulus = mod; + return mod; + } + + /* a - mod should be very nearly an integer multiple of b */ + div = (a - mod) / b; + + /* adjust fmod result to conform to Python convention of remainder */ + if (mod) { + if ((b < 0) != (mod < 0)) { + mod += b; + div -= 1.0@c@; + } + } + else { + /* if mod is zero ensure correct sign */ + mod = npy_copysign@c@(0, b); + } + + /* snap quotient to nearest integral value */ + if (div) { + floordiv = npy_floor@c@(div); + if (div - floordiv > 0.5@c@) + floordiv += 1.0@c@; + } + else { + /* if div is zero ensure correct sign */ + floordiv = npy_copysign@c@(0, a/b); + } + + *modulus = mod; + return floordiv; +} + +#undef LOGE2 +#undef LOG2E +#undef RAD2DEG +#undef DEG2RAD + +/**end repeat**/ + +/**begin repeat + * + * #type = npy_uint, npy_ulong, npy_ulonglong# + * #c = u,ul,ull# + */ +NPY_INPLACE @type@ +npy_gcd@c@(@type@ a, @type@ b) +{ + @type@ c; + while (a != 0) { + c = a; + a = b%a; + b = c; + } + return b; +} + +NPY_INPLACE @type@ +npy_lcm@c@(@type@ a, @type@ b) +{ + @type@ gcd = npy_gcd@c@(a, b); + return gcd == 0 ? 0 : a / gcd * b; +} +/**end repeat**/ + +/**begin repeat + * + * #type = (npy_int, npy_long, npy_longlong)*2# + * #c = (,l,ll)*2# + * #func=gcd*3,lcm*3# + */ +NPY_INPLACE @type@ +npy_@func@@c@(@type@ a, @type@ b) +{ + return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b); +} +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/npysort/binsearch.c.src b/contrib/python/numpy/py2/numpy/core/src/npysort/binsearch.c.src new file mode 100644 index 0000000000..c04e197b7c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npysort/binsearch.c.src @@ -0,0 +1,250 @@ +/* -*- c -*- */ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include "npy_binsearch.h" + +#define NOT_USED NPY_UNUSED(unused) + +/* + ***************************************************************************** + ** NUMERIC SEARCHES ** + ***************************************************************************** + */ + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# + */ + +#define @TYPE@_LTE(a, b) (!@TYPE@_LT((b), (a))) + +/**begin repeat1 + * + * #side = left, right# + * #CMP = LT, LTE# + */ + +NPY_VISIBILITY_HIDDEN void +binsearch_@side@_@suff@(const char *arr, const char *key, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, npy_intp ret_str, + PyArrayObject *NOT_USED) +{ + npy_intp min_idx = 0; + npy_intp max_idx = arr_len; + @type@ last_key_val; + + if (key_len == 0) { + return; + } + last_key_val = *(const @type@ *)key; + + for (; key_len > 0; key_len--, key += key_str, ret += ret_str) { + const @type@ key_val = *(const @type@ *)key; + /* + * Updating only one of the indices based on the previous key + * gives the search a big boost when keys are sorted, but slightly + * slows down things for purely random ones. 
+ */ + if (@TYPE@_LT(last_key_val, key_val)) { + max_idx = arr_len; + } + else { + min_idx = 0; + max_idx = (max_idx < arr_len) ? (max_idx + 1) : arr_len; + } + + last_key_val = key_val; + + while (min_idx < max_idx) { + const npy_intp mid_idx = min_idx + ((max_idx - min_idx) >> 1); + const @type@ mid_val = *(const @type@ *)(arr + mid_idx*arr_str); + if (@TYPE@_@CMP@(mid_val, key_val)) { + min_idx = mid_idx + 1; + } + else { + max_idx = mid_idx; + } + } + *(npy_intp *)ret = min_idx; + } +} + +NPY_VISIBILITY_HIDDEN int +argbinsearch_@side@_@suff@(const char *arr, const char *key, + const char *sort, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, + npy_intp sort_str, npy_intp ret_str, + PyArrayObject *NOT_USED) +{ + npy_intp min_idx = 0; + npy_intp max_idx = arr_len; + @type@ last_key_val; + + if (key_len == 0) { + return 0; + } + last_key_val = *(const @type@ *)key; + + for (; key_len > 0; key_len--, key += key_str, ret += ret_str) { + const @type@ key_val = *(const @type@ *)key; + /* + * Updating only one of the indices based on the previous key + * gives the search a big boost when keys are sorted, but slightly + * slows down things for purely random ones. + */ + if (@TYPE@_LT(last_key_val, key_val)) { + max_idx = arr_len; + } + else { + min_idx = 0; + max_idx = (max_idx < arr_len) ? (max_idx + 1) : arr_len; + } + + last_key_val = key_val; + + while (min_idx < max_idx) { + const npy_intp mid_idx = min_idx + ((max_idx - min_idx) >> 1); + const npy_intp sort_idx = *(npy_intp *)(sort + mid_idx*sort_str); + @type@ mid_val; + + if (sort_idx < 0 || sort_idx >= arr_len) { + return -1; + } + + mid_val = *(const @type@ *)(arr + sort_idx*arr_str); + + if (@TYPE@_@CMP@(mid_val, key_val)) { + min_idx = mid_idx + 1; + } + else { + max_idx = mid_idx; + } + } + *(npy_intp *)ret = min_idx; + } + return 0; +} + +/**end repeat1**/ +/**end repeat**/ + +/* + ***************************************************************************** + ** GENERIC SEARCH ** + ***************************************************************************** + */ + + /**begin repeat + * + * #side = left, right# + * #CMP = <, <=# + */ + +NPY_VISIBILITY_HIDDEN void +npy_binsearch_@side@(const char *arr, const char *key, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, npy_intp ret_str, + PyArrayObject *cmp) +{ + PyArray_CompareFunc *compare = PyArray_DESCR(cmp)->f->compare; + npy_intp min_idx = 0; + npy_intp max_idx = arr_len; + const char *last_key = key; + + for (; key_len > 0; key_len--, key += key_str, ret += ret_str) { + /* + * Updating only one of the indices based on the previous key + * gives the search a big boost when keys are sorted, but slightly + * slows down things for purely random ones. + */ + if (compare(last_key, key, cmp) @CMP@ 0) { + max_idx = arr_len; + } + else { + min_idx = 0; + max_idx = (max_idx < arr_len) ? 
(max_idx + 1) : arr_len; + } + + last_key = key; + + while (min_idx < max_idx) { + const npy_intp mid_idx = min_idx + ((max_idx - min_idx) >> 1); + const char *arr_ptr = arr + mid_idx*arr_str; + + if (compare(arr_ptr, key, cmp) @CMP@ 0) { + min_idx = mid_idx + 1; + } + else { + max_idx = mid_idx; + } + } + *(npy_intp *)ret = min_idx; + } +} + +NPY_VISIBILITY_HIDDEN int +npy_argbinsearch_@side@(const char *arr, const char *key, + const char *sort, char *ret, + npy_intp arr_len, npy_intp key_len, + npy_intp arr_str, npy_intp key_str, + npy_intp sort_str, npy_intp ret_str, + PyArrayObject *cmp) +{ + PyArray_CompareFunc *compare = PyArray_DESCR(cmp)->f->compare; + npy_intp min_idx = 0; + npy_intp max_idx = arr_len; + const char *last_key = key; + + for (; key_len > 0; key_len--, key += key_str, ret += ret_str) { + /* + * Updating only one of the indices based on the previous key + * gives the search a big boost when keys are sorted, but slightly + * slows down things for purely random ones. + */ + if (compare(last_key, key, cmp) @CMP@ 0) { + max_idx = arr_len; + } + else { + min_idx = 0; + max_idx = (max_idx < arr_len) ? (max_idx + 1) : arr_len; + } + + last_key = key; + + while (min_idx < max_idx) { + const npy_intp mid_idx = min_idx + ((max_idx - min_idx) >> 1); + const npy_intp sort_idx = *(npy_intp *)(sort + mid_idx*sort_str); + const char *arr_ptr; + + if (sort_idx < 0 || sort_idx >= arr_len) { + return -1; + } + + arr_ptr = arr + sort_idx*arr_str; + + if (compare(arr_ptr, key, cmp) @CMP@ 0) { + min_idx = mid_idx + 1; + } + else { + max_idx = mid_idx; + } + } + *(npy_intp *)ret = min_idx; + } + return 0; +} + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/npysort/heapsort.c.src b/contrib/python/numpy/py2/numpy/core/src/npysort/heapsort.c.src new file mode 100644 index 0000000000..c2e3b63cbe --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npysort/heapsort.c.src @@ -0,0 +1,402 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario can + * be slower than the merge and heap sorts. The merge sort requires + * extra memory and so for large arrays may not be useful. + * + * The merge sort is *stable*, meaning that equal components + * are unmoved from their entry versions, so it can be used to + * implement lexigraphic sorting on multiple keys. + * + * The heap sort is included for completeness. 
+ */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include <stdlib.h> + +#define NOT_USED NPY_UNUSED(unused) +#define PYA_QS_STACK 100 +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# + */ + +int +heapsort_@suff@(void *start, npy_intp n, void *NOT_USED) +{ + @type@ tmp, *a; + npy_intp i,j,l; + + /* The array needs to be offset by one for heapsort indexing */ + a = (@type@ *)start - 1; + + for (l = n>>1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l<<1; j <= n;) { + if (j < n && @TYPE@_LT(a[j], a[j+1])) { + j += 1; + } + if (@TYPE@_LT(tmp, a[j])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && @TYPE@_LT(a[j], a[j+1])) { + j++; + } + if (@TYPE@_LT(tmp, a[j])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + + +int +aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *NOT_USED) +{ + @type@ *v = vv; + npy_intp *a, i,j,l, tmp; + /* The arrays need to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n>>1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l<<1; j <= n;) { + if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) { + j += 1; + } + if (@TYPE@_LT(v[tmp], v[a[j]])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) { + j++; + } + if (@TYPE@_LT(v[tmp], v[a[j]])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #TYPE = STRING, UNICODE# + * #suff = string, unicode# + * #type = npy_char, npy_ucs4# + */ + +int +heapsort_@suff@(void *start, npy_intp n, void *varr) +{ + PyArrayObject *arr = varr; + size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); + @type@ *tmp = malloc(PyArray_ITEMSIZE(arr)); + @type@ *a = (@type@ *)start - len; + npy_intp i, j, l; + + if (tmp == NULL) { + return -NPY_ENOMEM; + } + + for (l = n>>1; l > 0; --l) { + @TYPE@_COPY(tmp, a + l*len, len); + for (i = l, j = l<<1; j <= n;) { + if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len)) + j += 1; + if (@TYPE@_LT(tmp, a + j*len, len)) { + @TYPE@_COPY(a + i*len, a + j*len, len); + i = j; + j += j; + } + else { + break; + } + 
} + @TYPE@_COPY(a + i*len, tmp, len); + } + + for (; n > 1;) { + @TYPE@_COPY(tmp, a + n*len, len); + @TYPE@_COPY(a + n*len, a + len, len); + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len)) + j++; + if (@TYPE@_LT(tmp, a + j*len, len)) { + @TYPE@_COPY(a + i*len, a + j*len, len); + i = j; + j += j; + } + else { + break; + } + } + @TYPE@_COPY(a + i*len, tmp, len); + } + + free(tmp); + return 0; +} + + +int +aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + @type@ *v = vv; + PyArrayObject *arr = varr; + size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); + npy_intp *a, i,j,l, tmp; + + /* The array needs to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n>>1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l<<1; j <= n;) { + if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len)) + j += 1; + if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len)) + j++; + if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + + +int +npy_heapsort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *tmp = malloc(elsize); + char *a = (char *)start - elsize; + npy_intp i, j, l; + + if (tmp == NULL) { + return -NPY_ENOMEM; + } + + for (l = num >> 1; l > 0; --l) { + GENERIC_COPY(tmp, a + l*elsize, elsize); + for (i = l, j = l << 1; j <= num;) { + if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) { + ++j; + } + if (cmp(tmp, a + j*elsize, arr) < 0) { + GENERIC_COPY(a + i*elsize, a + j*elsize, elsize); + i = j; + j += j; + } + else { + break; + } + } + GENERIC_COPY(a + i*elsize, tmp, elsize); + } + + for (; num > 1;) { + GENERIC_COPY(tmp, a + num*elsize, elsize); + GENERIC_COPY(a + num*elsize, a + elsize, elsize); + num -= 1; + for (i = 1, j = 2; j <= num;) { + if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) { + ++j; + } + if (cmp(tmp, a + j*elsize, arr) < 0) { + GENERIC_COPY(a + i*elsize, a + j*elsize, elsize); + i = j; + j += j; + } + else { + break; + } + } + GENERIC_COPY(a + i*elsize, tmp, elsize); + } + + free(tmp); + return 0; +} + + +int +npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + char *v = vv; + PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + npy_intp *a, i, j, l, tmp; + + /* The array needs to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n >> 1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l<<1; j <= n;) { + if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) { + ++j; + } + if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) { + ++j; + } + if (cmp(v + 
tmp*elsize, v + a[j]*elsize, arr) < 0) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/npysort/mergesort.c.src b/contrib/python/numpy/py2/numpy/core/src/npysort/mergesort.c.src new file mode 100644 index 0000000000..6f659617a7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npysort/mergesort.c.src @@ -0,0 +1,511 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario can + * be slower than the merge and heap sorts. The merge sort requires + * extra memory and so for large arrays may not be useful. + * + * The merge sort is *stable*, meaning that equal components + * are unmoved from their entry versions, so it can be used to + * implement lexigraphic sorting on multiple keys. + * + * The heap sort is included for completeness. + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include <stdlib.h> + +#define NOT_USED NPY_UNUSED(unused) +#define PYA_QS_STACK 100 +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# + */ + +static void +mergesort0_@suff@(@type@ *pl, @type@ *pr, @type@ *pw) +{ + @type@ vp, *pi, *pj, *pk, *pm; + + if (pr - pl > SMALL_MERGESORT) { + /* merge sort */ + pm = pl + ((pr - pl) >> 1); + mergesort0_@suff@(pl, pm, pw); + mergesort0_@suff@(pm, pr, pw); + for (pi = pw, pj = pl; pj < pm;) { + *pi++ = *pj++; + } + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (@TYPE@_LT(*pm, *pj)) { + *pk++ = *pm++; + } + else { + *pk++ = *pj++; + } + } + while(pj < pi) { + *pk++ = *pj++; + } + } + else { + /* insertion sort */ + for (pi = pl + 1; pi < pr; ++pi) { + vp = *pi; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, *pk)) { + *pj-- = *pk--; + } + *pj = vp; + } + } +} + + +int +mergesort_@suff@(void *start, npy_intp num, void *NOT_USED) +{ + @type@ *pl, *pr, *pw; + + pl = start; + pr = pl + num; + pw = malloc((num/2) * sizeof(@type@)); + if (pw == NULL) { + return -NPY_ENOMEM; + } + mergesort0_@suff@(pl, pr, pw); + + free(pw); + return 0; +} + + +static void 
+amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw) +{ + @type@ vp; + npy_intp vi, *pi, *pj, *pk, *pm; + + if (pr - pl > SMALL_MERGESORT) { + /* merge sort */ + pm = pl + ((pr - pl) >> 1); + amergesort0_@suff@(pl, pm, v, pw); + amergesort0_@suff@(pm, pr, v, pw); + for (pi = pw, pj = pl; pj < pm;) { + *pi++ = *pj++; + } + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (@TYPE@_LT(v[*pm], v[*pj])) { + *pk++ = *pm++; + } + else { + *pk++ = *pj++; + } + } + while(pj < pi) { + *pk++ = *pj++; + } + } + else { + /* insertion sort */ + for (pi = pl + 1; pi < pr; ++pi) { + vi = *pi; + vp = v[vi]; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, v[*pk])) { + *pj-- = *pk--; + } + *pj = vi; + } + } +} + + +int +amergesort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *NOT_USED) +{ + npy_intp *pl, *pr, *pw; + + pl = tosort; + pr = pl + num; + pw = malloc((num/2) * sizeof(npy_intp)); + if (pw == NULL) { + return -NPY_ENOMEM; + } + amergesort0_@suff@(pl, pr, v, pw); + free(pw); + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #TYPE = STRING, UNICODE# + * #suff = string, unicode# + * #type = npy_char, npy_ucs4# + */ + +static void +mergesort0_@suff@(@type@ *pl, @type@ *pr, @type@ *pw, @type@ *vp, size_t len) +{ + @type@ *pi, *pj, *pk, *pm; + + if ((size_t)(pr - pl) > SMALL_MERGESORT*len) { + /* merge sort */ + pm = pl + (((pr - pl)/len) >> 1)*len; + mergesort0_@suff@(pl, pm, pw, vp, len); + mergesort0_@suff@(pm, pr, pw, vp, len); + @TYPE@_COPY(pw, pl, pm - pl); + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (@TYPE@_LT(pm, pj, len)) { + @TYPE@_COPY(pk, pm, len); + pm += len; + pk += len; + } + else { + @TYPE@_COPY(pk, pj, len); + pj += len; + pk += len; + } + } + @TYPE@_COPY(pk, pj, pi - pj); + } + else { + /* insertion sort */ + for (pi = pl + len; pi < pr; pi += len) { + @TYPE@_COPY(vp, pi, len); + pj = pi; + pk = pi - len; + while (pj > pl && @TYPE@_LT(vp, pk, len)) { + @TYPE@_COPY(pj, pk, len); + pj -= len; + pk -= len; + } + @TYPE@_COPY(pj, vp, len); + } + } +} + + +int +mergesort_@suff@(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + size_t elsize = PyArray_ITEMSIZE(arr); + size_t len = elsize / sizeof(@type@); + @type@ *pl, *pr, *pw, *vp; + int err = 0; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + pl = start; + pr = pl + num*len; + pw = malloc((num/2) * elsize); + if (pw == NULL) { + err = -NPY_ENOMEM; + goto fail_0; + } + vp = malloc(elsize); + if (vp == NULL) { + err = -NPY_ENOMEM; + goto fail_1; + } + mergesort0_@suff@(pl, pr, pw, vp, len); + + free(vp); +fail_1: + free(pw); +fail_0: + return err; +} + + +static void +amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw, size_t len) +{ + @type@ *vp; + npy_intp vi, *pi, *pj, *pk, *pm; + + if (pr - pl > SMALL_MERGESORT) { + /* merge sort */ + pm = pl + ((pr - pl) >> 1); + amergesort0_@suff@(pl, pm, v, pw, len); + amergesort0_@suff@(pm, pr, v, pw, len); + for (pi = pw, pj = pl; pj < pm;) { + *pi++ = *pj++; + } + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (@TYPE@_LT(v + (*pm)*len, v + (*pj)*len, len)) { + *pk++ = *pm++; + } + else { + *pk++ = *pj++; + } + } + while (pj < pi) { + *pk++ = *pj++; + } + } + else 
{ + /* insertion sort */ + for (pi = pl + 1; pi < pr; ++pi) { + vi = *pi; + vp = v + vi*len; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) { + *pj-- = *pk--; + } + *pj = vi; + } + } +} + + +int +amergesort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + size_t elsize = PyArray_ITEMSIZE(arr); + size_t len = elsize / sizeof(@type@); + npy_intp *pl, *pr, *pw; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + pl = tosort; + pr = pl + num; + pw = malloc((num/2) * sizeof(npy_intp)); + if (pw == NULL) { + return -NPY_ENOMEM; + } + amergesort0_@suff@(pl, pr, v, pw, len); + free(pw); + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + + +static void +npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp elsize, + PyArray_CompareFunc *cmp, PyArrayObject *arr) +{ + char *pi, *pj, *pk, *pm; + + if (pr - pl > SMALL_MERGESORT*elsize) { + /* merge sort */ + pm = pl + (((pr - pl)/elsize) >> 1)*elsize; + npy_mergesort0(pl, pm, pw, vp, elsize, cmp, arr); + npy_mergesort0(pm, pr, pw, vp, elsize, cmp, arr); + GENERIC_COPY(pw, pl, pm - pl); + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (cmp(pm, pj, arr) < 0) { + GENERIC_COPY(pk, pm, elsize); + pm += elsize; + pk += elsize; + } + else { + GENERIC_COPY(pk, pj, elsize); + pj += elsize; + pk += elsize; + } + } + GENERIC_COPY(pk, pj, pi - pj); + } + else { + /* insertion sort */ + for (pi = pl + elsize; pi < pr; pi += elsize) { + GENERIC_COPY(vp, pi, elsize); + pj = pi; + pk = pi - elsize; + while (pj > pl && cmp(vp, pk, arr) < 0) { + GENERIC_COPY(pj, pk, elsize); + pj -= elsize; + pk -= elsize; + } + GENERIC_COPY(pj, vp, elsize); + } + } +} + + +int +npy_mergesort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *pl = start; + char *pr = pl + num*elsize; + char *pw; + char *vp; + int err = -NPY_ENOMEM; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + pw = malloc((num >> 1) *elsize); + vp = malloc(elsize); + + if (pw != NULL && vp != NULL) { + npy_mergesort0(pl, pr, pw, vp, elsize, cmp, arr); + err = 0; + } + + free(vp); + free(pw); + + return err; +} + + +static void +npy_amergesort0(npy_intp *pl, npy_intp *pr, char *v, npy_intp *pw, + npy_intp elsize, PyArray_CompareFunc *cmp, PyArrayObject *arr) +{ + char *vp; + npy_intp vi, *pi, *pj, *pk, *pm; + + if (pr - pl > SMALL_MERGESORT) { + /* merge sort */ + pm = pl + ((pr - pl) >> 1); + npy_amergesort0(pl, pm, v, pw, elsize, cmp, arr); + npy_amergesort0(pm, pr, v, pw, elsize, cmp, arr); + for (pi = pw, pj = pl; pj < pm;) { + *pi++ = *pj++; + } + pi = pw + (pm - pl); + pj = pw; + pk = pl; + while (pj < pi && pm < pr) { + if (cmp(v + (*pm)*elsize, v + (*pj)*elsize, arr) < 0) { + *pk++ = *pm++; + } + else { + *pk++ = *pj++; + } + } + while (pj < pi) { + *pk++ = *pj++; + } + } + else { + /* insertion sort */ + for (pi = pl + 1; pi < pr; ++pi) { + vi = *pi; + vp = v + vi*elsize; + pj = pi; + pk = pi - 1; + while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) { + *pj-- = *pk--; + } + *pj = vi; + } + } +} + + +int +npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) +{ + 
PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + npy_intp *pl, *pr, *pw; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + pl = tosort; + pr = pl + num; + pw = malloc((num >> 1) * sizeof(npy_intp)); + if (pw == NULL) { + return -NPY_ENOMEM; + } + npy_amergesort0(pl, pr, v, pw, elsize, cmp, arr); + free(pw); + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/npysort/quicksort.c.src b/contrib/python/numpy/py2/numpy/core/src/npysort/quicksort.c.src new file mode 100644 index 0000000000..49a2c49068 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npysort/quicksort.c.src @@ -0,0 +1,634 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so + * the code switches to the O(NlogN) worst case heapsort if not enough progress + * is made on the large side of the two quicksort partitions. This improves the + * worst case while still retaining the speed of quicksort for the common case. + * This is variant known as introsort. + * + * + * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): + * # sort remainder with heapsort if we are not making enough progress + * # we arbitrarily choose 2 * log(n) as the cutoff point + * if recursion_limit < 0: + * heapsort(lower, higher) + * return + * + * if lower < higher: + * pivot_pos = partition(lower, higher) + * # recurse into smaller first and leave larger on stack + * # this limits the required stack space + * if (pivot_pos - lower > higher - pivot_pos): + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * quicksort(lower, pivot_pos, recursion_limit - 1) + * else: + * quicksort(lower, pivot_pos, recursion_limit - 1) + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * + * + * the below code implements this converted to an iteration and as an + * additional minor optimization skips the recursion depth checking on the + * smaller partition as it is always less than half of the remaining data and + * will thus terminate fast enough + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include <stdlib.h> + +#define NOT_USED NPY_UNUSED(unused) +/* + * pushing largest partition has upper bound of log2(n) space + * we store two pointers each time + */ +#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, 
float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# + */ + +int +quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) +{ + @type@ vp; + @type@ *pl = start; + @type@ *pr = pl + num - 1; + @type@ *stack[PYA_QS_STACK]; + @type@ **sptr = stack; + @type@ *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + heapsort_@suff@(pl, pr - pl + 1, NULL); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl); + if (@TYPE@_LT(*pr, *pm)) @TYPE@_SWAP(*pr, *pm); + if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl); + vp = *pm; + pi = pl; + pj = pr - 1; + @TYPE@_SWAP(*pm, *pj); + for (;;) { + do ++pi; while (@TYPE@_LT(*pi, vp)); + do --pj; while (@TYPE@_LT(vp, *pj)); + if (pi >= pj) { + break; + } + @TYPE@_SWAP(*pi,*pj); + } + pk = pr - 1; + @TYPE@_SWAP(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vp = *pi; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, *pk)) { + *pj-- = *pk--; + } + *pj = vp; + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + + +int +aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) +{ + @type@ *v = vv; + @type@ vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); + if (@TYPE@_LT(v[*pr],v[*pm])) INTP_SWAP(*pr, *pm); + if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); + vp = v[*pm]; + pi = pl; + pj = pr - 1; + INTP_SWAP(*pm, *pj); + for (;;) { + do ++pi; while (@TYPE@_LT(v[*pi], vp)); + do --pj; while (@TYPE@_LT(vp, v[*pj])); + if (pi >= pj) { + break; + } + INTP_SWAP(*pi, *pj); + } + pk = pr - 1; + INTP_SWAP(*pi,*pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v[vi]; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, v[*pk])) { + *pj-- = *pk--; + } + *pj = vi; + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ 
+ + +/**begin repeat + * + * #TYPE = STRING, UNICODE# + * #suff = string, unicode# + * #type = npy_char, npy_ucs4# + */ + +int +quicksort_@suff@(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + const size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); + @type@ *vp; + @type@ *pl = start; + @type@ *pr = pl + (num - 1)*len; + @type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + vp = malloc(PyArray_ITEMSIZE(arr)); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + heapsort_@suff@(pl, (pr - pl) / len + 1, varr); + goto stack_pop; + } + while ((size_t)(pr - pl) > SMALL_QUICKSORT*len) { + /* quicksort partition */ + pm = pl + (((pr - pl)/len) >> 1)*len; + if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len); + if (@TYPE@_LT(pr, pm, len)) @TYPE@_SWAP(pr, pm, len); + if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len); + @TYPE@_COPY(vp, pm, len); + pi = pl; + pj = pr - len; + @TYPE@_SWAP(pm, pj, len); + for (;;) { + do pi += len; while (@TYPE@_LT(pi, vp, len)); + do pj -= len; while (@TYPE@_LT(vp, pj, len)); + if (pi >= pj) { + break; + } + @TYPE@_SWAP(pi, pj, len); + } + pk = pr - len; + @TYPE@_SWAP(pi, pk, len); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + len; + *sptr++ = pr; + pr = pi - len; + } + else { + *sptr++ = pl; + *sptr++ = pi - len; + pl = pi + len; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + len; pi <= pr; pi += len) { + @TYPE@_COPY(vp, pi, len); + pj = pi; + pk = pi - len; + while (pj > pl && @TYPE@_LT(vp, pk, len)) { + @TYPE@_COPY(pj, pk, len); + pj -= len; + pk -= len; + } + @TYPE@_COPY(pj, vp, len); + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + + +int +aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) +{ + @type@ *v = vv; + PyArrayObject *arr = varr; + size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); + @type@ *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr=stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + aheapsort_@suff@(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl); + if (@TYPE@_LT(v + (*pr)*len, v + (*pm)*len, len)) INTP_SWAP(*pr, *pm); + if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl); + vp = v + (*pm)*len; + pi = pl; + pj = pr - 1; + INTP_SWAP(*pm,*pj); + for (;;) { + do ++pi; while (@TYPE@_LT(v + (*pi)*len, vp, len)); + do --pj; while (@TYPE@_LT(vp, v + (*pj)*len, len)); + if (pi >= pj) { + break; + } + INTP_SWAP(*pi,*pj); + } + pk = pr - 1; + INTP_SWAP(*pi,*pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { 
+ vi = *pi; + vp = v + vi*len; + pj = pi; + pk = pi - 1; + while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) { + *pj-- = *pk--; + } + *pj = vi; + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +/**end repeat**/ + + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + + +int +npy_quicksort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *vp; + char *pl = start; + char *pr = pl + (num - 1)*elsize; + char *stack[PYA_QS_STACK]; + char **sptr = stack; + char *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + vp = malloc(elsize); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_heapsort(pl, (pr - pl) / elsize + 1, varr); + goto stack_pop; + } + while(pr - pl > SMALL_QUICKSORT*elsize) { + /* quicksort partition */ + pm = pl + (((pr - pl) / elsize) >> 1) * elsize; + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + if (cmp(pr, pm, arr) < 0) { + GENERIC_SWAP(pr, pm, elsize); + } + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + GENERIC_COPY(vp, pm, elsize); + pi = pl; + pj = pr - elsize; + GENERIC_SWAP(pm, pj, elsize); + /* + * Generic comparisons may be buggy, so don't rely on the sentinels + * to keep the pointers from going out of bounds. + */ + for (;;) { + do { + pi += elsize; + } while (cmp(pi, vp, arr) < 0 && pi < pj); + do { + pj -= elsize; + } while (cmp(vp, pj, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + GENERIC_SWAP(pi, pj, elsize); + } + pk = pr - elsize; + GENERIC_SWAP(pi, pk, elsize); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + elsize; + *sptr++ = pr; + pr = pi - elsize; + } + else { + *sptr++ = pl; + *sptr++ = pi - elsize; + pl = pi + elsize; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + elsize; pi <= pr; pi += elsize) { + GENERIC_COPY(vp, pi, elsize); + pj = pi; + pk = pi - elsize; + while (pj > pl && cmp(vp, pk, arr) < 0) { + GENERIC_COPY(pj, pk, elsize); + pj -= elsize; + pk -= elsize; + } + GENERIC_COPY(pj, vp, elsize); + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + + +int +npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) +{ + char *v = vv; + PyArrayObject *arr = varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_aheapsort(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) { + 
INTP_SWAP(*pm, *pl); + } + if (cmp(v + (*pr)*elsize, v + (*pm)*elsize, arr) < 0) { + INTP_SWAP(*pr, *pm); + } + if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) { + INTP_SWAP(*pm, *pl); + } + vp = v + (*pm)*elsize; + pi = pl; + pj = pr - 1; + INTP_SWAP(*pm,*pj); + for (;;) { + do { + ++pi; + } while (cmp(v + (*pi)*elsize, vp, arr) < 0 && pi < pj); + do { + --pj; + } while (cmp(vp, v + (*pj)*elsize, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + INTP_SWAP(*pi,*pj); + } + pk = pr - 1; + INTP_SWAP(*pi,*pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v + vi*elsize; + pj = pi; + pk = pi - 1; + while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) { + *pj-- = *pk--; + } + *pj = vi; + } +stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/npysort/selection.c.src b/contrib/python/numpy/py2/numpy/core/src/npysort/selection.c.src new file mode 100644 index 0000000000..1e0934558a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/npysort/selection.c.src @@ -0,0 +1,418 @@ +/* -*- c -*- */ + +/* + * + * The code is loosely based on the quickselect from + * Nicolas Devillard - 1998 public domain + * http://ndevilla.free.fr/median/median/ + * + * Quick select with median of 3 pivot is usually the fastest, + * but the worst case scenario can be quadratic complexity, + * e.g. np.roll(np.arange(x), x / 2) + * To avoid this if it recurses too much it falls back to the + * worst case linear median of median of group 5 pivot strategy. + */ + + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy/npy_math.h" +#include "npy_partition.h" +#include <stdlib.h> + +#define NOT_USED NPY_UNUSED(unused) + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +static NPY_INLINE void store_pivot(npy_intp pivot, npy_intp kth, + npy_intp * pivots, npy_intp * npiv) +{ + if (pivots == NULL) { + return; + } + + /* + * If pivot is the requested kth store it, overwriting other pivots if + * required.
This must be done so iterative partition can work without + * manually shifting lower data offset by kth each time + */ + if (pivot == kth && *npiv == NPY_MAX_PIVOT_STACK) { + pivots[*npiv - 1] = pivot; + } + /* + * we only need pivots larger than current kth, larger pivots are not + * useful as partitions on smaller kth would reorder the stored pivots + */ + else if (pivot >= kth && *npiv < NPY_MAX_PIVOT_STACK) { + pivots[*npiv] = pivot; + (*npiv) += 1; + } +} + +/**begin repeat + * + * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, + * npy_cdouble, npy_clongdouble# + * #inexact = 0*11, 1*7# + */ + +static npy_intp +amedian_of_median5_@suff@(@type@ *v, npy_intp* tosort, const npy_intp num, + npy_intp * pivots, + npy_intp * npiv); + +static npy_intp +median_of_median5_@suff@(@type@ *v, const npy_intp num, + npy_intp * pivots, + npy_intp * npiv); + +/**begin repeat1 + * #name = , a# + * #idx = , tosort# + * #arg = 0, 1# + */ +#if @arg@ +/* helper macros to avoid duplication of direct/indirect selection */ +#define IDX(x) tosort[x] +#define SORTEE(x) tosort[x] +#define SWAP INTP_SWAP +#define MEDIAN3_SWAP(v, tosort, low, mid, high) \ + amedian3_swap_@suff@(v, tosort, low, mid, high) +#define MEDIAN5(v, tosort, subleft) \ + amedian5_@suff@(v, tosort + subleft) +#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \ + aunguarded_partition_@suff@(v, tosort, pivot, ll, hh) +#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \ + aintroselect_@suff@(v, tosort, nmed, nmed / 2, pivots, npiv, NULL) +#define DUMBSELECT(v, tosort, left, num, kth) \ + adumb_select_@suff@(v, tosort + left, num, kth) +#else +#define IDX(x) (x) +#define SORTEE(x) v[x] +#define SWAP @TYPE@_SWAP +#define MEDIAN3_SWAP(v, tosort, low, mid, high) \ + median3_swap_@suff@(v, low, mid, high) +#define MEDIAN5(v, tosort, subleft) \ + median5_@suff@(v + subleft) +#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \ + unguarded_partition_@suff@(v, pivot, ll, hh) +#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \ + introselect_@suff@(v, nmed, nmed / 2, pivots, npiv, NULL) +#define DUMBSELECT(v, tosort, left, num, kth) \ + dumb_select_@suff@(v + left, num, kth) +#endif + + +/* + * median of 3 pivot strategy + * gets min and median and moves median to low and min to low + 1 + * for efficient partitioning, see unguarded_partition + */ +static NPY_INLINE void +@name@median3_swap_@suff@(@type@ * v, +#if @arg@ + npy_intp * tosort, +#endif + npy_intp low, npy_intp mid, npy_intp high) +{ + if (@TYPE@_LT(v[IDX(high)], v[IDX(mid)])) + SWAP(SORTEE(high), SORTEE(mid)); + if (@TYPE@_LT(v[IDX(high)], v[IDX(low)])) + SWAP(SORTEE(high), SORTEE(low)); + /* move pivot to low */ + if (@TYPE@_LT(v[IDX(low)], v[IDX(mid)])) + SWAP(SORTEE(low), SORTEE(mid)); + /* move 3-lowest element to low + 1 */ + SWAP(SORTEE(mid), SORTEE(low + 1)); +} + + +/* select index of median of five elements */ +static npy_intp @name@median5_@suff@( +#if @arg@ + const @type@ * v, npy_intp * tosort +#else + @type@ * v +#endif + ) +{ + /* could be optimized as we only need the index (no swaps) */ + 
if (@TYPE@_LT(v[IDX(1)], v[IDX(0)])) { + SWAP(SORTEE(1), SORTEE(0)); + } + if (@TYPE@_LT(v[IDX(4)], v[IDX(3)])) { + SWAP(SORTEE(4), SORTEE(3)); + } + if (@TYPE@_LT(v[IDX(3)], v[IDX(0)])) { + SWAP(SORTEE(3), SORTEE(0)); + } + if (@TYPE@_LT(v[IDX(4)], v[IDX(1)])) { + SWAP(SORTEE(4), SORTEE(1)); + } + if (@TYPE@_LT(v[IDX(2)], v[IDX(1)])) { + SWAP(SORTEE(2), SORTEE(1)); + } + if (@TYPE@_LT(v[IDX(3)], v[IDX(2)])) { + if (@TYPE@_LT(v[IDX(3)], v[IDX(1)])) { + return 1; + } + else { + return 3; + } + } + else { + /* v[1] and v[2] swapped into order above */ + return 2; + } +} + + +/* + * partition and return the index were the pivot belongs + * the data must have following property to avoid bound checks: + * ll ... hh + * lower-than-pivot [x x x x] larger-than-pivot + */ +static NPY_INLINE void +@name@unguarded_partition_@suff@(@type@ * v, +#if @arg@ + npy_intp * tosort, +#endif + const @type@ pivot, + npy_intp * ll, npy_intp * hh) +{ + for (;;) { + do (*ll)++; while (@TYPE@_LT(v[IDX(*ll)], pivot)); + do (*hh)--; while (@TYPE@_LT(pivot, v[IDX(*hh)])); + + if (*hh < *ll) + break; + + SWAP(SORTEE(*ll), SORTEE(*hh)); + } +} + + +/* + * select median of median of blocks of 5 + * if used as partition pivot it splits the range into at least 30%/70% + * allowing linear time worstcase quickselect + */ +static npy_intp +@name@median_of_median5_@suff@(@type@ *v, +#if @arg@ + npy_intp* tosort, +#endif + const npy_intp num, + npy_intp * pivots, + npy_intp * npiv) +{ + npy_intp i, subleft; + npy_intp right = num - 1; + npy_intp nmed = (right + 1) / 5; + for (i = 0, subleft = 0; i < nmed; i++, subleft += 5) { + npy_intp m = MEDIAN5(v, tosort, subleft); + SWAP(SORTEE(subleft + m), SORTEE(i)); + } + + if (nmed > 2) + INTROSELECT(v, tosort, nmed, nmed / 2, pivots, npiv); + return nmed / 2; +} + + +/* + * N^2 selection, fast only for very small kth + * useful for close multiple partitions + * (e.g. even element median, interpolating percentile) + */ +static int +@name@dumb_select_@suff@(@type@ *v, +#if @arg@ + npy_intp * tosort, +#endif + npy_intp num, npy_intp kth) +{ + npy_intp i; + for (i = 0; i <= kth; i++) { + npy_intp minidx = i; + @type@ minval = v[IDX(i)]; + npy_intp k; + for (k = i + 1; k < num; k++) { + if (@TYPE@_LT(v[IDX(k)], minval)) { + minidx = k; + minval = v[IDX(k)]; + } + } + SWAP(SORTEE(i), SORTEE(minidx)); + } + + return 0; +} + + +/* + * iterative median of 3 quickselect with cutoff to median-of-medians-of5 + * receives stack of already computed pivots in v to minimize the + * partition size were kth is searched in + * + * area that needs partitioning in [...] 
+ * kth 0: [8 7 6 5 4 3 2 1 0] -> med3 partitions elements [4, 2, 0] + * 0 1 2 3 4 8 7 5 6 -> pop requested kth -> stack [4, 2] + * kth 3: 0 1 2 [3] 4 8 7 5 6 -> stack [4] + * kth 5: 0 1 2 3 4 [8 7 5 6] -> stack [6] + * kth 8: 0 1 2 3 4 5 6 [8 7] -> stack [] + * + */ +int +@name@introselect_@suff@(@type@ *v, +#if @arg@ + npy_intp* tosort, +#endif + npy_intp num, npy_intp kth, + npy_intp * pivots, + npy_intp * npiv, + void *NOT_USED) +{ + npy_intp low = 0; + npy_intp high = num - 1; + int depth_limit; + + if (npiv == NULL) + pivots = NULL; + + while (pivots != NULL && *npiv > 0) { + if (pivots[*npiv - 1] > kth) { + /* pivot larger than kth set it as upper bound */ + high = pivots[*npiv - 1] - 1; + break; + } + else if (pivots[*npiv - 1] == kth) { + /* kth was already found in a previous iteration -> done */ + return 0; + } + + low = pivots[*npiv - 1] + 1; + + /* pop from stack */ + *npiv -= 1; + } + + /* + * use a faster O(n*kth) algorithm for very small kth + * e.g. for interpolating percentile + */ + if (kth - low < 3) { + DUMBSELECT(v, tosort, low, high - low + 1, kth - low); + store_pivot(kth, kth, pivots, npiv); + return 0; + } + else if (@inexact@ && kth == num - 1) { + /* useful to check if NaN present via partition(d, (x, -1)) */ + npy_intp k; + npy_intp maxidx = low; + @type@ maxval = v[IDX(low)]; + for (k = low + 1; k < num; k++) { + if (!@TYPE@_LT(v[IDX(k)], maxval)) { + maxidx = k; + maxval = v[IDX(k)]; + } + } + SWAP(SORTEE(kth), SORTEE(maxidx)); + return 0; + } + + depth_limit = npy_get_msb(num) * 2; + + /* guarantee three elements */ + for (;low + 1 < high;) { + npy_intp ll = low + 1; + npy_intp hh = high; + + /* + * if we aren't making sufficient progress with median of 3 + * fall back to median-of-median5 pivot for linear worst case + * med3 for small sizes is required to do unguarded partition + */ + if (depth_limit > 0 || hh - ll < 5) { + const npy_intp mid = low + (high - low) / 2; + /* median of 3 pivot strategy, + * swapping for efficient partition */ + MEDIAN3_SWAP(v, tosort, low, mid, high); + } + else { + npy_intp mid; + /* FIXME: always use pivots to optimize this iterative partition */ +#if @arg@ + mid = ll + amedian_of_median5_@suff@(v, tosort + ll, hh - ll, NULL, NULL); +#else + mid = ll + median_of_median5_@suff@(v + ll, hh - ll, NULL, NULL); +#endif + SWAP(SORTEE(mid), SORTEE(low)); + /* adapt for the larger partition than med3 pivot */ + ll--; + hh++; + } + + depth_limit--; + + /* + * find place to put pivot (in low): + * previous swapping removes need for bound checks + * pivot 3-lowest [x x x] 3-highest + */ + UNGUARDED_PARTITION(v, tosort, v[IDX(low)], &ll, &hh); + + /* move pivot into position */ + SWAP(SORTEE(low), SORTEE(hh)); + + /* kth pivot stored later */ + if (hh != kth) { + store_pivot(hh, kth, pivots, npiv); + } + + if (hh >= kth) + high = hh - 1; + if (hh <= kth) + low = ll; + } + + /* two elements */ + if (high == low + 1) { + if (@TYPE@_LT(v[IDX(high)], v[IDX(low)])) { + SWAP(SORTEE(high), SORTEE(low)) + } + } + store_pivot(kth, kth, pivots, npiv); + + return 0; +} + + +#undef IDX +#undef SWAP +#undef SORTEE +#undef MEDIAN3_SWAP +#undef MEDIAN5 +#undef UNGUARDED_PARTITION +#undef INTROSELECT +#undef DUMBSELECT +/**end repeat1**/ + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/_operand_flag_tests.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/_operand_flag_tests.c.src new file mode 100644 index 0000000000..551a9c6329 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/_operand_flag_tests.c.src 
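The pivot stack threaded through the introselect template above is what makes repeated partitions of one buffer cheap: each call stores pivots that are not smaller than its kth, and a later call pops the stored pivots below its new kth (raising the lower bound) while using the first stored pivot above it as an upper bound. A minimal caller sketch follows, assuming the template instantiates to introselect_double for the double suffix and that NPY_MAX_PIVOT_STACK matches numpy's internal npy_partition.h; both names are internal and are shown here only as assumptions, not as public API.

#include <numpy/ndarraytypes.h>   /* npy_intp, npy_double */

/* Assumed to mirror numpy's internal npy_partition.h. */
#define NPY_MAX_PIVOT_STACK 50

/* Assumed prototype of the @suff@ = double instantiation generated above. */
int introselect_double(npy_double *v, npy_intp num, npy_intp kth,
                       npy_intp *pivots, npy_intp *npiv, void *not_used);

/* Partition the same buffer at several ascending kth values. */
static void
partition_three_kth(npy_double *v, npy_intp num)
{
    npy_intp pivots[NPY_MAX_PIVOT_STACK];
    npy_intp npiv = 0;
    npy_intp kth[3];
    npy_intp i;

    kth[0] = num / 4;
    kth[1] = num / 2;
    kth[2] = num - 1;
    for (i = 0; i < 3; i++) {
        /*
         * Each call pops stored pivots below kth[i] to raise the lower
         * bound and uses a stored pivot above kth[i], if any, as the upper
         * bound, so only the slice still containing kth[i] is partitioned
         * again.
         */
        introselect_double(v, num, kth[i], pivots, &npiv, NULL);
    }
}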
@@ -0,0 +1,105 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include <Python.h> +#include <numpy/arrayobject.h> +#include <numpy/ufuncobject.h> +#include "numpy/npy_3kcompat.h" +#include <math.h> +#include <structmember.h> + + +static PyMethodDef TestMethods[] = { + {NULL, NULL, 0, NULL} +}; + + +static void +inplace_add(char **args, npy_intp *dimensions, npy_intp *steps, void *data) +{ + npy_intp i; + npy_intp n = dimensions[0]; + char *in1 = args[0]; + char *in2 = args[1]; + npy_intp in1_step = steps[0]; + npy_intp in2_step = steps[1]; + + for (i = 0; i < n; i++) { + (*(long *)in1) = *(long*)in1 + *(long*)in2; + in1 += in1_step; + in2 += in2_step; + } +} + + +/*This a pointer to the above function*/ +PyUFuncGenericFunction funcs[1] = {&inplace_add}; + +/* These are the input and return dtypes of logit.*/ +static char types[2] = {NPY_LONG, NPY_LONG}; + +static void *data[1] = {NULL}; + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_operand_flag_tests", + NULL, + -1, + TestMethods, + NULL, + NULL, + NULL, + NULL +}; + +#define RETVAL m +PyMODINIT_FUNC PyInit__operand_flag_tests(void) +{ +#else +#define RETVAL +PyMODINIT_FUNC init_operand_flag_tests(void) +{ +#endif + PyObject *m = NULL; + PyObject *ufunc; + +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_operand_flag_tests", TestMethods); +#endif + if (m == NULL) { + goto fail; + } + + import_array(); + import_umath(); + + ufunc = PyUFunc_FromFuncAndData(funcs, data, types, 1, 2, 0, + PyUFunc_None, "inplace_add", + "inplace_add_docstring", 0); + + /* + * Set flags to turn off buffering for first input operand, + * so that result can be written back to input operand. + */ + ((PyUFuncObject*)ufunc)->op_flags[0] = NPY_ITER_READWRITE; + ((PyUFuncObject*)ufunc)->iter_flags = NPY_ITER_REDUCE_OK; + PyModule_AddObject(m, "inplace_add", (PyObject*)ufunc); + + return RETVAL; + +fail: + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "cannot load _operand_flag_tests module."); + } +#if defined(NPY_PY3K) + if (m) { + Py_DECREF(m); + m = NULL; + } +#endif + return RETVAL; + +} diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/_rational_tests.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/_rational_tests.c.src new file mode 100644 index 0000000000..9e74845df2 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/_rational_tests.c.src @@ -0,0 +1,1409 @@ +/* Fixed size rational numbers exposed to Python */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include <Python.h> +#include <structmember.h> +#include <numpy/arrayobject.h> +#include <numpy/ufuncobject.h> +#include <numpy/npy_3kcompat.h> +#include <math.h> + +#include "common.h" /* for error_converting */ + + +/* Relevant arithmetic exceptions */ + +/* Uncomment the following line to work around a bug in numpy */ +/* #define ACQUIRE_GIL */ + +static void +set_overflow(void) { +#ifdef ACQUIRE_GIL + /* Need to grab the GIL to dodge a bug in numpy */ + PyGILState_STATE state = PyGILState_Ensure(); +#endif + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_OverflowError, + "overflow in rational arithmetic"); + } +#ifdef ACQUIRE_GIL + PyGILState_Release(state); +#endif +} + +static void +set_zero_divide(void) { +#ifdef ACQUIRE_GIL + /* Need to grab the GIL to dodge a bug in numpy */ + PyGILState_STATE state = PyGILState_Ensure(); +#endif + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ZeroDivisionError, + "zero divide in rational arithmetic"); + } +#ifdef 
ACQUIRE_GIL + PyGILState_Release(state); +#endif +} + +/* Integer arithmetic utilities */ + +static NPY_INLINE npy_int32 +safe_neg(npy_int32 x) { + if (x==(npy_int32)1<<31) { + set_overflow(); + } + return -x; +} + +static NPY_INLINE npy_int32 +safe_abs32(npy_int32 x) { + npy_int32 nx; + if (x>=0) { + return x; + } + nx = -x; + if (nx<0) { + set_overflow(); + } + return nx; +} + +static NPY_INLINE npy_int64 +safe_abs64(npy_int64 x) { + npy_int64 nx; + if (x>=0) { + return x; + } + nx = -x; + if (nx<0) { + set_overflow(); + } + return nx; +} + +static NPY_INLINE npy_int64 +gcd(npy_int64 x, npy_int64 y) { + x = safe_abs64(x); + y = safe_abs64(y); + if (x < y) { + npy_int64 t = x; + x = y; + y = t; + } + while (y) { + npy_int64 t; + x = x%y; + t = x; + x = y; + y = t; + } + return x; +} + +static NPY_INLINE npy_int64 +lcm(npy_int64 x, npy_int64 y) { + npy_int64 lcm; + if (!x || !y) { + return 0; + } + x /= gcd(x,y); + lcm = x*y; + if (lcm/y!=x) { + set_overflow(); + } + return safe_abs64(lcm); +} + +/* Fixed precision rational numbers */ + +typedef struct { + /* numerator */ + npy_int32 n; + /* + * denominator minus one: numpy.zeros() uses memset(0) for non-object + * types, so need to ensure that rational(0) has all zero bytes + */ + npy_int32 dmm; +} rational; + +static NPY_INLINE rational +make_rational_int(npy_int64 n) { + rational r = {(npy_int32)n,0}; + if (r.n != n) { + set_overflow(); + } + return r; +} + +static rational +make_rational_slow(npy_int64 n_, npy_int64 d_) { + rational r = {0}; + if (!d_) { + set_zero_divide(); + } + else { + npy_int64 g = gcd(n_,d_); + npy_int32 d; + n_ /= g; + d_ /= g; + r.n = (npy_int32)n_; + d = (npy_int32)d_; + if (r.n!=n_ || d!=d_) { + set_overflow(); + } + else { + if (d <= 0) { + d = -d; + r.n = safe_neg(r.n); + } + r.dmm = d-1; + } + } + return r; +} + +static NPY_INLINE npy_int32 +d(rational r) { + return r.dmm+1; +} + +/* Assumes d_ > 0 */ +static rational +make_rational_fast(npy_int64 n_, npy_int64 d_) { + npy_int64 g = gcd(n_,d_); + rational r; + n_ /= g; + d_ /= g; + r.n = (npy_int32)n_; + r.dmm = (npy_int32)(d_-1); + if (r.n!=n_ || r.dmm+1!=d_) { + set_overflow(); + } + return r; +} + +static NPY_INLINE rational +rational_negative(rational r) { + rational x; + x.n = safe_neg(r.n); + x.dmm = r.dmm; + return x; +} + +static NPY_INLINE rational +rational_add(rational x, rational y) { + /* + * Note that the numerator computation can never overflow int128_t, + * since each term is strictly under 2**128/4 (since d > 0). 
+ */ + return make_rational_fast((npy_int64)x.n*d(y)+(npy_int64)d(x)*y.n, + (npy_int64)d(x)*d(y)); +} + +static NPY_INLINE rational +rational_subtract(rational x, rational y) { + /* We're safe from overflow as with + */ + return make_rational_fast((npy_int64)x.n*d(y)-(npy_int64)d(x)*y.n, + (npy_int64)d(x)*d(y)); +} + +static NPY_INLINE rational +rational_multiply(rational x, rational y) { + /* We're safe from overflow as with + */ + return make_rational_fast((npy_int64)x.n*y.n,(npy_int64)d(x)*d(y)); +} + +static NPY_INLINE rational +rational_divide(rational x, rational y) { + return make_rational_slow((npy_int64)x.n*d(y),(npy_int64)d(x)*y.n); +} + +static NPY_INLINE npy_int64 +rational_floor(rational x) { + /* Always round down */ + if (x.n>=0) { + return x.n/d(x); + } + /* + * This can be done without casting up to 64 bits, but it requires + * working out all the sign cases + */ + return -((-(npy_int64)x.n+d(x)-1)/d(x)); +} + +static NPY_INLINE npy_int64 +rational_ceil(rational x) { + return -rational_floor(rational_negative(x)); +} + +static NPY_INLINE rational +rational_remainder(rational x, rational y) { + return rational_subtract(x, rational_multiply(y,make_rational_int( + rational_floor(rational_divide(x,y))))); +} + +static NPY_INLINE rational +rational_abs(rational x) { + rational y; + y.n = safe_abs32(x.n); + y.dmm = x.dmm; + return y; +} + +static NPY_INLINE npy_int64 +rational_rint(rational x) { + /* + * Round towards nearest integer, moving exact half integers towards + * zero + */ + npy_int32 d_ = d(x); + return (2*(npy_int64)x.n+(x.n<0?-d_:d_))/(2*(npy_int64)d_); +} + +static NPY_INLINE int +rational_sign(rational x) { + return x.n<0?-1:x.n==0?0:1; +} + +static NPY_INLINE rational +rational_inverse(rational x) { + rational y = {0}; + if (!x.n) { + set_zero_divide(); + } + else { + npy_int32 d_; + y.n = d(x); + d_ = x.n; + if (d_ <= 0) { + d_ = safe_neg(d_); + y.n = -y.n; + } + y.dmm = d_-1; + } + return y; +} + +static NPY_INLINE int +rational_eq(rational x, rational y) { + /* + * Since we enforce d > 0, and store fractions in reduced form, + * equality is easy. 
+ */ + return x.n==y.n && x.dmm==y.dmm; +} + +static NPY_INLINE int +rational_ne(rational x, rational y) { + return !rational_eq(x,y); +} + +static NPY_INLINE int +rational_lt(rational x, rational y) { + return (npy_int64)x.n*d(y) < (npy_int64)y.n*d(x); +} + +static NPY_INLINE int +rational_gt(rational x, rational y) { + return rational_lt(y,x); +} + +static NPY_INLINE int +rational_le(rational x, rational y) { + return !rational_lt(y,x); +} + +static NPY_INLINE int +rational_ge(rational x, rational y) { + return !rational_lt(x,y); +} + +static NPY_INLINE npy_int32 +rational_int(rational x) { + return x.n/d(x); +} + +static NPY_INLINE double +rational_double(rational x) { + return (double)x.n/d(x); +} + +static NPY_INLINE int +rational_nonzero(rational x) { + return x.n!=0; +} + +static int +scan_rational(const char** s, rational* x) { + long n,d; + int offset; + const char* ss; + if (sscanf(*s,"%ld%n",&n,&offset)<=0) { + return 0; + } + ss = *s+offset; + if (*ss!='/') { + *s = ss; + *x = make_rational_int(n); + return 1; + } + ss++; + if (sscanf(ss,"%ld%n",&d,&offset)<=0 || d<=0) { + return 0; + } + *s = ss+offset; + *x = make_rational_slow(n,d); + return 1; +} + +/* Expose rational to Python as a numpy scalar */ + +typedef struct { + PyObject_HEAD + rational r; +} PyRational; + +static PyTypeObject PyRational_Type; + +static NPY_INLINE int +PyRational_Check(PyObject* object) { + return PyObject_IsInstance(object,(PyObject*)&PyRational_Type); +} + +static PyObject* +PyRational_FromRational(rational x) { + PyRational* p = (PyRational*)PyRational_Type.tp_alloc(&PyRational_Type,0); + if (p) { + p->r = x; + } + return (PyObject*)p; +} + +static PyObject* +pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { + Py_ssize_t size; + PyObject* x[2]; + long n[2]={0,1}; + int i; + rational r; + if (kwds && PyDict_Size(kwds)) { + PyErr_SetString(PyExc_TypeError, + "constructor takes no keyword arguments"); + return 0; + } + size = PyTuple_GET_SIZE(args); + if (size > 2) { + PyErr_SetString(PyExc_TypeError, + "expected rational or numerator and optional denominator"); + return 0; + } + + if (size == 1) { + x[0] = PyTuple_GET_ITEM(args, 0); + if (PyRational_Check(x[0])) { + Py_INCREF(x[0]); + return x[0]; + } + else if (PyString_Check(x[0])) { + const char* s = PyString_AS_STRING(x[0]); + rational x; + if (scan_rational(&s,&x)) { + const char* p; + for (p = s; *p; p++) { + if (!isspace(*p)) { + goto bad; + } + } + return PyRational_FromRational(x); + } + bad: + PyErr_Format(PyExc_ValueError, + "invalid rational literal '%s'",s); + return 0; + } + } + + for (i=0; i<size; i++) { + PyObject* y; + int eq; + x[i] = PyTuple_GET_ITEM(args, i); + n[i] = PyInt_AsLong(x[i]); + if (error_converting(n[i])) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Format(PyExc_TypeError, + "expected integer %s, got %s", + (i ? "denominator" : "numerator"), + x[i]->ob_type->tp_name); + } + return 0; + } + /* Check that we had an exact integer */ + y = PyInt_FromLong(n[i]); + if (!y) { + return 0; + } + eq = PyObject_RichCompareBool(x[i],y,Py_EQ); + Py_DECREF(y); + if (eq<0) { + return 0; + } + if (!eq) { + PyErr_Format(PyExc_TypeError, + "expected integer %s, got %s", + (i ? 
"denominator" : "numerator"), + x[i]->ob_type->tp_name); + return 0; + } + } + r = make_rational_slow(n[0],n[1]); + if (PyErr_Occurred()) { + return 0; + } + return PyRational_FromRational(r); +} + +/* + * Returns Py_NotImplemented on most conversion failures, or raises an + * overflow error for too long ints + */ +#define AS_RATIONAL(dst,object) \ + { \ + dst.n = 0; \ + if (PyRational_Check(object)) { \ + dst = ((PyRational*)object)->r; \ + } \ + else { \ + PyObject* y_; \ + int eq_; \ + long n_ = PyInt_AsLong(object); \ + if (error_converting(n_)) { \ + if (PyErr_ExceptionMatches(PyExc_TypeError)) { \ + PyErr_Clear(); \ + Py_INCREF(Py_NotImplemented); \ + return Py_NotImplemented; \ + } \ + return 0; \ + } \ + y_ = PyInt_FromLong(n_); \ + if (!y_) { \ + return 0; \ + } \ + eq_ = PyObject_RichCompareBool(object,y_,Py_EQ); \ + Py_DECREF(y_); \ + if (eq_<0) { \ + return 0; \ + } \ + if (!eq_) { \ + Py_INCREF(Py_NotImplemented); \ + return Py_NotImplemented; \ + } \ + dst = make_rational_int(n_); \ + } \ + } + +static PyObject* +pyrational_richcompare(PyObject* a, PyObject* b, int op) { + rational x, y; + int result = 0; + AS_RATIONAL(x,a); + AS_RATIONAL(y,b); + #define OP(py,op) case py: result = rational_##op(x,y); break; + switch (op) { + OP(Py_LT,lt) + OP(Py_LE,le) + OP(Py_EQ,eq) + OP(Py_NE,ne) + OP(Py_GT,gt) + OP(Py_GE,ge) + }; + #undef OP + return PyBool_FromLong(result); +} + +static PyObject* +pyrational_repr(PyObject* self) { + rational x = ((PyRational*)self)->r; + if (d(x)!=1) { + return PyUString_FromFormat( + "rational(%ld,%ld)",(long)x.n,(long)d(x)); + } + else { + return PyUString_FromFormat( + "rational(%ld)",(long)x.n); + } +} + +static PyObject* +pyrational_str(PyObject* self) { + rational x = ((PyRational*)self)->r; + if (d(x)!=1) { + return PyString_FromFormat( + "%ld/%ld",(long)x.n,(long)d(x)); + } + else { + return PyString_FromFormat( + "%ld",(long)x.n); + } +} + +static npy_hash_t +pyrational_hash(PyObject* self) { + rational x = ((PyRational*)self)->r; + /* Use a fairly weak hash as Python expects */ + long h = 131071*x.n+524287*x.dmm; + /* Never return the special error value -1 */ + return h==-1?2:h; +} + +#define RATIONAL_BINOP_2(name,exp) \ + static PyObject* \ + pyrational_##name(PyObject* a, PyObject* b) { \ + rational x, y, z; \ + AS_RATIONAL(x,a); \ + AS_RATIONAL(y,b); \ + z = exp; \ + if (PyErr_Occurred()) { \ + return 0; \ + } \ + return PyRational_FromRational(z); \ + } +#define RATIONAL_BINOP(name) RATIONAL_BINOP_2(name,rational_##name(x,y)) +RATIONAL_BINOP(add) +RATIONAL_BINOP(subtract) +RATIONAL_BINOP(multiply) +RATIONAL_BINOP(divide) +RATIONAL_BINOP(remainder) +RATIONAL_BINOP_2(floor_divide, + make_rational_int(rational_floor(rational_divide(x,y)))) + +#define RATIONAL_UNOP(name,type,exp,convert) \ + static PyObject* \ + pyrational_##name(PyObject* self) { \ + rational x = ((PyRational*)self)->r; \ + type y = exp; \ + if (PyErr_Occurred()) { \ + return 0; \ + } \ + return convert(y); \ + } +RATIONAL_UNOP(negative,rational,rational_negative(x),PyRational_FromRational) +RATIONAL_UNOP(absolute,rational,rational_abs(x),PyRational_FromRational) +RATIONAL_UNOP(int,long,rational_int(x),PyInt_FromLong) +RATIONAL_UNOP(float,double,rational_double(x),PyFloat_FromDouble) + +static PyObject* +pyrational_positive(PyObject* self) { + Py_INCREF(self); + return self; +} + +static int +pyrational_nonzero(PyObject* self) { + rational x = ((PyRational*)self)->r; + return rational_nonzero(x); +} + +static PyNumberMethods pyrational_as_number = { + pyrational_add, /* 
nb_add */ + pyrational_subtract, /* nb_subtract */ + pyrational_multiply, /* nb_multiply */ +#if PY_MAJOR_VERSION < 3 + pyrational_divide, /* nb_divide */ +#endif + pyrational_remainder, /* nb_remainder */ + 0, /* nb_divmod */ + 0, /* nb_power */ + pyrational_negative, /* nb_negative */ + pyrational_positive, /* nb_positive */ + pyrational_absolute, /* nb_absolute */ + pyrational_nonzero, /* nb_nonzero */ + 0, /* nb_invert */ + 0, /* nb_lshift */ + 0, /* nb_rshift */ + 0, /* nb_and */ + 0, /* nb_xor */ + 0, /* nb_or */ +#if PY_MAJOR_VERSION < 3 + 0, /* nb_coerce */ +#endif + pyrational_int, /* nb_int */ +#if PY_MAJOR_VERSION < 3 + pyrational_int, /* nb_long */ +#else + 0, /* reserved */ +#endif + pyrational_float, /* nb_float */ +#if PY_MAJOR_VERSION < 3 + 0, /* nb_oct */ + 0, /* nb_hex */ +#endif + + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ +#if PY_MAJOR_VERSION < 3 + 0, /* nb_inplace_divide */ +#endif + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + + pyrational_floor_divide, /* nb_floor_divide */ + pyrational_divide, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + 0, /* nb_index */ +}; + +static PyObject* +pyrational_n(PyObject* self, void* closure) { + return PyInt_FromLong(((PyRational*)self)->r.n); +} + +static PyObject* +pyrational_d(PyObject* self, void* closure) { + return PyInt_FromLong(d(((PyRational*)self)->r)); +} + +static PyGetSetDef pyrational_getset[] = { + {(char*)"n",pyrational_n,0,(char*)"numerator",0}, + {(char*)"d",pyrational_d,0,(char*)"denominator",0}, + {0} /* sentinel */ +}; + +static PyTypeObject PyRational_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "rational", /* tp_name */ + sizeof(PyRational), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + 0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + pyrational_repr, /* tp_repr */ + &pyrational_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + pyrational_hash, /* tp_hash */ + 0, /* tp_call */ + pyrational_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + "Fixed precision rational numbers", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + pyrational_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + pyrational_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + pyrational_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +}; + +/* NumPy support */ + +static PyObject* +npyrational_getitem(void* data, void* arr) { + rational r; + memcpy(&r,data,sizeof(rational)); + return PyRational_FromRational(r); +} + +static int +npyrational_setitem(PyObject* item, void* data, void* arr) { + rational r; + if (PyRational_Check(item)) { + r = ((PyRational*)item)->r; + } + else { + long n = 
PyInt_AsLong(item); + PyObject* y; + int eq; + if (error_converting(n)) { + return -1; + } + y = PyInt_FromLong(n); + if (!y) { + return -1; + } + eq = PyObject_RichCompareBool(item,y,Py_EQ); + Py_DECREF(y); + if (eq<0) { + return -1; + } + if (!eq) { + PyErr_Format(PyExc_TypeError, + "expected rational, got %s", item->ob_type->tp_name); + return -1; + } + r = make_rational_int(n); + } + memcpy(data,&r,sizeof(rational)); + return 0; +} + +static NPY_INLINE void +byteswap(npy_int32* x) { + char* p = (char*)x; + size_t i; + for (i = 0; i < sizeof(*x)/2; i++) { + size_t j = sizeof(*x)-1-i; + char t = p[i]; + p[i] = p[j]; + p[j] = t; + } +} + +static void +npyrational_copyswapn(void* dst_, npy_intp dstride, void* src_, + npy_intp sstride, npy_intp n, int swap, void* arr) { + char *dst = (char*)dst_, *src = (char*)src_; + npy_intp i; + if (!src) { + return; + } + if (swap) { + for (i = 0; i < n; i++) { + rational* r = (rational*)(dst+dstride*i); + memcpy(r,src+sstride*i,sizeof(rational)); + byteswap(&r->n); + byteswap(&r->dmm); + } + } + else if (dstride == sizeof(rational) && sstride == sizeof(rational)) { + memcpy(dst, src, n*sizeof(rational)); + } + else { + for (i = 0; i < n; i++) { + memcpy(dst + dstride*i, src + sstride*i, sizeof(rational)); + } + } +} + +static void +npyrational_copyswap(void* dst, void* src, int swap, void* arr) { + rational* r; + if (!src) { + return; + } + r = (rational*)dst; + memcpy(r,src,sizeof(rational)); + if (swap) { + byteswap(&r->n); + byteswap(&r->dmm); + } +} + +static int +npyrational_compare(const void* d0, const void* d1, void* arr) { + rational x = *(rational*)d0, + y = *(rational*)d1; + return rational_lt(x,y)?-1:rational_eq(x,y)?0:1; +} + +#define FIND_EXTREME(name,op) \ + static int \ + npyrational_##name(void* data_, npy_intp n, \ + npy_intp* max_ind, void* arr) { \ + const rational* data; \ + npy_intp best_i; \ + rational best_r; \ + npy_intp i; \ + if (!n) { \ + return 0; \ + } \ + data = (rational*)data_; \ + best_i = 0; \ + best_r = data[0]; \ + for (i = 1; i < n; i++) { \ + if (rational_##op(data[i],best_r)) { \ + best_i = i; \ + best_r = data[i]; \ + } \ + } \ + *max_ind = best_i; \ + return 0; \ + } +FIND_EXTREME(argmin,lt) +FIND_EXTREME(argmax,gt) + +static void +npyrational_dot(void* ip0_, npy_intp is0, void* ip1_, npy_intp is1, + void* op, npy_intp n, void* arr) { + rational r = {0}; + const char *ip0 = (char*)ip0_, *ip1 = (char*)ip1_; + npy_intp i; + for (i = 0; i < n; i++) { + r = rational_add(r,rational_multiply(*(rational*)ip0,*(rational*)ip1)); + ip0 += is0; + ip1 += is1; + } + *(rational*)op = r; +} + +static npy_bool +npyrational_nonzero(void* data, void* arr) { + rational r; + memcpy(&r,data,sizeof(r)); + return rational_nonzero(r)?NPY_TRUE:NPY_FALSE; +} + +static int +npyrational_fill(void* data_, npy_intp length, void* arr) { + rational* data = (rational*)data_; + rational delta = rational_subtract(data[1],data[0]); + rational r = data[1]; + npy_intp i; + for (i = 2; i < length; i++) { + r = rational_add(r,delta); + data[i] = r; + } + return 0; +} + +static int +npyrational_fillwithscalar(void* buffer_, npy_intp length, + void* value, void* arr) { + rational r = *(rational*)value; + rational* buffer = (rational*)buffer_; + npy_intp i; + for (i = 0; i < length; i++) { + buffer[i] = r; + } + return 0; +} + +static PyArray_ArrFuncs npyrational_arrfuncs; + +typedef struct { char c; rational r; } align_test; + +PyArray_Descr npyrational_descr = { + PyObject_HEAD_INIT(0) + &PyRational_Type, /* typeobj */ + 'V', /* kind */ + 'r', /* type 
*/ + '=', /* byteorder */ + /* + * For now, we need NPY_NEEDS_PYAPI in order to make numpy detect our + * exceptions. This isn't technically necessary, + * since we're careful about thread safety, and hopefully future + * versions of numpy will recognize that. + */ + NPY_NEEDS_PYAPI | NPY_USE_GETITEM | NPY_USE_SETITEM, /* hasobject */ + 0, /* type_num */ + sizeof(rational), /* elsize */ + offsetof(align_test,r), /* alignment */ + 0, /* subarray */ + 0, /* fields */ + 0, /* names */ + &npyrational_arrfuncs, /* f */ +}; + +#define DEFINE_CAST(From,To,statement) \ + static void \ + npycast_##From##_##To(void* from_, void* to_, npy_intp n, \ + void* fromarr, void* toarr) { \ + const From* from = (From*)from_; \ + To* to = (To*)to_; \ + npy_intp i; \ + for (i = 0; i < n; i++) { \ + From x = from[i]; \ + statement \ + to[i] = y; \ + } \ + } +#define DEFINE_INT_CAST(bits) \ + DEFINE_CAST(npy_int##bits,rational,rational y = make_rational_int(x);) \ + DEFINE_CAST(rational,npy_int##bits,npy_int32 z = rational_int(x); \ + npy_int##bits y = z; if (y != z) set_overflow();) +DEFINE_INT_CAST(8) +DEFINE_INT_CAST(16) +DEFINE_INT_CAST(32) +DEFINE_INT_CAST(64) +DEFINE_CAST(rational,float,double y = rational_double(x);) +DEFINE_CAST(rational,double,double y = rational_double(x);) +DEFINE_CAST(npy_bool,rational,rational y = make_rational_int(x);) +DEFINE_CAST(rational,npy_bool,npy_bool y = rational_nonzero(x);) + +#define BINARY_UFUNC(name,intype0,intype1,outtype,exp) \ + void name(char** args, npy_intp* dimensions, \ + npy_intp* steps, void* data) { \ + npy_intp is0 = steps[0], is1 = steps[1], \ + os = steps[2], n = *dimensions; \ + char *i0 = args[0], *i1 = args[1], *o = args[2]; \ + int k; \ + for (k = 0; k < n; k++) { \ + intype0 x = *(intype0*)i0; \ + intype1 y = *(intype1*)i1; \ + *(outtype*)o = exp; \ + i0 += is0; i1 += is1; o += os; \ + } \ + } +#define RATIONAL_BINARY_UFUNC(name,type,exp) \ + BINARY_UFUNC(rational_ufunc_##name,rational,rational,type,exp) +RATIONAL_BINARY_UFUNC(add,rational,rational_add(x,y)) +RATIONAL_BINARY_UFUNC(subtract,rational,rational_subtract(x,y)) +RATIONAL_BINARY_UFUNC(multiply,rational,rational_multiply(x,y)) +RATIONAL_BINARY_UFUNC(divide,rational,rational_divide(x,y)) +RATIONAL_BINARY_UFUNC(remainder,rational,rational_remainder(x,y)) +RATIONAL_BINARY_UFUNC(floor_divide,rational, + make_rational_int(rational_floor(rational_divide(x,y)))) +PyUFuncGenericFunction rational_ufunc_true_divide = rational_ufunc_divide; +RATIONAL_BINARY_UFUNC(minimum,rational,rational_lt(x,y)?x:y) +RATIONAL_BINARY_UFUNC(maximum,rational,rational_lt(x,y)?y:x) +RATIONAL_BINARY_UFUNC(equal,npy_bool,rational_eq(x,y)) +RATIONAL_BINARY_UFUNC(not_equal,npy_bool,rational_ne(x,y)) +RATIONAL_BINARY_UFUNC(less,npy_bool,rational_lt(x,y)) +RATIONAL_BINARY_UFUNC(greater,npy_bool,rational_gt(x,y)) +RATIONAL_BINARY_UFUNC(less_equal,npy_bool,rational_le(x,y)) +RATIONAL_BINARY_UFUNC(greater_equal,npy_bool,rational_ge(x,y)) + +BINARY_UFUNC(gcd_ufunc,npy_int64,npy_int64,npy_int64,gcd(x,y)) +BINARY_UFUNC(lcm_ufunc,npy_int64,npy_int64,npy_int64,lcm(x,y)) + +#define UNARY_UFUNC(name,type,exp) \ + void rational_ufunc_##name(char** args, npy_intp* dimensions, \ + npy_intp* steps, void* data) { \ + npy_intp is = steps[0], os = steps[1], n = *dimensions; \ + char *i = args[0], *o = args[1]; \ + int k; \ + for (k = 0; k < n; k++) { \ + rational x = *(rational*)i; \ + *(type*)o = exp; \ + i += is; o += os; \ + } \ + } +UNARY_UFUNC(negative,rational,rational_negative(x)) +UNARY_UFUNC(absolute,rational,rational_abs(x)) 
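/*
 * For reference, each UNARY_UFUNC(name, type, exp) line expands to a plain
 * element-wise inner loop; e.g. UNARY_UFUNC(negative, rational,
 * rational_negative(x)) above becomes (illustrative expansion only):
 *
 *   void rational_ufunc_negative(char** args, npy_intp* dimensions,
 *                                npy_intp* steps, void* data) {
 *       npy_intp is = steps[0], os = steps[1], n = *dimensions;
 *       char *i = args[0], *o = args[1];
 *       int k;
 *       for (k = 0; k < n; k++) {
 *           rational x = *(rational*)i;
 *           *(rational*)o = rational_negative(x);
 *           i += is; o += os;
 *       }
 *   }
 */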
+UNARY_UFUNC(floor,rational,make_rational_int(rational_floor(x))) +UNARY_UFUNC(ceil,rational,make_rational_int(rational_ceil(x))) +UNARY_UFUNC(trunc,rational,make_rational_int(x.n/d(x))) +UNARY_UFUNC(square,rational,rational_multiply(x,x)) +UNARY_UFUNC(rint,rational,make_rational_int(rational_rint(x))) +UNARY_UFUNC(sign,rational,make_rational_int(rational_sign(x))) +UNARY_UFUNC(reciprocal,rational,rational_inverse(x)) +UNARY_UFUNC(numerator,npy_int64,x.n) +UNARY_UFUNC(denominator,npy_int64,d(x)) + +static NPY_INLINE void +rational_matrix_multiply(char **args, npy_intp *dimensions, npy_intp *steps) +{ + /* pointers to data for input and output arrays */ + char *ip1 = args[0]; + char *ip2 = args[1]; + char *op = args[2]; + + /* lengths of core dimensions */ + npy_intp dm = dimensions[0]; + npy_intp dn = dimensions[1]; + npy_intp dp = dimensions[2]; + + /* striding over core dimensions */ + npy_intp is1_m = steps[0]; + npy_intp is1_n = steps[1]; + npy_intp is2_n = steps[2]; + npy_intp is2_p = steps[3]; + npy_intp os_m = steps[4]; + npy_intp os_p = steps[5]; + + /* core dimensions counters */ + npy_intp m, p; + + /* calculate dot product for each row/column vector pair */ + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { + npyrational_dot(ip1, is1_n, ip2, is2_n, op, dn, NULL); + + /* advance to next column of 2nd input array and output array */ + ip2 += is2_p; + op += os_p; + } + + /* reset to first column of 2nd input array and output array */ + ip2 -= is2_p * p; + op -= os_p * p; + + /* advance to next row of 1st input array and output array */ + ip1 += is1_m; + op += os_m; + } +} + + +static void +rational_gufunc_matrix_multiply(char **args, npy_intp *dimensions, + npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* outer dimensions counter */ + npy_intp N_; + + /* length of flattened outer dimensions */ + npy_intp dN = dimensions[0]; + + /* striding over flattened outer dimensions for input and output arrays */ + npy_intp s0 = steps[0]; + npy_intp s1 = steps[1]; + npy_intp s2 = steps[2]; + + /* + * loop through outer dimensions, performing matrix multiply on + * core dimensions for each loop + */ + for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) { + rational_matrix_multiply(args, dimensions+1, steps+3); + } +} + + +static void +rational_ufunc_test_add(char** args, npy_intp* dimensions, + npy_intp* steps, void* data) { + npy_intp is0 = steps[0], is1 = steps[1], os = steps[2], n = *dimensions; + char *i0 = args[0], *i1 = args[1], *o = args[2]; + int k; + for (k = 0; k < n; k++) { + npy_int64 x = *(npy_int64*)i0; + npy_int64 y = *(npy_int64*)i1; + *(rational*)o = rational_add(make_rational_fast(x, 1), + make_rational_fast(y, 1)); + i0 += is0; i1 += is1; o += os; + } +} + + +static void +rational_ufunc_test_add_rationals(char** args, npy_intp* dimensions, + npy_intp* steps, void* data) { + npy_intp is0 = steps[0], is1 = steps[1], os = steps[2], n = *dimensions; + char *i0 = args[0], *i1 = args[1], *o = args[2]; + int k; + for (k = 0; k < n; k++) { + rational x = *(rational*)i0; + rational y = *(rational*)i1; + *(rational*)o = rational_add(x, y); + i0 += is0; i1 += is1; o += os; + } +} + + +PyMethodDef module_methods[] = { + {0} /* sentinel */ +}; + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_rational_tests", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if defined(NPY_PY3K) +#define RETVAL m +PyMODINIT_FUNC PyInit__rational_tests(void) { +#else +#define RETVAL +PyMODINIT_FUNC 
init_rational_tests(void) { +#endif + + PyObject *m = NULL; + PyObject* numpy_str; + PyObject* numpy; + int npy_rational; + + import_array(); + if (PyErr_Occurred()) { + goto fail; + } + import_umath(); + if (PyErr_Occurred()) { + goto fail; + } + numpy_str = PyUString_FromString("numpy"); + if (!numpy_str) { + goto fail; + } + numpy = PyImport_Import(numpy_str); + Py_DECREF(numpy_str); + if (!numpy) { + goto fail; + } + + /* Can't set this until we import numpy */ + PyRational_Type.tp_base = &PyGenericArrType_Type; + + /* Initialize rational type object */ + if (PyType_Ready(&PyRational_Type) < 0) { + goto fail; + } + + /* Initialize rational descriptor */ + PyArray_InitArrFuncs(&npyrational_arrfuncs); + npyrational_arrfuncs.getitem = npyrational_getitem; + npyrational_arrfuncs.setitem = npyrational_setitem; + npyrational_arrfuncs.copyswapn = npyrational_copyswapn; + npyrational_arrfuncs.copyswap = npyrational_copyswap; + npyrational_arrfuncs.compare = npyrational_compare; + npyrational_arrfuncs.argmin = npyrational_argmin; + npyrational_arrfuncs.argmax = npyrational_argmax; + npyrational_arrfuncs.dotfunc = npyrational_dot; + npyrational_arrfuncs.nonzero = npyrational_nonzero; + npyrational_arrfuncs.fill = npyrational_fill; + npyrational_arrfuncs.fillwithscalar = npyrational_fillwithscalar; + /* Left undefined: scanfunc, fromstr, sort, argsort */ + Py_TYPE(&npyrational_descr) = &PyArrayDescr_Type; + npy_rational = PyArray_RegisterDataType(&npyrational_descr); + if (npy_rational<0) { + goto fail; + } + + /* Support dtype(rational) syntax */ + if (PyDict_SetItemString(PyRational_Type.tp_dict, "dtype", + (PyObject*)&npyrational_descr) < 0) { + goto fail; + } + + /* Register casts to and from rational */ + #define REGISTER_CAST(From,To,from_descr,to_typenum,safe) { \ + PyArray_Descr* from_descr_##From##_##To = (from_descr); \ + if (PyArray_RegisterCastFunc(from_descr_##From##_##To, \ + (to_typenum), \ + npycast_##From##_##To) < 0) { \ + goto fail; \ + } \ + if (safe && PyArray_RegisterCanCast(from_descr_##From##_##To, \ + (to_typenum), \ + NPY_NOSCALAR) < 0) { \ + goto fail; \ + } \ + } + #define REGISTER_INT_CASTS(bits) \ + REGISTER_CAST(npy_int##bits, rational, \ + PyArray_DescrFromType(NPY_INT##bits), npy_rational, 1) \ + REGISTER_CAST(rational, npy_int##bits, &npyrational_descr, \ + NPY_INT##bits, 0) + REGISTER_INT_CASTS(8) + REGISTER_INT_CASTS(16) + REGISTER_INT_CASTS(32) + REGISTER_INT_CASTS(64) + REGISTER_CAST(rational,float,&npyrational_descr,NPY_FLOAT,0) + REGISTER_CAST(rational,double,&npyrational_descr,NPY_DOUBLE,1) + REGISTER_CAST(npy_bool,rational, PyArray_DescrFromType(NPY_BOOL), + npy_rational,1) + REGISTER_CAST(rational,npy_bool,&npyrational_descr,NPY_BOOL,0) + + /* Register ufuncs */ + #define REGISTER_UFUNC(name,...) 
{ \ + PyUFuncObject* ufunc = \ + (PyUFuncObject*)PyObject_GetAttrString(numpy, #name); \ + int _types[] = __VA_ARGS__; \ + if (!ufunc) { \ + goto fail; \ + } \ + if (sizeof(_types)/sizeof(int)!=ufunc->nargs) { \ + PyErr_Format(PyExc_AssertionError, \ + "ufunc %s takes %d arguments, our loop takes %lu", \ + #name, ufunc->nargs, (unsigned long) \ + (sizeof(_types)/sizeof(int))); \ + Py_DECREF(ufunc); \ + goto fail; \ + } \ + if (PyUFunc_RegisterLoopForType((PyUFuncObject*)ufunc, npy_rational, \ + rational_ufunc_##name, _types, 0) < 0) { \ + Py_DECREF(ufunc); \ + goto fail; \ + } \ + Py_DECREF(ufunc); \ + } + #define REGISTER_UFUNC_BINARY_RATIONAL(name) \ + REGISTER_UFUNC(name, {npy_rational, npy_rational, npy_rational}) + #define REGISTER_UFUNC_BINARY_COMPARE(name) \ + REGISTER_UFUNC(name, {npy_rational, npy_rational, NPY_BOOL}) + #define REGISTER_UFUNC_UNARY(name) \ + REGISTER_UFUNC(name, {npy_rational, npy_rational}) + /* Binary */ + REGISTER_UFUNC_BINARY_RATIONAL(add) + REGISTER_UFUNC_BINARY_RATIONAL(subtract) + REGISTER_UFUNC_BINARY_RATIONAL(multiply) + REGISTER_UFUNC_BINARY_RATIONAL(divide) + REGISTER_UFUNC_BINARY_RATIONAL(remainder) + REGISTER_UFUNC_BINARY_RATIONAL(true_divide) + REGISTER_UFUNC_BINARY_RATIONAL(floor_divide) + REGISTER_UFUNC_BINARY_RATIONAL(minimum) + REGISTER_UFUNC_BINARY_RATIONAL(maximum) + /* Comparisons */ + REGISTER_UFUNC_BINARY_COMPARE(equal) + REGISTER_UFUNC_BINARY_COMPARE(not_equal) + REGISTER_UFUNC_BINARY_COMPARE(less) + REGISTER_UFUNC_BINARY_COMPARE(greater) + REGISTER_UFUNC_BINARY_COMPARE(less_equal) + REGISTER_UFUNC_BINARY_COMPARE(greater_equal) + /* Unary */ + REGISTER_UFUNC_UNARY(negative) + REGISTER_UFUNC_UNARY(absolute) + REGISTER_UFUNC_UNARY(floor) + REGISTER_UFUNC_UNARY(ceil) + REGISTER_UFUNC_UNARY(trunc) + REGISTER_UFUNC_UNARY(rint) + REGISTER_UFUNC_UNARY(square) + REGISTER_UFUNC_UNARY(reciprocal) + REGISTER_UFUNC_UNARY(sign) + + /* Create module */ +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_rational_tests", module_methods); +#endif + + if (!m) { + goto fail; + } + + /* Add rational type */ + Py_INCREF(&PyRational_Type); + PyModule_AddObject(m,"rational",(PyObject*)&PyRational_Type); + + /* Create matrix multiply generalized ufunc */ + { + int types2[3] = {npy_rational,npy_rational,npy_rational}; + PyObject* gufunc = PyUFunc_FromFuncAndDataAndSignature(0,0,0,0,2,1, + PyUFunc_None,(char*)"matrix_multiply", + (char*)"return result of multiplying two matrices of rationals", + 0,"(m,n),(n,p)->(m,p)"); + if (!gufunc) { + goto fail; + } + if (PyUFunc_RegisterLoopForType((PyUFuncObject*)gufunc, npy_rational, + rational_gufunc_matrix_multiply, types2, 0) < 0) { + goto fail; + } + PyModule_AddObject(m,"matrix_multiply",(PyObject*)gufunc); + } + + /* Create test ufunc with built in input types and rational output type */ + { + int types3[3] = {NPY_INT64,NPY_INT64,npy_rational}; + + PyObject* ufunc = PyUFunc_FromFuncAndData(0,0,0,0,2,1, + PyUFunc_None,(char*)"test_add", + (char*)"add two matrices of int64 and return rational matrix",0); + if (!ufunc) { + goto fail; + } + if (PyUFunc_RegisterLoopForType((PyUFuncObject*)ufunc, npy_rational, + rational_ufunc_test_add, types3, 0) < 0) { + goto fail; + } + PyModule_AddObject(m,"test_add",(PyObject*)ufunc); + } + + /* Create test ufunc with rational types using RegisterLoopForDescr */ + { + PyObject* ufunc = PyUFunc_FromFuncAndData(0,0,0,0,2,1, + PyUFunc_None,(char*)"test_add_rationals", + (char*)"add two matrices of rationals and return rational matrix",0); + PyArray_Descr* 
types[3] = {&npyrational_descr, + &npyrational_descr, + &npyrational_descr}; + + if (!ufunc) { + goto fail; + } + if (PyUFunc_RegisterLoopForDescr((PyUFuncObject*)ufunc, &npyrational_descr, + rational_ufunc_test_add_rationals, types, 0) < 0) { + goto fail; + } + PyModule_AddObject(m,"test_add_rationals",(PyObject*)ufunc); + } + + /* Create numerator and denominator ufuncs */ + #define NEW_UNARY_UFUNC(name,type,doc) { \ + int types[2] = {npy_rational,type}; \ + PyObject* ufunc = PyUFunc_FromFuncAndData(0,0,0,0,1,1, \ + PyUFunc_None,(char*)#name,(char*)doc,0); \ + if (!ufunc) { \ + goto fail; \ + } \ + if (PyUFunc_RegisterLoopForType((PyUFuncObject*)ufunc, \ + npy_rational,rational_ufunc_##name,types,0)<0) { \ + goto fail; \ + } \ + PyModule_AddObject(m,#name,(PyObject*)ufunc); \ + } + NEW_UNARY_UFUNC(numerator,NPY_INT64,"rational number numerator"); + NEW_UNARY_UFUNC(denominator,NPY_INT64,"rational number denominator"); + + /* Create gcd and lcm ufuncs */ + #define GCD_LCM_UFUNC(name,type,doc) { \ + static const PyUFuncGenericFunction func[1] = {name##_ufunc}; \ + static const char types[3] = {type,type,type}; \ + static void* data[1] = {0}; \ + PyObject* ufunc = PyUFunc_FromFuncAndData( \ + (PyUFuncGenericFunction*)func, data,(char*)types, \ + 1,2,1,PyUFunc_One,(char*)#name,(char*)doc,0); \ + if (!ufunc) { \ + goto fail; \ + } \ + PyModule_AddObject(m,#name,(PyObject*)ufunc); \ + } + GCD_LCM_UFUNC(gcd,NPY_INT64,"greatest common denominator of two integers"); + GCD_LCM_UFUNC(lcm,NPY_INT64,"least common multiple of two integers"); + + return RETVAL; + +fail: + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "cannot load _rational_tests module."); + } +#if defined(NPY_PY3K) + if (m) { + Py_DECREF(m); + m = NULL; + } +#endif + return RETVAL; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/_struct_ufunc_tests.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/_struct_ufunc_tests.c.src new file mode 100644 index 0000000000..5c6e235e01 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/_struct_ufunc_tests.c.src @@ -0,0 +1,125 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" +#include "math.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "numpy/npy_3kcompat.h" + + +/* + * struct_ufunc_test.c + * This is the C code for creating your own + * NumPy ufunc for a structured array dtype. + * + * Details explaining the Python-C API can be found under + * 'Extending and Embedding' and 'Python/C API' at + * docs.python.org . + */ + +static PyMethodDef StructUfuncTestMethods[] = { + {NULL, NULL, 0, NULL} +}; + +/* The loop definition must precede the PyMODINIT_FUNC. 
*/ + +static void add_uint64_triplet(char **args, npy_intp *dimensions, + npy_intp* steps, void* data) +{ + npy_intp i; + npy_intp is1=steps[0]; + npy_intp is2=steps[1]; + npy_intp os=steps[2]; + npy_intp n=dimensions[0]; + npy_uint64 *x, *y, *z; + + char *i1=args[0]; + char *i2=args[1]; + char *op=args[2]; + + for (i = 0; i < n; i++) { + + x = (npy_uint64*)i1; + y = (npy_uint64*)i2; + z = (npy_uint64*)op; + + z[0] = x[0] + y[0]; + z[1] = x[1] + y[1]; + z[2] = x[2] + y[2]; + + i1 += is1; + i2 += is2; + op += os; + } +} + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_struct_ufunc_tests", + NULL, + -1, + StructUfuncTestMethods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if defined(NPY_PY3K) +PyMODINIT_FUNC PyInit__struct_ufunc_tests(void) +#else +PyMODINIT_FUNC init_struct_ufunc_tests(void) +#endif +{ + PyObject *m, *add_triplet, *d; + PyObject *dtype_dict; + PyArray_Descr *dtype; + PyArray_Descr *dtypes[3]; + +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_struct_ufunc_tests", StructUfuncTestMethods); +#endif + + if (m == NULL) { +#if defined(NPY_PY3K) + return NULL; +#else + return; +#endif + } + + import_array(); + import_umath(); + + add_triplet = PyUFunc_FromFuncAndData(NULL, NULL, NULL, 0, 2, 1, + PyUFunc_None, "add_triplet", + "add_triplet_docstring", 0); + + dtype_dict = Py_BuildValue("[(s, s), (s, s), (s, s)]", + "f0", "u8", "f1", "u8", "f2", "u8"); + PyArray_DescrConverter(dtype_dict, &dtype); + Py_DECREF(dtype_dict); + + dtypes[0] = dtype; + dtypes[1] = dtype; + dtypes[2] = dtype; + + PyUFunc_RegisterLoopForDescr((PyUFuncObject *)add_triplet, + dtype, + &add_uint64_triplet, + dtypes, + NULL); + + Py_DECREF(dtype); + d = PyModule_GetDict(m); + + PyDict_SetItemString(d, "add_triplet", add_triplet); + Py_DECREF(add_triplet); +#if defined(NPY_PY3K) + return m; +#endif +} diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/_umath_tests.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/_umath_tests.c.src new file mode 100644 index 0000000000..6c3bcce713 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/_umath_tests.c.src @@ -0,0 +1,643 @@ +/* -*- c -*- */ + +/* + ***************************************************************************** + ** INCLUDES ** + ***************************************************************************** + */ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/npy_math.h" + +#include "npy_pycompat.h" + +#include "npy_config.h" + +/* + ***************************************************************************** + ** BASICS ** + ***************************************************************************** + */ + +#define INIT_OUTER_LOOP_1 \ + npy_intp dN = *dimensions++;\ + npy_intp N_; \ + npy_intp s0 = *steps++; + +#define INIT_OUTER_LOOP_2 \ + INIT_OUTER_LOOP_1 \ + npy_intp s1 = *steps++; + +#define INIT_OUTER_LOOP_3 \ + INIT_OUTER_LOOP_2 \ + npy_intp s2 = *steps++; + +#define INIT_OUTER_LOOP_4 \ + INIT_OUTER_LOOP_3 \ + npy_intp s3 = *steps++; + +#define BEGIN_OUTER_LOOP_2 \ + for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) { + +#define BEGIN_OUTER_LOOP_3 \ + for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) { + +#define BEGIN_OUTER_LOOP_4 \ + for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2, args[3] += s3) { + +#define END_OUTER_LOOP } + + +/* + 
***************************************************************************** + ** UFUNC LOOPS ** + ***************************************************************************** + */ + +char *inner1d_signature = "(i),(i)->()"; + +/**begin repeat + + #TYPE=LONG,DOUBLE# + #typ=npy_long,npy_double# +*/ + +/* + * This implements the function + * out[n] = sum_i { in1[n, i] * in2[n, i] }. + */ + +static void +@TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_3 + npy_intp di = dimensions[0]; + npy_intp i; + npy_intp is1=steps[0], is2=steps[1]; + BEGIN_OUTER_LOOP_3 + char *ip1=args[0], *ip2=args[1], *op=args[2]; + @typ@ sum = 0; + for (i = 0; i < di; i++) { + sum += (*(@typ@ *)ip1) * (*(@typ@ *)ip2); + ip1 += is1; + ip2 += is2; + } + *(@typ@ *)op = sum; + END_OUTER_LOOP +} + +/**end repeat**/ + +char *innerwt_signature = "(i),(i),(i)->()"; + +/**begin repeat + + #TYPE=LONG,DOUBLE# + #typ=npy_long,npy_double# +*/ + + +/* + * This implements the function + * out[n] = sum_i { in1[n, i] * in2[n, i] * in3[n, i] }. + */ + +static void +@TYPE@_innerwt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_4 + npy_intp di = dimensions[0]; + npy_intp i; + npy_intp is1=steps[0], is2=steps[1], is3=steps[2]; + BEGIN_OUTER_LOOP_4 + char *ip1=args[0], *ip2=args[1], *ip3=args[2], *op=args[3]; + @typ@ sum = 0; + for (i = 0; i < di; i++) { + sum += (*(@typ@ *)ip1) * (*(@typ@ *)ip2) * (*(@typ@ *)ip3); + ip1 += is1; + ip2 += is2; + ip3 += is3; + } + *(@typ@ *)op = sum; + END_OUTER_LOOP +} + +/**end repeat**/ + +char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)"; +/* for use with matrix_multiply code, but different signature */ +char *matmul_signature = "(m?,n),(n,p?)->(m?,p?)"; + +/**begin repeat + + #TYPE=FLOAT,DOUBLE,LONG# + #typ=npy_float,npy_double,npy_long# +*/ + +/* + * This implements the function + * out[k, m, p] = sum_n { in1[k, m, n] * in2[k, n, p] }. 
+ */ + +static void +@TYPE@_matrix_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* no BLAS is available */ + INIT_OUTER_LOOP_3 + npy_intp dm = dimensions[0]; + npy_intp dn = dimensions[1]; + npy_intp dp = dimensions[2]; + npy_intp m,n,p; + npy_intp is1_m=steps[0], is1_n=steps[1], is2_n=steps[2], is2_p=steps[3], + os_m=steps[4], os_p=steps[5]; + npy_intp ib1_n = is1_n*dn; + npy_intp ib2_n = is2_n*dn; + npy_intp ib2_p = is2_p*dp; + npy_intp ob_p = os_p *dp; + if (dn == 0) { + /* No operand, need to zero the output */ + BEGIN_OUTER_LOOP_3 + char *op=args[2]; + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { + *(@typ@ *)op = 0; + op += os_p; + } + op += os_m - ob_p; + } + END_OUTER_LOOP + return; + } + BEGIN_OUTER_LOOP_3 + char *ip1=args[0], *ip2=args[1], *op=args[2]; + for (m = 0; m < dm; m++) { + for (n = 0; n < dn; n++) { + @typ@ val1 = (*(@typ@ *)ip1); + for (p = 0; p < dp; p++) { + if (n == 0) *(@typ@ *)op = 0; + *(@typ@ *)op += val1 * (*(@typ@ *)ip2); + ip2 += is2_p; + op += os_p; + } + ip2 -= ib2_p; + op -= ob_p; + ip1 += is1_n; + ip2 += is2_n; + } + ip1 -= ib1_n; + ip2 -= ib2_n; + ip1 += is1_m; + op += os_m; + } + END_OUTER_LOOP +} + +/**end repeat**/ + +char *cross1d_signature = "(3),(3)->(3)"; + +/**begin repeat + + #TYPE=LONG,DOUBLE# + #typ=npy_long, npy_double# +*/ + +/* + * This implements the cross product: + * out[n, 0] = in1[n, 1]*in2[n, 2] - in1[n, 2]*in2[n, 1] + * out[n, 1] = in1[n, 2]*in2[n, 0] - in1[n, 0]*in2[n, 2] + * out[n, 2] = in1[n, 0]*in2[n, 1] - in1[n, 1]*in2[n, 0] + */ +static void +@TYPE@_cross1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_3 + npy_intp is1=steps[0], is2=steps[1], os = steps[2]; + BEGIN_OUTER_LOOP_3 + @typ@ i1_x = *(@typ@ *)(args[0] + 0*is1); + @typ@ i1_y = *(@typ@ *)(args[0] + 1*is1); + @typ@ i1_z = *(@typ@ *)(args[0] + 2*is1); + + @typ@ i2_x = *(@typ@ *)(args[1] + 0*is2); + @typ@ i2_y = *(@typ@ *)(args[1] + 1*is2); + @typ@ i2_z = *(@typ@ *)(args[1] + 2*is2); + char *op = args[2]; + + *(@typ@ *)op = i1_y * i2_z - i1_z * i2_y; + op += os; + *(@typ@ *)op = i1_z * i2_x - i1_x * i2_z; + op += os; + *(@typ@ *)op = i1_x * i2_y - i1_y * i2_x; + END_OUTER_LOOP +} + +/**end repeat**/ + +char *euclidean_pdist_signature = "(n,d)->(p)"; + +/**begin repeat + + #TYPE=FLOAT,DOUBLE# + #typ=npy_float,npy_double# + #sqrt_func=sqrtf,sqrt# +*/ + +/* + * This implements the function + * out[j*(2*n-3-j)+k-1] = sum_d { (in1[j, d] - in1[k, d])^2 } + * with 0 < k < j < n, i.e. computes all unique pairwise euclidean distances. 
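 * The p = n*(n-1)/2 results are written in condensed row-major order,
 * i.e. the distances for the pairs (0,1), (0,2), ..., (0,n-1), (1,2), ...
 * appear consecutively, matching the loop below, which advances the output
 * pointer once per inner iteration.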
+ */ + +static void +@TYPE@_euclidean_pdist(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_2 + npy_intp len_n = *dimensions++; + npy_intp len_d = *dimensions++; + npy_intp stride_n = *steps++; + npy_intp stride_d = *steps++; + npy_intp stride_p = *steps; + + assert(len_n * (len_n - 1) / 2 == *dimensions); + + BEGIN_OUTER_LOOP_2 + const char *data_this = (const char *)args[0]; + char *data_out = args[1]; + npy_intp n; + for (n = 0; n < len_n; ++n) { + const char *data_that = data_this + stride_n; + npy_intp nn; + for (nn = n + 1; nn < len_n; ++nn) { + const char *ptr_this = data_this; + const char *ptr_that = data_that; + @typ@ out = 0; + npy_intp d; + for (d = 0; d < len_d; ++d) { + const @typ@ delta = *(const @typ@ *)ptr_this - + *(const @typ@ *)ptr_that; + out += delta * delta; + ptr_this += stride_d; + ptr_that += stride_d; + } + *(@typ@ *)data_out = npy_@sqrt_func@(out); + data_that += stride_n; + data_out += stride_p; + } + data_this += stride_n; + } + END_OUTER_LOOP +} + +/**end repeat**/ + +char *cumsum_signature = "(i)->(i)"; + +/* + * This implements the function + * out[n] = sum_i^n in[i] + */ + +/**begin repeat + + #TYPE=LONG,DOUBLE# + #typ=npy_long,npy_double# +*/ + +static void +@TYPE@_cumsum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_2 + npy_intp di = dimensions[0]; + npy_intp i; + npy_intp is=steps[0], os=steps[1]; + BEGIN_OUTER_LOOP_2 + char *ip=args[0], *op=args[1]; + @typ@ cumsum = 0; + for (i = 0; i < di; i++, ip += is, op += os) { + cumsum += (*(@typ@ *)ip); + *(@typ@ *)op = cumsum; + } + END_OUTER_LOOP +} + +/**end repeat**/ + +/* The following lines were generated using a slightly modified + version of code_generators/generate_umath.py and adding these + lines to defdict: + +defdict = { +'inner1d' : + Ufunc(2, 1, None_, + r'''inner on the last dimension and broadcast on the rest \n" + " \"(i),(i)->()\" \n''', + TD('ld'), + ), +'innerwt' : + Ufunc(3, 1, None_, + r'''inner1d with a weight argument \n" + " \"(i),(i),(i)->()\" \n''', + TD('ld'), + ), +} + +*/ + +static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d }; +static void *inner1d_data[] = { (void *)NULL, (void *)NULL }; +static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; +static PyUFuncGenericFunction innerwt_functions[] = { LONG_innerwt, DOUBLE_innerwt }; +static void *innerwt_data[] = { (void *)NULL, (void *)NULL }; +static char innerwt_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; +static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply }; +static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL }; +static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; +static PyUFuncGenericFunction cross1d_functions[] = { LONG_cross1d, DOUBLE_cross1d }; +static void *cross1d_data[] = { (void *)NULL, (void *)NULL }; +static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; +static PyUFuncGenericFunction euclidean_pdist_functions[] = + { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist }; +static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL }; +static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE 
}; + +static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum }; +static void *cumsum_data[] = { (void *)NULL, (void *)NULL }; +static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE }; + + +static int +addUfuncs(PyObject *dictionary) { + PyObject *f; + + f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, + inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d", + "inner on the last dimension and broadcast on the rest \n" + " \"(i),(i)->()\" \n", + 0, inner1d_signature); + /* + * yes, this should not happen, but I (MHvK) just spent an hour looking at + * segfaults because I screwed up something that seemed totally unrelated. + */ + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "inner1d", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data, + innerwt_signatures, 2, 3, 1, PyUFunc_None, "innerwt", + "inner1d with a weight argument \n" + " \"(i),(i),(i)->()\" \n", + 0, innerwt_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "innerwt", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions, + matrix_multiply_data, matrix_multiply_signatures, + 3, 2, 1, PyUFunc_None, "matrix_multiply", + "matrix multiplication on last two dimensions \n" + " \"(m,n),(n,p)->(m,p)\" \n", + 0, matrix_multiply_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "matrix_multiply", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions, + matrix_multiply_data, matrix_multiply_signatures, + 3, 2, 1, PyUFunc_None, "matmul", + "matmul on last two dimensions, with some being optional\n" + " \"(m?,n),(n,p?)->(m?,p?)\" \n", + 0, matmul_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "matmul", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions, + eucldiean_pdist_data, euclidean_pdist_signatures, + 2, 1, 1, PyUFunc_None, "euclidean_pdist", + "pairwise euclidean distance on last two dimensions \n" + " \"(n,d)->(p)\" \n", + 0, euclidean_pdist_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "euclidean_pdist", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(cumsum_functions, + cumsum_data, cumsum_signatures, + 2, 1, 1, PyUFunc_None, "cumsum", + "Cumulative sum of the input (n)->(n)\n", + 0, cumsum_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "cumsum", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, + inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d_no_doc", + NULL, + 0, inner1d_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "inner1d_no_doc", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(cross1d_functions, cross1d_data, + cross1d_signatures, 2, 2, 1, PyUFunc_None, "cross1d", + "cross product on the last dimension and broadcast on the rest \n"\ + " \"(3),(3)->(3)\" \n", + 0, cross1d_signature); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "cross1d", f); + Py_DECREF(f); + + return 0; +} + + +static PyObject * +UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args) +{ + int nin, nout, i; + PyObject *signature=NULL, *sig_str=NULL; + PyUFuncObject *f=NULL; + PyObject *core_num_dims=NULL, *core_dim_ixs=NULL; + PyObject *core_dim_flags=NULL, *core_dim_sizes=NULL; + int 
core_enabled; + int core_num_ixs = 0; + + if (!PyArg_ParseTuple(args, "iiO", &nin, &nout, &signature)) { + return NULL; + } + + if (PyString_Check(signature)) { + sig_str = signature; + } else if (PyUnicode_Check(signature)) { + sig_str = PyUnicode_AsUTF8String(signature); + } else { + PyErr_SetString(PyExc_ValueError, "signature should be a string"); + return NULL; + } + + f = (PyUFuncObject*)PyUFunc_FromFuncAndDataAndSignature( + NULL, NULL, NULL, + 0, nin, nout, PyUFunc_None, "no name", + "doc:none", + 1, PyString_AS_STRING(sig_str)); + if (sig_str != signature) { + Py_DECREF(sig_str); + } + if (f == NULL) { + return NULL; + } + core_enabled = f->core_enabled; + /* + * Don't presume core_num_dims and core_dim_ixs are defined; + * they currently are even if core_enabled=0, but there's no real + * reason they should be. So avoid segfaults if we change our mind. + */ + if (f->core_num_dims != NULL) { + core_num_dims = PyTuple_New(f->nargs); + if (core_num_dims == NULL) { + goto fail; + } + for (i = 0; i < f->nargs; i++) { + PyObject *val = PyLong_FromLong(f->core_num_dims[i]); + PyTuple_SET_ITEM(core_num_dims, i, val); + core_num_ixs += f->core_num_dims[i]; + } + } + else { + Py_INCREF(Py_None); + core_num_dims = Py_None; + } + if (f->core_dim_ixs != NULL) { + core_dim_ixs = PyTuple_New(core_num_ixs); + if (core_num_dims == NULL) { + goto fail; + } + for (i = 0; i < core_num_ixs; i++) { + PyObject *val = PyLong_FromLong(f->core_dim_ixs[i]); + PyTuple_SET_ITEM(core_dim_ixs, i, val); + } + } + else { + Py_INCREF(Py_None); + core_dim_ixs = Py_None; + } + if (f->core_dim_flags != NULL) { + core_dim_flags = PyTuple_New(f->core_num_dim_ix); + if (core_dim_flags == NULL) { + goto fail; + } + for (i = 0; i < f->core_num_dim_ix; i++) { + PyObject *val = PyLong_FromLong(f->core_dim_flags[i]); + PyTuple_SET_ITEM(core_dim_flags, i, val); + } + } + else { + Py_INCREF(Py_None); + core_dim_flags = Py_None; + } + if (f->core_dim_sizes != NULL) { + core_dim_sizes = PyTuple_New(f->core_num_dim_ix); + if (core_dim_sizes == NULL) { + goto fail; + } + for (i = 0; i < f->core_num_dim_ix; i++) { + PyObject *val = PyLong_FromLong(f->core_dim_sizes[i]); + PyTuple_SET_ITEM(core_dim_sizes, i, val); + } + } + else { + Py_INCREF(Py_None); + core_dim_sizes = Py_None; + } + Py_DECREF(f); + return Py_BuildValue("iNNNN", core_enabled, core_num_dims, + core_dim_ixs, core_dim_flags, core_dim_sizes); + +fail: + Py_XDECREF(f); + Py_XDECREF(core_num_dims); + Py_XDECREF(core_dim_ixs); + Py_XDECREF(core_dim_flags); + Py_XDECREF(core_dim_sizes); + return NULL; +} + +static PyMethodDef UMath_TestsMethods[] = { + {"test_signature", UMath_Tests_test_signature, METH_VARARGS, + "Test signature parsing of ufunc. \n" + "Arguments: nin nout signature \n" + "If fails, it returns NULL. Otherwise it returns a tuple of ufunc " + "internals. 
\n", + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_umath_tests", + NULL, + -1, + UMath_TestsMethods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +/* Initialization function for the module */ +#if defined(NPY_PY3K) +#define RETVAL(x) x +PyMODINIT_FUNC PyInit__umath_tests(void) { +#else +#define RETVAL(x) +PyMODINIT_FUNC init_umath_tests(void) { +#endif + PyObject *m; + PyObject *d; + PyObject *version; + +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_umath_tests", UMath_TestsMethods); +#endif + if (m == NULL) { + return RETVAL(NULL); + } + + import_array(); + import_ufunc(); + + d = PyModule_GetDict(m); + + version = PyString_FromString("0.1"); + PyDict_SetItemString(d, "__version__", version); + Py_DECREF(version); + + /* Load the ufunc operators into the module's namespace */ + if (addUfuncs(d) < 0) { + Py_DECREF(m); + PyErr_Print(); + PyErr_SetString(PyExc_RuntimeError, + "cannot load _umath_tests module."); + return RETVAL(NULL); + } + + return RETVAL(m); +} diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/funcs.inc.src b/contrib/python/numpy/py2/numpy/core/src/umath/funcs.inc.src new file mode 100644 index 0000000000..da2ab07f8b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/funcs.inc.src @@ -0,0 +1,432 @@ +/* -*- c -*- */ + +/* + * This file is for the definitions of the non-c99 functions used in ufuncs. + * All the complex ufuncs are defined here along with a smattering of real and + * object functions. + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "npy_pycompat.h" +#include "npy_import.h" + + +/* + ***************************************************************************** + ** PYTHON OBJECT FUNCTIONS ** + ***************************************************************************** + */ + +static PyObject * +Py_square(PyObject *o) +{ + return PyNumber_Multiply(o, o); +} + +static PyObject * +Py_get_one(PyObject *NPY_UNUSED(o)) +{ + return PyInt_FromLong(1); +} + +static PyObject * +Py_reciprocal(PyObject *o) +{ + PyObject *one = PyInt_FromLong(1); + PyObject *result; + + if (!one) { + return NULL; + } +#if defined(NPY_PY3K) + result = PyNumber_TrueDivide(one, o); +#else + result = PyNumber_Divide(one, o); +#endif + Py_DECREF(one); + return result; +} + +/* + * Define numpy version of PyNumber_Power as binary function. 
+ */ +static PyObject * +npy_ObjectPower(PyObject *x, PyObject *y) +{ + return PyNumber_Power(x, y, Py_None); +} + +/**begin repeat + * #Kind = Max, Min# + * #OP = Py_GE, Py_LE# + */ +static PyObject * +npy_Object@Kind@(PyObject *i1, PyObject *i2) +{ + PyObject *result; + int cmp; + + cmp = PyObject_RichCompareBool(i1, i2, @OP@); + if (cmp < 0) { + return NULL; + } + if (cmp == 1) { + result = i1; + } + else { + result = i2; + } + Py_INCREF(result); + return result; +} +/**end repeat**/ + +/* Emulates Python's 'a or b' behavior */ +static PyObject * +npy_ObjectLogicalOr(PyObject *i1, PyObject *i2) +{ + if (i1 == NULL) { + Py_XINCREF(i2); + return i2; + } + else if (i2 == NULL) { + Py_INCREF(i1); + return i1; + } + else { + int retcode = PyObject_IsTrue(i1); + if (retcode == -1) { + return NULL; + } + else if (retcode) { + Py_INCREF(i1); + return i1; + } + else { + Py_INCREF(i2); + return i2; + } + } +} + +/* Emulates Python's 'a and b' behavior */ +static PyObject * +npy_ObjectLogicalAnd(PyObject *i1, PyObject *i2) +{ + if (i1 == NULL) { + return NULL; + } + else if (i2 == NULL) { + return NULL; + } + else { + int retcode = PyObject_IsTrue(i1); + if (retcode == -1) { + return NULL; + } + else if (!retcode) { + Py_INCREF(i1); + return i1; + } + else { + Py_INCREF(i2); + return i2; + } + } +} + + +/* Emulates Python's 'not b' behavior */ +static PyObject * +npy_ObjectLogicalNot(PyObject *i1) +{ + if (i1 == NULL) { + return NULL; + } + else { + int retcode = PyObject_Not(i1); + if (retcode == -1) { + return NULL; + } + else if (retcode) { + Py_INCREF(Py_True); + return Py_True; + } + else { + Py_INCREF(Py_False); + return Py_False; + } + } +} + +static PyObject * +npy_ObjectGCD(PyObject *i1, PyObject *i2) +{ + PyObject *gcd = NULL; + + /* use math.gcd if available, and valid on the provided types */ +#if PY_VERSION_HEX >= 0x03050000 + { + static PyObject *math_gcd_func = NULL; + + npy_cache_import("math", "gcd", &math_gcd_func); + if (math_gcd_func == NULL) { + return NULL; + } + gcd = PyObject_CallFunction(math_gcd_func, "OO", i1, i2); + if (gcd != NULL) { + return gcd; + } + /* silence errors, and fall back on pure-python gcd */ + PyErr_Clear(); + } +#endif + + /* otherwise, use our internal one, written in python */ + { + static PyObject *internal_gcd_func = NULL; + + npy_cache_import("numpy.core._internal", "_gcd", &internal_gcd_func); + if (internal_gcd_func == NULL) { + return NULL; + } + gcd = PyObject_CallFunction(internal_gcd_func, "OO", i1, i2); + if (gcd == NULL) { + return NULL; + } + /* _gcd has some unusual behaviour regarding sign */ + return PyNumber_Absolute(gcd); + } +} + +static PyObject * +npy_ObjectLCM(PyObject *i1, PyObject *i2) +{ + /* lcm(a, b) = abs(a // gcd(a, b) * b) */ + + PyObject *gcd = npy_ObjectGCD(i1, i2); + PyObject *tmp; + if(gcd == NULL) { + return NULL; + } + /* Floor divide preserves integer types - we know the division will have + * no remainder + */ + tmp = PyNumber_FloorDivide(i1, gcd); + if(tmp == NULL) { + return NULL; + } + + tmp = PyNumber_Multiply(tmp, i2); + if(tmp == NULL) { + return NULL; + } + + /* even though we fix gcd to be positive, we need to do it again here */ + return PyNumber_Absolute(tmp); +} + +/* + ***************************************************************************** + ** COMPLEX FUNCTIONS ** + ***************************************************************************** + */ + + +/* + * Don't pass structures between functions (only pointers) because how + * structures are passed is compiler dependent and could cause 
segfaults if + * umath_ufunc_object.inc is compiled with a different compiler than an + * extension that makes use of the UFUNC API + */ + +/**begin repeat + * + * #ctype = npy_cfloat, npy_cdouble, npy_clongdouble# + * #ftype = npy_float, npy_double, npy_longdouble# + * #c = f, ,l# + */ + +static void +nc_neg@c@(@ctype@ *a, @ctype@ *r) +{ + r->real = -a->real; + r->imag = -a->imag; + return; +} + +static void +nc_pos@c@(@ctype@ *a, @ctype@ *r) +{ + r->real = +a->real; + r->imag = +a->imag; + return; +} + +static void +nc_sqrt@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_csqrt@c@(*x); + return; +} + +static void +nc_rint@c@(@ctype@ *x, @ctype@ *r) +{ + r->real = npy_rint@c@(x->real); + r->imag = npy_rint@c@(x->imag); +} + +static void +nc_log@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_clog@c@(*x); + return; +} + +static void +nc_log1p@c@(@ctype@ *x, @ctype@ *r) +{ + @ftype@ l = npy_hypot@c@(x->real + 1,x->imag); + r->imag = npy_atan2@c@(x->imag, x->real + 1); + r->real = npy_log@c@(l); + return; +} + +static void +nc_exp@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_cexp@c@(*x); + return; +} + +static void +nc_exp2@c@(@ctype@ *x, @ctype@ *r) +{ + @ctype@ a; + a.real = x->real*NPY_LOGE2@c@; + a.imag = x->imag*NPY_LOGE2@c@; + nc_exp@c@(&a, r); + return; +} + +static void +nc_expm1@c@(@ctype@ *x, @ctype@ *r) +{ + @ftype@ a = npy_exp@c@(x->real); + r->real = a*npy_cos@c@(x->imag) - 1.0@c@; + r->imag = a*npy_sin@c@(x->imag); + return; +} + +static void +nc_pow@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) +{ + *r = npy_cpow@c@(*a, *b); + return; +} + +static void +nc_acos@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_cacos@c@(*x); + return; +} + +static void +nc_acosh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_cacosh@c@(*x); + return; +} + +static void +nc_asin@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_casin@c@(*x); + return; +} + + +static void +nc_asinh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_casinh@c@(*x); + return; +} + +static void +nc_atan@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_catan@c@(*x); + return; +} + +static void +nc_atanh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_catanh@c@(*x); + return; +} + +static void +nc_cos@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_ccos@c@(*x); + return; +} + +static void +nc_cosh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_ccosh@c@(*x); + return; +} + +static void +nc_log10@c@(@ctype@ *x, @ctype@ *r) +{ + nc_log@c@(x, r); + r->real *= NPY_LOG10E@c@; + r->imag *= NPY_LOG10E@c@; + return; +} + +static void +nc_log2@c@(@ctype@ *x, @ctype@ *r) +{ + nc_log@c@(x, r); + r->real *= NPY_LOG2E@c@; + r->imag *= NPY_LOG2E@c@; + return; +} + +static void +nc_sin@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_csin@c@(*x); + return; +} + +static void +nc_sinh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_csinh@c@(*x); + return; +} + +static void +nc_tan@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_ctan@c@(*x); + return; +} + +static void +nc_tanh@c@(@ctype@ *x, @ctype@ *r) +{ + *r = npy_ctanh@c@(*x); + return; +} + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/loops.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/loops.c.src new file mode 100644 index 0000000000..975a5e6b83 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/loops.c.src @@ -0,0 +1,2988 @@ +/* -*- c -*- */ + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" + +#include "npy_config.h" +#include "numpy/npy_common.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/npy_math.h" +#include 
"numpy/halffloat.h" +#include "lowlevel_strided_loops.h" + +#include "npy_pycompat.h" + +#include "ufunc_object.h" + +#include <string.h> /* for memchr */ + +/* + * cutoff blocksize for pairwise summation + * decreasing it decreases errors slightly as more pairs are summed but + * also lowers performance, as the inner loop is unrolled eight times it is + * effectively 16 + */ +#define PW_BLOCKSIZE 128 + + +/* + * largest simd vector size in bytes numpy supports + * it is currently a extremely large value as it is only used for memory + * overlap checks + */ +#ifndef NPY_MAX_SIMD_SIZE +#define NPY_MAX_SIMD_SIZE 1024 +#endif + +/* + * include vectorized functions and dispatchers + * this file is safe to include also for generic builds + * platform specific instructions are either masked via the proprocessor or + * runtime detected + */ +#include "simd.inc" + + +/* + ***************************************************************************** + ** UFUNC LOOPS ** + ***************************************************************************** + */ + +/* unary loop input and output contiguous */ +#define IS_UNARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \ + steps[1] == sizeof(tout)) + +#define IS_BINARY_REDUCE ((args[0] == args[2])\ + && (steps[0] == steps[2])\ + && (steps[0] == 0)) + +/* binary loop input and output contiguous */ +#define IS_BINARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \ + steps[1] == sizeof(tin) && \ + steps[2] == sizeof(tout)) +/* binary loop input and output contiguous with first scalar */ +#define IS_BINARY_CONT_S1(tin, tout) (steps[0] == 0 && \ + steps[1] == sizeof(tin) && \ + steps[2] == sizeof(tout)) +/* binary loop input and output contiguous with second scalar */ +#define IS_BINARY_CONT_S2(tin, tout) (steps[0] == sizeof(tin) && \ + steps[1] == 0 && \ + steps[2] == sizeof(tout)) + +#define OUTPUT_LOOP\ + char *op1 = args[1];\ + npy_intp os1 = steps[1];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + for(i = 0; i < n; i++, op1 += os1) + +#define UNARY_LOOP\ + char *ip1 = args[0], *op1 = args[1];\ + npy_intp is1 = steps[0], os1 = steps[1];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + for(i = 0; i < n; i++, ip1 += is1, op1 += os1) + +/* + * loop with contiguous specialization + * op should be the code working on `tin in` and + * storing the result in `tout * out` + * combine with NPY_GCC_OPT_3 to allow autovectorization + * should only be used where its worthwhile to avoid code bloat + */ +#define BASE_UNARY_LOOP(tin, tout, op) \ + UNARY_LOOP { \ + const tin in = *(tin *)ip1; \ + tout * out = (tout *)op1; \ + op; \ + } +#define UNARY_LOOP_FAST(tin, tout, op) \ + do { \ + /* condition allows compiler to optimize the generic macro */ \ + if (IS_UNARY_CONT(tin, tout)) { \ + if (args[0] == args[1]) { \ + BASE_UNARY_LOOP(tin, tout, op) \ + } \ + else { \ + BASE_UNARY_LOOP(tin, tout, op) \ + } \ + } \ + else { \ + BASE_UNARY_LOOP(tin, tout, op) \ + } \ + } \ + while (0) + +#define UNARY_LOOP_TWO_OUT\ + char *ip1 = args[0], *op1 = args[1], *op2 = args[2];\ + npy_intp is1 = steps[0], os1 = steps[1], os2 = steps[2];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2) + +#define BINARY_LOOP\ + char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\ + npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) + +/* + * loop with contiguous specialization + * op should be the code working on `tin in1`, `tin in2` 
and + * storing the result in `tout * out` + * combine with NPY_GCC_OPT_3 to allow autovectorization + * should only be used where its worthwhile to avoid code bloat + */ +#define BASE_BINARY_LOOP(tin, tout, op) \ + BINARY_LOOP { \ + const tin in1 = *(tin *)ip1; \ + const tin in2 = *(tin *)ip2; \ + tout * out = (tout *)op1; \ + op; \ + } +/* + * unfortunately gcc 6/7 regressed and we need to give it additional hints to + * vectorize inplace operations (PR80198) + * must only be used after op1 == ip1 or ip2 has been checked + * TODO: using ivdep might allow other compilers to vectorize too + */ +#if __GNUC__ >= 6 +#define IVDEP_LOOP _Pragma("GCC ivdep") +#else +#define IVDEP_LOOP +#endif +#define BASE_BINARY_LOOP_INP(tin, tout, op) \ + char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\ + npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + IVDEP_LOOP \ + for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) { \ + const tin in1 = *(tin *)ip1; \ + const tin in2 = *(tin *)ip2; \ + tout * out = (tout *)op1; \ + op; \ + } +#define BASE_BINARY_LOOP_S(tin, tout, cin, cinp, vin, vinp, op) \ + const tin cin = *(tin *)cinp; \ + BINARY_LOOP { \ + const tin vin = *(tin *)vinp; \ + tout * out = (tout *)op1; \ + op; \ + } +/* PR80198 again, scalar works without the pragma */ +#define BASE_BINARY_LOOP_S_INP(tin, tout, cin, cinp, vin, vinp, op) \ + const tin cin = *(tin *)cinp; \ + BINARY_LOOP { \ + const tin vin = *(tin *)vinp; \ + tout * out = (tout *)vinp; \ + op; \ + } +#define BINARY_LOOP_FAST(tin, tout, op) \ + do { \ + /* condition allows compiler to optimize the generic macro */ \ + if (IS_BINARY_CONT(tin, tout)) { \ + if (abs_ptrdiff(args[2], args[0]) == 0 && \ + abs_ptrdiff(args[2], args[1]) >= NPY_MAX_SIMD_SIZE) { \ + BASE_BINARY_LOOP_INP(tin, tout, op) \ + } \ + else if (abs_ptrdiff(args[2], args[1]) == 0 && \ + abs_ptrdiff(args[2], args[0]) >= NPY_MAX_SIMD_SIZE) { \ + BASE_BINARY_LOOP_INP(tin, tout, op) \ + } \ + else { \ + BASE_BINARY_LOOP(tin, tout, op) \ + } \ + } \ + else if (IS_BINARY_CONT_S1(tin, tout)) { \ + if (abs_ptrdiff(args[2], args[1]) == 0) { \ + BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \ + } \ + else { \ + BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \ + } \ + } \ + else if (IS_BINARY_CONT_S2(tin, tout)) { \ + if (abs_ptrdiff(args[2], args[0]) == 0) { \ + BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \ + } \ + else { \ + BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \ + }\ + } \ + else { \ + BASE_BINARY_LOOP(tin, tout, op) \ + } \ + } \ + while (0) + +#define BINARY_REDUCE_LOOP_INNER\ + char *ip2 = args[1]; \ + npy_intp is2 = steps[1]; \ + npy_intp n = dimensions[0]; \ + npy_intp i; \ + for(i = 0; i < n; i++, ip2 += is2) + +#define BINARY_REDUCE_LOOP(TYPE)\ + char *iop1 = args[0]; \ + TYPE io1 = *(TYPE *)iop1; \ + BINARY_REDUCE_LOOP_INNER + +#define BINARY_LOOP_TWO_OUT\ + char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];\ + npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], os2 = steps[3];\ + npy_intp n = dimensions[0];\ + npy_intp i;\ + for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2) + +/****************************************************************************** + ** GENERIC FLOAT LOOPS ** + *****************************************************************************/ + + +typedef float halfUnaryFunc(npy_half x); +typedef float floatUnaryFunc(float x); +typedef double doubleUnaryFunc(double x); +typedef 
npy_longdouble longdoubleUnaryFunc(npy_longdouble x); +typedef npy_half halfBinaryFunc(npy_half x, npy_half y); +typedef float floatBinaryFunc(float x, float y); +typedef double doubleBinaryFunc(double x, double y); +typedef npy_longdouble longdoubleBinaryFunc(npy_longdouble x, npy_longdouble y); + + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_e_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + halfUnaryFunc *f = (halfUnaryFunc *)func; + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *(npy_half *)op1 = f(in1); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_e_e_As_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + floatUnaryFunc *f = (floatUnaryFunc *)func; + UNARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + *(npy_half *)op1 = npy_float_to_half(f(in1)); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_e_e_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleUnaryFunc *f = (doubleUnaryFunc *)func; + UNARY_LOOP { + const double in1 = npy_half_to_double(*(npy_half *)ip1); + *(npy_half *)op1 = npy_double_to_half(f(in1)); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + floatUnaryFunc *f = (floatUnaryFunc *)func; + UNARY_LOOP { + const float in1 = *(float *)ip1; + *(float *)op1 = f(in1); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_f_f_As_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleUnaryFunc *f = (doubleUnaryFunc *)func; + UNARY_LOOP { + const float in1 = *(float *)ip1; + *(float *)op1 = (float)f((double)in1); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_ee_e(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + halfBinaryFunc *f = (halfBinaryFunc *)func; + BINARY_LOOP { + npy_half in1 = *(npy_half *)ip1; + npy_half in2 = *(npy_half *)ip2; + *(npy_half *)op1 = f(in1, in2); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_ee_e_As_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + floatBinaryFunc *f = (floatBinaryFunc *)func; + BINARY_LOOP { + float in1 = npy_half_to_float(*(npy_half *)ip1); + float in2 = npy_half_to_float(*(npy_half *)ip2); + *(npy_half *)op1 = npy_float_to_half(f(in1, in2)); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_ee_e_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleBinaryFunc *f = (doubleBinaryFunc *)func; + BINARY_LOOP { + double in1 = npy_half_to_double(*(npy_half *)ip1); + double in2 = npy_half_to_double(*(npy_half *)ip2); + *(npy_half *)op1 = npy_double_to_half(f(in1, in2)); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + floatBinaryFunc *f = (floatBinaryFunc *)func; + BINARY_LOOP { + float in1 = *(float *)ip1; + float in2 = *(float *)ip2; + *(float *)op1 = f(in1, in2); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_ff_f_As_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleBinaryFunc *f = (doubleBinaryFunc *)func; + BINARY_LOOP { + float in1 = *(float *)ip1; + float in2 = *(float *)ip2; + *(float *)op1 = (double)f((double)in1, (double)in2); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleUnaryFunc *f = (doubleUnaryFunc *)func; + UNARY_LOOP { + double in1 = *(double *)ip1; + *(double *)op1 = f(in1); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void 
+PyUFunc_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + doubleBinaryFunc *f = (doubleBinaryFunc *)func; + BINARY_LOOP { + double in1 = *(double *)ip1; + double in2 = *(double *)ip2; + *(double *)op1 = f(in1, in2); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_g_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + longdoubleUnaryFunc *f = (longdoubleUnaryFunc *)func; + UNARY_LOOP { + npy_longdouble in1 = *(npy_longdouble *)ip1; + *(npy_longdouble *)op1 = f(in1); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_gg_g(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + longdoubleBinaryFunc *f = (longdoubleBinaryFunc *)func; + BINARY_LOOP { + npy_longdouble in1 = *(npy_longdouble *)ip1; + npy_longdouble in2 = *(npy_longdouble *)ip2; + *(npy_longdouble *)op1 = f(in1, in2); + } +} + + + +/****************************************************************************** + ** GENERIC COMPLEX LOOPS ** + *****************************************************************************/ + + +typedef void cdoubleUnaryFunc(npy_cdouble *x, npy_cdouble *r); +typedef void cfloatUnaryFunc(npy_cfloat *x, npy_cfloat *r); +typedef void clongdoubleUnaryFunc(npy_clongdouble *x, npy_clongdouble *r); +typedef void cdoubleBinaryFunc(npy_cdouble *x, npy_cdouble *y, npy_cdouble *r); +typedef void cfloatBinaryFunc(npy_cfloat *x, npy_cfloat *y, npy_cfloat *r); +typedef void clongdoubleBinaryFunc(npy_clongdouble *x, npy_clongdouble *y, + npy_clongdouble *r); + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_F_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cfloatUnaryFunc *f = (cfloatUnaryFunc *)func; + UNARY_LOOP { + npy_cfloat in1 = *(npy_cfloat *)ip1; + npy_cfloat *out = (npy_cfloat *)op1; + f(&in1, out); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_F_F_As_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func; + UNARY_LOOP { + npy_cdouble tmp, out; + tmp.real = (double)((float *)ip1)[0]; + tmp.imag = (double)((float *)ip1)[1]; + f(&tmp, &out); + ((float *)op1)[0] = (float)out.real; + ((float *)op1)[1] = (float)out.imag; + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_FF_F(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cfloatBinaryFunc *f = (cfloatBinaryFunc *)func; + BINARY_LOOP { + npy_cfloat in1 = *(npy_cfloat *)ip1; + npy_cfloat in2 = *(npy_cfloat *)ip2; + npy_cfloat *out = (npy_cfloat *)op1; + f(&in1, &in2, out); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_FF_F_As_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func; + BINARY_LOOP { + npy_cdouble tmp1, tmp2, out; + tmp1.real = (double)((float *)ip1)[0]; + tmp1.imag = (double)((float *)ip1)[1]; + tmp2.real = (double)((float *)ip2)[0]; + tmp2.imag = (double)((float *)ip2)[1]; + f(&tmp1, &tmp2, &out); + ((float *)op1)[0] = (float)out.real; + ((float *)op1)[1] = (float)out.imag; + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cdoubleUnaryFunc *f = (cdoubleUnaryFunc *)func; + UNARY_LOOP { + npy_cdouble in1 = *(npy_cdouble *)ip1; + npy_cdouble *out = (npy_cdouble *)op1; + f(&in1, out); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + cdoubleBinaryFunc *f = (cdoubleBinaryFunc *)func; + BINARY_LOOP { + npy_cdouble in1 = *(npy_cdouble *)ip1; + npy_cdouble in2 
= *(npy_cdouble *)ip2; + npy_cdouble *out = (npy_cdouble *)op1; + f(&in1, &in2, out); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_G_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + clongdoubleUnaryFunc *f = (clongdoubleUnaryFunc *)func; + UNARY_LOOP { + npy_clongdouble in1 = *(npy_clongdouble *)ip1; + npy_clongdouble *out = (npy_clongdouble *)op1; + f(&in1, out); + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_GG_G(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + clongdoubleBinaryFunc *f = (clongdoubleBinaryFunc *)func; + BINARY_LOOP { + npy_clongdouble in1 = *(npy_clongdouble *)ip1; + npy_clongdouble in2 = *(npy_clongdouble *)ip2; + npy_clongdouble *out = (npy_clongdouble *)op1; + f(&in1, &in2, out); + } +} + + +/****************************************************************************** + ** GENERIC OBJECT lOOPS ** + *****************************************************************************/ + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_O_O(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + unaryfunc f = (unaryfunc)func; + UNARY_LOOP { + PyObject *in1 = *(PyObject **)ip1; + PyObject **out = (PyObject **)op1; + PyObject *ret = f(in1 ? in1 : Py_None); + if (ret == NULL) { + return; + } + Py_XDECREF(*out); + *out = ret; + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_O_O_method(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + char *meth = (char *)func; + UNARY_LOOP { + PyObject *in1 = *(PyObject **)ip1; + PyObject **out = (PyObject **)op1; + PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None, meth, NULL); + if (ret == NULL) { + return; + } + Py_XDECREF(*out); + *out = ret; + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_OO_O(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + binaryfunc f = (binaryfunc)func; + BINARY_LOOP { + PyObject *in1 = *(PyObject **)ip1; + PyObject *in2 = *(PyObject **)ip2; + PyObject **out = (PyObject **)op1; + PyObject *ret = f(in1 ? in1 : Py_None, in2 ? in2 : Py_None); + if (ret == NULL) { + return; + } + Py_XDECREF(*out); + *out = ret; + } +} + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_OO_O_method(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + char *meth = (char *)func; + BINARY_LOOP { + PyObject *in1 = *(PyObject **)ip1; + PyObject *in2 = *(PyObject **)ip2; + PyObject **out = (PyObject **)op1; + PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None, + meth, "(O)", in2); + if (ret == NULL) { + return; + } + Py_XDECREF(*out); + *out = ret; + } +} + +/* + * A general-purpose ufunc that deals with general-purpose Python callable. 
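+ * For each element the nin inputs are packed into a tuple, the callable is
+ * invoked, and its result(s) are stored into the nout output slots.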
+ * func is a structure with nin, nout, and a Python callable function + */ + +/*UFUNC_API*/ +NPY_NO_EXPORT void +PyUFunc_On_Om(char **args, npy_intp *dimensions, npy_intp *steps, void *func) +{ + npy_intp n = dimensions[0]; + PyUFunc_PyFuncData *data = (PyUFunc_PyFuncData *)func; + int nin = data->nin; + int nout = data->nout; + PyObject *tocall = data->callable; + char *ptrs[NPY_MAXARGS]; + PyObject *arglist, *result; + PyObject *in, **op; + npy_intp i, j, ntot; + + ntot = nin+nout; + + for(j = 0; j < ntot; j++) { + ptrs[j] = args[j]; + } + for(i = 0; i < n; i++) { + arglist = PyTuple_New(nin); + if (arglist == NULL) { + return; + } + for(j = 0; j < nin; j++) { + in = *((PyObject **)ptrs[j]); + if (in == NULL) { + in = Py_None; + } + PyTuple_SET_ITEM(arglist, j, in); + Py_INCREF(in); + } + result = PyEval_CallObject(tocall, arglist); + Py_DECREF(arglist); + if (result == NULL) { + return; + } + if (nout == 0 && result == Py_None) { + /* No output expected, no output received, continue */ + Py_DECREF(result); + } + else if (nout == 1) { + /* Single output expected, assign and continue */ + op = (PyObject **)ptrs[nin]; + Py_XDECREF(*op); + *op = result; + } + else if (PyTuple_Check(result) && nout == PyTuple_Size(result)) { + /* + * Multiple returns match expected number of outputs, assign + * and continue. Will also gobble empty tuples if nout == 0. + */ + for(j = 0; j < nout; j++) { + op = (PyObject **)ptrs[j+nin]; + Py_XDECREF(*op); + *op = PyTuple_GET_ITEM(result, j); + Py_INCREF(*op); + } + Py_DECREF(result); + } + else { + /* Mismatch between returns and expected outputs, exit */ + Py_DECREF(result); + return; + } + for(j = 0; j < ntot; j++) { + ptrs[j] += steps[j]; + } + } +} + +/* + ***************************************************************************** + ** BOOLEAN LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * #kind = equal, not_equal, greater, greater_equal, less, less_equal# + * #OP = ==, !=, >, >=, <, <=# + **/ + +NPY_NO_EXPORT void +BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + npy_bool in1 = *((npy_bool *)ip1) != 0; + npy_bool in2 = *((npy_bool *)ip2) != 0; + *((npy_bool *)op1)= in1 @OP@ in2; + } +} +/**end repeat**/ + + +/**begin repeat + * #kind = logical_and, logical_or# + * #OP = &&, ||# + * #SC = ==, !=# + * #and = 1, 0# + **/ + +NPY_NO_EXPORT void +BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if(IS_BINARY_REDUCE) { +#ifdef NPY_HAVE_SSE2_INTRINSICS + /* + * stick with our variant for more reliable performance, only known + * platform which outperforms it by ~20% is an i7 with glibc 2.17 + */ + if (run_reduce_simd_@kind@_BOOL(args, dimensions, steps)) { + return; + } +#else + /* for now only use libc on 32-bit/non-x86 */ + if (steps[1] == 1) { + npy_bool * op = (npy_bool *)args[0]; +#if @and@ + /* np.all(), search for a zero (false) */ + if (*op) { + *op = memchr(args[1], 0, dimensions[0]) == NULL; + } +#else + /* + * np.any(), search for a non-zero (true) via comparing against + * zero blocks, memcmp is faster than memchr on SSE4 machines + * with glibc >= 2.12 and memchr can only check for equal 1 + */ + static const npy_bool zero[4096]; /* zero by C standard */ + npy_uintp i, n = dimensions[0]; + + for (i = 0; !*op && i < n - (n % sizeof(zero)); i += sizeof(zero)) { + *op = memcmp(&args[1][i], zero, sizeof(zero)) != 0; + } + if (!*op && n - i > 0) { + *op = memcmp(&args[1][i], zero, n 
- i) != 0; + } +#endif + return; + } +#endif + else { + BINARY_REDUCE_LOOP(npy_bool) { + const npy_bool in2 = *(npy_bool *)ip2; + io1 = io1 @OP@ in2; + if (io1 @SC@ 0) { + break; + } + } + *((npy_bool *)iop1) = io1; + } + } + else { + if (run_binary_simd_@kind@_BOOL(args, dimensions, steps)) { + return; + } + else { + BINARY_LOOP { + const npy_bool in1 = *(npy_bool *)ip1; + const npy_bool in2 = *(npy_bool *)ip2; + *((npy_bool *)op1) = in1 @OP@ in2; + } + } + } +} +/**end repeat**/ + +/**begin repeat + * #kind = absolute, logical_not# + * #OP = !=, ==# + **/ +NPY_NO_EXPORT void +BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (run_unary_simd_@kind@_BOOL(args, dimensions, steps)) { + return; + } + else { + UNARY_LOOP { + npy_bool in1 = *(npy_bool *)ip1; + *((npy_bool *)op1) = in1 @OP@ 0; + } + } +} +/**end repeat**/ + +NPY_NO_EXPORT void +BOOL__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + *((npy_bool *)op1) = 1; + } +} + + +/* + ***************************************************************************** + ** INTEGER LOOPS + ***************************************************************************** + */ + +/**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double, + * npy_double, npy_double, npy_double, npy_double# + * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# + */ + +#define @TYPE@_floor_divide @TYPE@_divide +#define @TYPE@_fmax @TYPE@_maximum +#define @TYPE@_fmin @TYPE@_minimum + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + *((@type@ *)op1) = 1; + } +} + +NPY_NO_EXPORT void +@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = +in); +} + +/**begin repeat1 + * #isa = , _avx2# + * #ISA = , AVX2# + * #CHK = 1, HAVE_ATTRIBUTE_TARGET_AVX2# + * #ATTR = , NPY_GCC_TARGET_AVX2# + */ + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_square@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = in * in); +} +#endif + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_reciprocal@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = 1.0 / in); +} +#endif + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_conjugate@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = in); +} +#endif + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_negative@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = -in); +} +#endif + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_logical_not@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, npy_bool, *out = !in); +} +#endif + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_invert@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = ~in); +} +#endif + +/**begin repeat2 
+ * Arithmetic + * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor, + * left_shift, right_shift# + * #OP = +, -,*, &, |, ^, <<, >># + */ + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if(IS_BINARY_REDUCE) { + BINARY_REDUCE_LOOP(@type@) { + io1 @OP@= *(@type@ *)ip2; + } + *((@type@ *)iop1) = io1; + } + else { + BINARY_LOOP_FAST(@type@, @type@, *out = in1 @OP@ in2); + } +} +#endif + +/**end repeat2**/ + +/**begin repeat2 + * #kind = equal, not_equal, greater, greater_equal, less, less_equal, + * logical_and, logical_or# + * #OP = ==, !=, >, >=, <, <=, &&, ||# + */ + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* + * gcc vectorization of this is not good (PR60575) but manual integer + * vectorization is too tedious to be worthwhile + */ + BINARY_LOOP_FAST(@type@, npy_bool, *out = in1 @OP@ in2); +} +#endif + +/**end repeat2**/ + +#if @CHK@ +NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void +@TYPE@_logical_xor@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const int t1 = !!*(@type@ *)ip1; + const int t2 = !!*(@type@ *)ip2; + *((npy_bool *)op1) = (t1 != t2); + } +} +#endif + +/**end repeat1**/ + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = >, <# + **/ + +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE) { + BINARY_REDUCE_LOOP(@type@) { + const @type@ in2 = *(@type@ *)ip2; + io1 = (io1 @OP@ in2) ? io1 : in2; + } + *((@type@ *)iop1) = io1; + } + else { + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2; + } + } +} + +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_power(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + @type@ in1 = *(@type@ *)ip1; + @type@ in2 = *(@type@ *)ip2; + @type@ out; + +#if @SIGNED@ + if (in2 < 0) { + NPY_ALLOW_C_API_DEF + NPY_ALLOW_C_API; + PyErr_SetString(PyExc_ValueError, + "Integers to negative integer powers are not allowed."); + NPY_DISABLE_C_API; + return; + } +#endif + if (in2 == 0) { + *((@type@ *)op1) = 1; + continue; + } + if (in1 == 1) { + *((@type@ *)op1) = 1; + continue; + } + + out = in2 & 1 ? in1 : 1; + in2 >>= 1; + while (in2 > 0) { + in1 *= in1; + if (in2 & 1) { + out *= in1; + } + in2 >>= 1; + } + *((@type@ *) op1) = out; + } +} + +NPY_NO_EXPORT void +@TYPE@_fmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + } + else { + *((@type@ *)op1)= in1 % in2; + } + + } +} + +/**end repeat**/ + +/**begin repeat + * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG# + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong# + * #c = ,,,l,ll# + */ + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = (in >= 0) ? in : -in); +} + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 1 : (in < 0 ? 
-1 : 0)); +} + +NPY_NO_EXPORT void +@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + /* + * FIXME: On x86 at least, dividing the smallest representable integer + * by -1 causes a SIFGPE (division overflow). We treat this case here + * (to avoid a SIGFPE crash at python level), but a good solution would + * be to treat integer division problems separately from FPU exceptions + * (i.e. a different approach than npy_set_floatstatus_divbyzero()). + */ + if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + } + else if (((in1 > 0) != (in2 > 0)) && (in1 % in2 != 0)) { + *((@type@ *)op1) = in1/in2 - 1; + } + else { + *((@type@ *)op1) = in1/in2; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + } + else { + /* handle mixed case the way Python does */ + const @type@ rem = in1 % in2; + if ((in1 > 0) == (in2 > 0) || rem == 0) { + *((@type@ *)op1) = rem; + } + else { + *((@type@ *)op1) = rem + in2; + } + } + } +} + +NPY_NO_EXPORT void +@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP_TWO_OUT { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + /* see FIXME note for divide above */ + if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + *((@type@ *)op2) = 0; + } + else { + /* handle mixed case the way Python does */ + const @type@ quo = in1 / in2; + const @type@ rem = in1 % in2; + if ((in1 > 0) == (in2 > 0) || rem == 0) { + *((@type@ *)op1) = quo; + *((@type@ *)op2) = rem; + } + else { + *((@type@ *)op1) = quo - 1; + *((@type@ *)op2) = rem + in2; + } + } + } +} + +/**begin repeat1 + * #kind = gcd, lcm# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = npy_@kind@@c@(in1, in2); + } +} +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG# + * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong# + * #c = u,u,u,ul,ull# + */ + +NPY_NO_EXPORT void +@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = in1; + } +} + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 
1 : 0); +} + +NPY_NO_EXPORT void +@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + } + else { + *((@type@ *)op1)= in1/in2; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + } + else { + *((@type@ *)op1) = in1 % in2; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP_TWO_OUT { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((@type@ *)op1) = 0; + *((@type@ *)op2) = 0; + } + else { + *((@type@ *)op1)= in1/in2; + *((@type@ *)op2) = in1 % in2; + } + } +} + +/**begin repeat1 + * #kind = gcd, lcm# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = npy_@kind@@c@(in1, in2); + } +} +/**end repeat1**/ + +/**end repeat**/ + +/* + ***************************************************************************** + ** DATETIME LOOPS ** + ***************************************************************************** + */ + +NPY_NO_EXPORT void +TIMEDELTA_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + if (in1 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = -in1; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + *((npy_timedelta *)op1) = +in1; + } +} + +NPY_NO_EXPORT void +TIMEDELTA_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + if (in1 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = (in1 >= 0) ? in1 : -in1; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + *((npy_timedelta *)op1) = in1 > 0 ? 1 : (in1 < 0 ? 
-1 : 0); + } +} + +/**begin repeat + * #type = npy_datetime, npy_timedelta# + * #TYPE = DATETIME, TIMEDELTA# + */ + +NPY_NO_EXPORT void +@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((npy_bool *)op1) = (in1 == NPY_DATETIME_NAT); + } +} + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + *((@type@ *)op1) = 1; + } +} + +/**begin repeat1 + * #kind = equal, greater, greater_equal, less, less_equal# + * #OP = ==, >, >=, <, <=# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((npy_bool *)op1) = (in1 @OP@ in2 && + in1 != NPY_DATETIME_NAT && + in2 != NPY_DATETIME_NAT); + } +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((npy_bool *)op1) = (in1 != in2 || + in1 == NPY_DATETIME_NAT || + in2 == NPY_DATETIME_NAT); + } +} + + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = >, <# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + if (in1 == NPY_DATETIME_NAT) { + *((@type@ *)op1) = in2; + } + else if (in2 == NPY_DATETIME_NAT) { + *((@type@ *)op1) = in1; + } + else { + *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2; + } + } +} +/**end repeat1**/ + +/**end repeat**/ + +NPY_NO_EXPORT void +DATETIME_Mm_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + BINARY_LOOP { + const npy_datetime in1 = *(npy_datetime *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_datetime *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_datetime *)op1) = in1 + in2; + } + } +} + +NPY_NO_EXPORT void +DATETIME_mM_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_datetime in2 = *(npy_datetime *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_datetime *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_datetime *)op1) = in1 + in2; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 + in2; + } + } +} + +NPY_NO_EXPORT void +DATETIME_Mm_M_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_datetime in1 = *(npy_datetime *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_datetime *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_datetime *)op1) = in1 - in2; + } + } +} + +NPY_NO_EXPORT void +DATETIME_MM_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const 
npy_datetime in1 = *(npy_datetime *)ip1; + const npy_datetime in2 = *(npy_datetime *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 - in2; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 - in2; + } + } +} + +/* Note: Assuming 'q' == NPY_LONGLONG */ +NPY_NO_EXPORT void +TIMEDELTA_mq_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_int64 in2 = *(npy_int64 *)ip2; + if (in1 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 * in2; + } + } +} + +/* Note: Assuming 'q' == NPY_LONGLONG */ +NPY_NO_EXPORT void +TIMEDELTA_qm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_int64 in1 = *(npy_int64 *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 * in2; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_md_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const double in2 = *(double *)ip2; + if (in1 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + double result = in1 * in2; + if (npy_isfinite(result)) { + *((npy_timedelta *)op1) = (npy_timedelta)result; + } + else { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_dm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const double in1 = *(double *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + double result = in1 * in2; + if (npy_isfinite(result)) { + *((npy_timedelta *)op1) = (npy_timedelta)result; + } + else { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + } + } +} + +/* Note: Assuming 'q' == NPY_LONGLONG */ +NPY_NO_EXPORT void +TIMEDELTA_mq_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_int64 in2 = *(npy_int64 *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == 0) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + *((npy_timedelta *)op1) = in1 / in2; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_md_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const double in2 = *(double *)ip2; + if (in1 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + double result = in1 / in2; + if (npy_isfinite(result)) { + *((npy_timedelta *)op1) = (npy_timedelta)result; + } + else { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, 
void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((double *)op1) = NPY_NAN; + } + else { + *((double *)op1) = (double)in1 / (double)in2; + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((npy_timedelta *)op1) = NPY_DATETIME_NAT; + } + else { + /* handle mixed case the way Python does */ + const npy_timedelta rem = in1 % in2; + if ((in1 > 0) == (in2 > 0) || rem == 0) { + *((npy_timedelta *)op1) = rem; + } + else { + *((npy_timedelta *)op1) = rem + in2; + } + } + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_q_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + npy_set_floatstatus_invalid(); + *((npy_int64 *)op1) = 0; + } + else if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((npy_int64 *)op1) = 0; + } + else { + if (((in1 > 0) != (in2 > 0)) && (in1 % in2 != 0)) { + *((npy_int64 *)op1) = in1/in2 - 1; + } + else { + *((npy_int64 *)op1) = in1/in2; + } + } + } +} + +NPY_NO_EXPORT void +TIMEDELTA_mm_qm_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP_TWO_OUT { + const npy_timedelta in1 = *(npy_timedelta *)ip1; + const npy_timedelta in2 = *(npy_timedelta *)ip2; + if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) { + npy_set_floatstatus_invalid(); + *((npy_int64 *)op1) = 0; + *((npy_timedelta *)op2) = NPY_DATETIME_NAT; + } + else if (in2 == 0) { + npy_set_floatstatus_divbyzero(); + *((npy_int64 *)op1) = 0; + *((npy_timedelta *)op2) = NPY_DATETIME_NAT; + } + else { + const npy_int64 quo = in1 / in2; + const npy_timedelta rem = in1 % in2; + if ((in1 > 0) == (in2 > 0) || rem == 0) { + *((npy_int64 *)op1) = quo; + *((npy_timedelta *)op2) = rem; + } + else { + *((npy_int64 *)op1) = quo - 1; + *((npy_timedelta *)op2) = rem + in2; + } + } + } +} + +/* + ***************************************************************************** + ** FLOAT LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * Float types + * #type = npy_float, npy_double# + * #TYPE = FLOAT, DOUBLE# + * #scalarf = npy_sqrtf, npy_sqrt# + */ + +NPY_NO_EXPORT void +@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (!run_unary_simd_sqrt_@TYPE@(args, dimensions, steps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *(@type@ *)op1 = @scalarf@(in1); + } + } +} + +/**end repeat**/ + + +/**begin repeat + * Float types + * #type = npy_float, npy_double, npy_longdouble, npy_float# + * #dtype = npy_float, npy_double, npy_longdouble, npy_half# + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF# + * #c = f, , l, # + * #C = F, , L, # + * #trf = , , , npy_half_to_float# + */ + +/* + * Pairwise summation, rounding error O(lg n) instead of O(n). + * The recursion depth is O(lg n) as well. 
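+ * Runs shorter than 8 elements are summed naively; blocks of up to
+ * PW_BLOCKSIZE elements are summed with eight partial accumulators; longer
+ * runs are split in half (kept a multiple of 8) and summed recursively.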
+ * when updating also update similar complex floats summation + */ +static @type@ +pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride) +{ + if (n < 8) { + npy_intp i; + @type@ res = 0.; + + for (i = 0; i < n; i++) { + res += @trf@(*((@dtype@*)(a + i * stride))); + } + return res; + } + else if (n <= PW_BLOCKSIZE) { + npy_intp i; + @type@ r[8], res; + + /* + * sum a block with 8 accumulators + * 8 times unroll reduces blocksize to 16 and allows vectorization with + * avx without changing summation ordering + */ + r[0] = @trf@(*((@dtype@ *)(a + 0 * stride))); + r[1] = @trf@(*((@dtype@ *)(a + 1 * stride))); + r[2] = @trf@(*((@dtype@ *)(a + 2 * stride))); + r[3] = @trf@(*((@dtype@ *)(a + 3 * stride))); + r[4] = @trf@(*((@dtype@ *)(a + 4 * stride))); + r[5] = @trf@(*((@dtype@ *)(a + 5 * stride))); + r[6] = @trf@(*((@dtype@ *)(a + 6 * stride))); + r[7] = @trf@(*((@dtype@ *)(a + 7 * stride))); + + for (i = 8; i < n - (n % 8); i += 8) { + /* small blocksizes seems to mess with hardware prefetch */ + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@dtype@))*stride, 0, 3); + r[0] += @trf@(*((@dtype@ *)(a + (i + 0) * stride))); + r[1] += @trf@(*((@dtype@ *)(a + (i + 1) * stride))); + r[2] += @trf@(*((@dtype@ *)(a + (i + 2) * stride))); + r[3] += @trf@(*((@dtype@ *)(a + (i + 3) * stride))); + r[4] += @trf@(*((@dtype@ *)(a + (i + 4) * stride))); + r[5] += @trf@(*((@dtype@ *)(a + (i + 5) * stride))); + r[6] += @trf@(*((@dtype@ *)(a + (i + 6) * stride))); + r[7] += @trf@(*((@dtype@ *)(a + (i + 7) * stride))); + } + + /* accumulate now to avoid stack spills for single peel loop */ + res = ((r[0] + r[1]) + (r[2] + r[3])) + + ((r[4] + r[5]) + (r[6] + r[7])); + + /* do non multiple of 8 rest */ + for (; i < n; i++) { + res += @trf@(*((@dtype@ *)(a + i * stride))); + } + return res; + } + else { + /* divide by two but avoid non-multiples of unroll factor */ + npy_intp n2 = n / 2; + + n2 -= n2 % 8; + return pairwise_sum_@TYPE@(a, n2, stride) + + pairwise_sum_@TYPE@(a + n2 * stride, n - n2, stride); + } +} + +/**end repeat**/ + +/**begin repeat + * Float types + * #type = npy_float, npy_double, npy_longdouble# + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# + * #c = f, , l# + * #C = F, , L# + */ + +/**begin repeat1 + * Arithmetic + * # kind = add, subtract, multiply, divide# + * # OP = +, -, *, /# + * # PW = 1, 0, 0, 0# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE) { +#if @PW@ + @type@ * iop1 = (@type@ *)args[0]; + npy_intp n = dimensions[0]; + + *iop1 @OP@= pairwise_sum_@TYPE@(args[1], n, steps[1]); +#else + BINARY_REDUCE_LOOP(@type@) { + io1 @OP@= *(@type@ *)ip2; + } + *((@type@ *)iop1) = io1; +#endif + } + else if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) { + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = in1 @OP@ in2; + } + } +} +/**end repeat1**/ + +/**begin repeat1 + * #kind = equal, not_equal, less, less_equal, greater, greater_equal, + * logical_and, logical_or# + * #OP = ==, !=, <, <=, >, >=, &&, ||# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) { + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((npy_bool *)op1) = in1 @OP@ in2; + } + } +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void 
*NPY_UNUSED(func)) +{ + BINARY_LOOP { + const int t1 = !!*(@type@ *)ip1; + const int t2 = !!*(@type@ *)ip2; + *((npy_bool *)op1) = (t1 != t2); + } +} + +NPY_NO_EXPORT void +@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((npy_bool *)op1) = !in1; + } +} + +/**begin repeat1 + * #kind = isnan, isinf, isfinite, signbit# + * #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (!run_@kind@_simd_@TYPE@(args, dimensions, steps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((npy_bool *)op1) = @func@(in1) != 0; + } + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = npy_spacing@c@(in1); + } +} + +NPY_NO_EXPORT void +@TYPE@_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1)= npy_copysign@c@(in1, in2); + } +} + +NPY_NO_EXPORT void +@TYPE@_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1)= npy_nextafter@c@(in1, in2); + } +} + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = >=, <=# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* */ + if (IS_BINARY_REDUCE) { + if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) { + BINARY_REDUCE_LOOP(@type@) { + const @type@ in2 = *(@type@ *)ip2; + /* Order of operations important for MSVC 2015 */ + io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2; + } + *((@type@ *)iop1) = io1; + } + } + else { + BINARY_LOOP { + @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + /* Order of operations important for MSVC 2015 */ + in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2; + *((@type@ *)op1) = in1; + } + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +/**begin repeat1 + * #kind = fmax, fmin# + * #OP = >=, <=# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* */ + if (IS_BINARY_REDUCE) { + BINARY_REDUCE_LOOP(@type@) { + const @type@ in2 = *(@type@ *)ip2; + /* Order of operations important for MSVC 2015 */ + io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2; + } + *((@type@ *)iop1) = io1; + } + else { + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + /* Order of operations important for MSVC 2015 */ + *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? 
in1 : in2; + } + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + @type@ mod; + *((@type@ *)op1) = npy_divmod@c@(in1, in2, &mod); + } +} + +NPY_NO_EXPORT void +@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + npy_divmod@c@(in1, in2, (@type@ *)op1); + } +} + +NPY_NO_EXPORT void +@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP_TWO_OUT { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = npy_divmod@c@(in1, in2, (@type@ *)op2); + } +} + +NPY_NO_EXPORT void +@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + char * margs[] = {args[0], args[0], args[1]}; + npy_intp msteps[] = {steps[0], steps[0], steps[1]}; + if (!run_binary_simd_multiply_@TYPE@(margs, dimensions, msteps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = in1*in1; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + @type@ one = 1.@c@; + char * margs[] = {(char*)&one, args[0], args[1]}; + npy_intp msteps[] = {0, steps[0], steps[1]}; + if (!run_binary_simd_divide_@TYPE@(margs, dimensions, msteps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = 1/in1; + } + } +} + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + *((@type@ *)op1) = 1; + } +} + +NPY_NO_EXPORT void +@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = in1; + } +} + +NPY_NO_EXPORT void +@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (!run_unary_simd_absolute_@TYPE@(args, dimensions, steps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ tmp = in1 > 0 ? in1 : -in1; + /* add 0 to clear -0.0 */ + *((@type@ *)op1) = tmp + 0; + } + } + npy_clear_floatstatus_barrier((char*)dimensions); +} + +NPY_NO_EXPORT void +@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (!run_unary_simd_negative_@TYPE@(args, dimensions, steps)) { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = -in1; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = +in1; + } +} + +NPY_NO_EXPORT void +@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* Sign of nan is nan */ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = in1 > 0 ? 1 : (in1 < 0 ? -1 : (in1 == 0 ? 
0 : in1)); + } +} + +NPY_NO_EXPORT void +@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_TWO_OUT { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = npy_modf@c@(in1, (@type@ *)op2); + } +} + +NPY_NO_EXPORT void +@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_TWO_OUT { + const @type@ in1 = *(@type@ *)ip1; + *((@type@ *)op1) = npy_frexp@c@(in1, (int *)op2); + } +} + +NPY_NO_EXPORT void +@TYPE@_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const int in2 = *(int *)ip2; + *((@type@ *)op1) = npy_ldexp@c@(in1, in2); + } +} + +NPY_NO_EXPORT void +@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* + * Additional loop to handle npy_long integer inputs (cf. #866, #1633). + * npy_long != npy_int on many 64-bit platforms, so we need this second loop + * to handle the default integer type. + */ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const long in2 = *(long *)ip2; + if (((int)in2) == in2) { + /* Range OK */ + *((@type@ *)op1) = npy_ldexp@c@(in1, ((int)in2)); + } + else { + /* + * Outside npy_int range -- also ldexp will overflow in this case, + * given that exponent has less bits than npy_int. + */ + if (in2 > 0) { + *((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MAX_INT); + } + else { + *((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MIN_INT); + } + } + } +} + +#define @TYPE@_true_divide @TYPE@_divide + +/**end repeat**/ + +/* + ***************************************************************************** + ** HALF-FLOAT LOOPS ** + ***************************************************************************** + */ + + +/**begin repeat + * Arithmetic + * # kind = add, subtract, multiply, divide# + * # OP = +, -, *, /# + * # PW = 1, 0, 0, 0# + */ +NPY_NO_EXPORT void +HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE) { + char *iop1 = args[0]; + float io1 = npy_half_to_float(*(npy_half *)iop1); +#if @PW@ + npy_intp n = dimensions[0]; + + io1 @OP@= pairwise_sum_HALF(args[1], n, steps[1]); +#else + BINARY_REDUCE_LOOP_INNER { + io1 @OP@= npy_half_to_float(*(npy_half *)ip2); + } +#endif + *((npy_half *)iop1) = npy_float_to_half(io1); + } + else { + BINARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + const float in2 = npy_half_to_float(*(npy_half *)ip2); + *((npy_half *)op1) = npy_float_to_half(in1 @OP@ in2); + } + } +} +/**end repeat**/ + +#define _HALF_LOGICAL_AND(a,b) (!npy_half_iszero(a) && !npy_half_iszero(b)) +#define _HALF_LOGICAL_OR(a,b) (!npy_half_iszero(a) || !npy_half_iszero(b)) +/**begin repeat + * #kind = equal, not_equal, less, less_equal, greater, + * greater_equal, logical_and, logical_or# + * #OP = npy_half_eq, npy_half_ne, npy_half_lt, npy_half_le, npy_half_gt, + * npy_half_ge, _HALF_LOGICAL_AND, _HALF_LOGICAL_OR# + */ +NPY_NO_EXPORT void +HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_bool *)op1) = @OP@(in1, in2); + } +} +/**end repeat**/ +#undef _HALF_LOGICAL_AND +#undef _HALF_LOGICAL_OR + +NPY_NO_EXPORT void +HALF_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const int in1 = !npy_half_iszero(*(npy_half *)ip1); + const int in2 = 
!npy_half_iszero(*(npy_half *)ip2); + *((npy_bool *)op1) = (in1 != in2); + } +} + +NPY_NO_EXPORT void +HALF_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_bool *)op1) = npy_half_iszero(in1); + } +} + +/**begin repeat + * #kind = isnan, isinf, isfinite, signbit# + * #func = npy_half_isnan, npy_half_isinf, npy_half_isfinite, npy_half_signbit# + **/ +NPY_NO_EXPORT void +HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_bool *)op1) = @func@(in1) != 0; + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat**/ + +NPY_NO_EXPORT void +HALF_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = npy_half_spacing(in1); + } +} + +NPY_NO_EXPORT void +HALF_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_half *)op1)= npy_half_copysign(in1, in2); + } +} + +NPY_NO_EXPORT void +HALF_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_half *)op1)= npy_half_nextafter(in1, in2); + } +} + +/**begin repeat + * #kind = maximum, minimum# + * #OP = npy_half_ge, npy_half_le# + **/ +NPY_NO_EXPORT void +HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* */ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in1)) ? in1 : in2; + } + /* npy_half_isnan will never set floatstatus_invalid, so do not clear */ +} +/**end repeat**/ + +/**begin repeat + * #kind = fmax, fmin# + * #OP = npy_half_ge, npy_half_le# + **/ +NPY_NO_EXPORT void +HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* */ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? 
in1 : in2; + } + /* npy_half_isnan will never set floatstatus_invalid, so do not clear */ +} +/**end repeat**/ + +NPY_NO_EXPORT void +HALF_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + npy_half mod; + *((npy_half *)op1) = npy_half_divmod(in1, in2, &mod); + } +} + +NPY_NO_EXPORT void +HALF_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + npy_half_divmod(in1, in2, (npy_half *)op1); + } +} + +NPY_NO_EXPORT void +HALF_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP_TWO_OUT { + const npy_half in1 = *(npy_half *)ip1; + const npy_half in2 = *(npy_half *)ip2; + *((npy_half *)op1) = npy_half_divmod(in1, in2, (npy_half *)op2); + } +} + +NPY_NO_EXPORT void +HALF_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + *((npy_half *)op1) = npy_float_to_half(in1*in1); + } +} + +NPY_NO_EXPORT void +HALF_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + *((npy_half *)op1) = npy_float_to_half(1/in1); + } +} + +NPY_NO_EXPORT void +HALF__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + *((npy_half *)op1) = NPY_HALF_ONE; + } +} + +NPY_NO_EXPORT void +HALF_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = in1; + } +} + +NPY_NO_EXPORT void +HALF_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = in1&0x7fffu; + } +} + +NPY_NO_EXPORT void +HALF_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = in1^0x8000u; + } +} + +NPY_NO_EXPORT void +HALF_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = +in1; + } +} + +NPY_NO_EXPORT void +HALF_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* Sign of nan is nan */ + UNARY_LOOP { + const npy_half in1 = *(npy_half *)ip1; + *((npy_half *)op1) = npy_half_isnan(in1) ? in1 : + (((in1&0x7fffu) == 0) ? 0 : + (((in1&0x8000u) == 0) ? 
NPY_HALF_ONE : NPY_HALF_NEGONE)); + } +} + +NPY_NO_EXPORT void +HALF_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + float temp; + + UNARY_LOOP_TWO_OUT { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + *((npy_half *)op1) = npy_float_to_half(npy_modff(in1, &temp)); + *((npy_half *)op2) = npy_float_to_half(temp); + } +} + +NPY_NO_EXPORT void +HALF_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP_TWO_OUT { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + *((npy_half *)op1) = npy_float_to_half(npy_frexpf(in1, (int *)op2)); + } +} + +NPY_NO_EXPORT void +HALF_ldexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + const int in2 = *(int *)ip2; + *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, in2)); + } +} + +NPY_NO_EXPORT void +HALF_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* + * Additional loop to handle npy_long integer inputs (cf. #866, #1633). + * npy_long != npy_int on many 64-bit platforms, so we need this second loop + * to handle the default integer type. + */ + BINARY_LOOP { + const float in1 = npy_half_to_float(*(npy_half *)ip1); + const long in2 = *(long *)ip2; + if (((int)in2) == in2) { + /* Range OK */ + *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, ((int)in2))); + } + else { + /* + * Outside npy_int range -- also ldexp will overflow in this case, + * given that exponent has less bits than npy_int. + */ + if (in2 > 0) { + *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MAX_INT)); + } + else { + *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MIN_INT)); + } + } + } +} + +#define HALF_true_divide HALF_divide + + +/* + ***************************************************************************** + ** COMPLEX LOOPS ** + ***************************************************************************** + */ + +#define CGE(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \ + || (xr == yr && xi >= yi)) +#define CLE(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \ + || (xr == yr && xi <= yi)) +#define CGT(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \ + || (xr == yr && xi > yi)) +#define CLT(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \ + || (xr == yr && xi < yi)) +#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi) +#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi) + +/**begin repeat + * complex types + * #TYPE = CFLOAT, CDOUBLE, CLONGDOUBLE# + * #ftype = npy_float, npy_double, npy_longdouble# + * #c = f, , l# + * #C = F, , L# + */ + +/* similar to pairwise sum of real floats */ +static void +pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, + npy_intp stride) +{ + assert(n % 2 == 0); + if (n < 8) { + npy_intp i; + + *rr = 0.; + *ri = 0.; + for (i = 0; i < n; i += 2) { + *rr += *((@ftype@ *)(a + i * stride + 0)); + *ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@))); + } + return; + } + else if (n <= PW_BLOCKSIZE) { + npy_intp i; + @ftype@ r[8]; + + /* + * sum a block with 8 accumulators + * 8 times unroll reduces blocksize to 16 and allows vectorization with + * avx without changing summation ordering + */ + r[0] = *((@ftype@ *)(a + 0 * stride)); + r[1] = *((@ftype@ *)(a + 0 * stride + sizeof(@ftype@))); + r[2] = *((@ftype@ *)(a + 2 * stride)); + r[3] = *((@ftype@ *)(a + 2 * stride + sizeof(@ftype@))); + r[4] = *((@ftype@ *)(a 
+ 4 * stride)); + r[5] = *((@ftype@ *)(a + 4 * stride + sizeof(@ftype@))); + r[6] = *((@ftype@ *)(a + 6 * stride)); + r[7] = *((@ftype@ *)(a + 6 * stride + sizeof(@ftype@))); + + for (i = 8; i < n - (n % 8); i += 8) { + /* small blocksizes seems to mess with hardware prefetch */ + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@ftype@))*stride, 0, 3); + r[0] += *((@ftype@ *)(a + (i + 0) * stride)); + r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@))); + r[2] += *((@ftype@ *)(a + (i + 2) * stride)); + r[3] += *((@ftype@ *)(a + (i + 2) * stride + sizeof(@ftype@))); + r[4] += *((@ftype@ *)(a + (i + 4) * stride)); + r[5] += *((@ftype@ *)(a + (i + 4) * stride + sizeof(@ftype@))); + r[6] += *((@ftype@ *)(a + (i + 6) * stride)); + r[7] += *((@ftype@ *)(a + (i + 6) * stride + sizeof(@ftype@))); + } + + /* accumulate now to avoid stack spills for single peel loop */ + *rr = ((r[0] + r[2]) + (r[4] + r[6])); + *ri = ((r[1] + r[3]) + (r[5] + r[7])); + + /* do non multiple of 8 rest */ + for (; i < n; i+=2) { + *rr += *((@ftype@ *)(a + i * stride + 0)); + *ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@))); + } + return; + } + else { + /* divide by two but avoid non-multiples of unroll factor */ + @ftype@ rr1, ri1, rr2, ri2; + npy_intp n2 = n / 2; + + n2 -= n2 % 8; + pairwise_sum_@TYPE@(&rr1, &ri1, a, n2, stride); + pairwise_sum_@TYPE@(&rr2, &ri2, a + n2 * stride, n - n2, stride); + *rr = rr1 + rr2; + *ri = ri1 + ri2; + return; + } +} + +/**begin repeat1 + * arithmetic + * #kind = add, subtract# + * #OP = +, -# + * #PW = 1, 0# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE && @PW@) { + npy_intp n = dimensions[0]; + @ftype@ * or = ((@ftype@ *)args[0]); + @ftype@ * oi = ((@ftype@ *)args[0]) + 1; + @ftype@ rr, ri; + + pairwise_sum_@TYPE@(&rr, &ri, args[1], n * 2, steps[1] / 2); + *or @OP@= rr; + *oi @OP@= ri; + return; + } + else { + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + ((@ftype@ *)op1)[0] = in1r @OP@ in2r; + ((@ftype@ *)op1)[1] = in1i @OP@ in2i; + } + } +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + ((@ftype@ *)op1)[0] = in1r*in2r - in1i*in2i; + ((@ftype@ *)op1)[1] = in1r*in2i + in1i*in2r; + } +} + +NPY_NO_EXPORT void +@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + const @ftype@ in2r_abs = npy_fabs@c@(in2r); + const @ftype@ in2i_abs = npy_fabs@c@(in2i); + if (in2r_abs >= in2i_abs) { + if (in2r_abs == 0 && in2i_abs == 0) { + /* divide by zero should yield a complex inf or nan */ + ((@ftype@ *)op1)[0] = in1r/in2r_abs; + ((@ftype@ *)op1)[1] = in1i/in2i_abs; + } + else { + const @ftype@ rat = in2i/in2r; + const @ftype@ scl = 1.0@c@/(in2r + in2i*rat); + ((@ftype@ *)op1)[0] = (in1r + in1i*rat)*scl; + ((@ftype@ *)op1)[1] = (in1i - in1r*rat)*scl; + } + } + else { + const @ftype@ rat = in2r/in2i; + const @ftype@ scl = 1.0@c@/(in2i + 
in2r*rat); + ((@ftype@ *)op1)[0] = (in1r*rat + in1i)*scl; + ((@ftype@ *)op1)[1] = (in1i*rat - in1r)*scl; + } + } +} + +NPY_NO_EXPORT void +@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + if (npy_fabs@c@(in2r) >= npy_fabs@c@(in2i)) { + const @ftype@ rat = in2i/in2r; + ((@ftype@ *)op1)[0] = npy_floor@c@((in1r + in1i*rat)/(in2r + in2i*rat)); + ((@ftype@ *)op1)[1] = 0; + } + else { + const @ftype@ rat = in2r/in2i; + ((@ftype@ *)op1)[0] = npy_floor@c@((in1r*rat + in1i)/(in2i + in2r*rat)); + ((@ftype@ *)op1)[1] = 0; + } + } +} + +/**begin repeat1 + * #kind= greater, greater_equal, less, less_equal, equal, not_equal# + * #OP = CGT, CGE, CLT, CLE, CEQ, CNE# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + *((npy_bool *)op1) = @OP@(in1r,in1i,in2r,in2i); + } +} +/**end repeat1**/ + +/**begin repeat1 + #kind = logical_and, logical_or# + #OP1 = ||, ||# + #OP2 = &&, ||# +*/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + *((npy_bool *)op1) = (in1r @OP1@ in1i) @OP2@ (in2r @OP1@ in2i); + } +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + const npy_bool tmp1 = (in1r || in1i); + const npy_bool tmp2 = (in2r || in2i); + *((npy_bool *)op1) = tmp1 != tmp2; + } +} + +NPY_NO_EXPORT void +@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + *((npy_bool *)op1) = !(in1r || in1i); + } +} + +/**begin repeat1 + * #kind = isnan, isinf, isfinite# + * #func = npy_isnan, npy_isinf, npy_isfinite# + * #OP = ||, ||, &&# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + *((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i); + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + ((@ftype@ *)op1)[0] = in1r*in1r - in1i*in1i; + ((@ftype@ *)op1)[1] = in1r*in1i + in1i*in1r; + } +} + +NPY_NO_EXPORT void +@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + if (npy_fabs@c@(in1i) <= 
npy_fabs@c@(in1r)) { + const @ftype@ r = in1i/in1r; + const @ftype@ d = in1r + in1i*r; + ((@ftype@ *)op1)[0] = 1/d; + ((@ftype@ *)op1)[1] = -r/d; + } else { + const @ftype@ r = in1r/in1i; + const @ftype@ d = in1r*r + in1i; + ((@ftype@ *)op1)[0] = r/d; + ((@ftype@ *)op1)[1] = -1/d; + } + } +} + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) +{ + OUTPUT_LOOP { + ((@ftype@ *)op1)[0] = 1; + ((@ftype@ *)op1)[1] = 0; + } +} + +NPY_NO_EXPORT void +@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + ((@ftype@ *)op1)[0] = in1r; + ((@ftype@ *)op1)[1] = -in1i; + } +} + +NPY_NO_EXPORT void +@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + *((@ftype@ *)op1) = npy_hypot@c@(in1r, in1i); + } +} + +NPY_NO_EXPORT void +@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + *((@ftype@ *)op1) = npy_atan2@c@(in1i, in1r); + } +} + +NPY_NO_EXPORT void +@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + /* fixme: sign of nan is currently 0 */ + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + ((@ftype@ *)op1)[0] = CGT(in1r, in1i, 0.0, 0.0) ? 1 : + (CLT(in1r, in1i, 0.0, 0.0) ? -1 : + (CEQ(in1r, in1i, 0.0, 0.0) ? 0 : NPY_NAN@C@)); + ((@ftype@ *)op1)[1] = 0; + } +} + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = CGE, CLE# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + @ftype@ in1r = ((@ftype@ *)ip1)[0]; + @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + if ( !(npy_isnan(in1r) || npy_isnan(in1i) || @OP@(in1r, in1i, in2r, in2i))) { + in1r = in2r; + in1i = in2i; + } + ((@ftype@ *)op1)[0] = in1r; + ((@ftype@ *)op1)[1] = in1i; + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +/**begin repeat1 + * #kind = fmax, fmin# + * #OP = CGE, CLE# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + const @ftype@ in2r = ((@ftype@ *)ip2)[0]; + const @ftype@ in2i = ((@ftype@ *)ip2)[1]; + if (npy_isnan(in2r) || npy_isnan(in2i) || @OP@(in1r, in1i, in2r, in2i)) { + ((@ftype@ *)op1)[0] = in1r; + ((@ftype@ *)op1)[1] = in1i; + } + else { + ((@ftype@ *)op1)[0] = in2r; + ((@ftype@ *)op1)[1] = in2i; + } + } + npy_clear_floatstatus_barrier((char*)dimensions); +} +/**end repeat1**/ + +#define @TYPE@_true_divide @TYPE@_divide + +/**end repeat**/ + +#undef CGE +#undef CLE +#undef CGT +#undef CLT +#undef CEQ +#undef CNE + +/* + ***************************************************************************** + ** OBJECT LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * #kind = equal, not_equal, greater, greater_equal, less, less_equal# + * #OP = EQ, NE, GT, GE, LT, LE# + * #identity = NPY_TRUE, NPY_FALSE, -1*4# + */ + 
+/**begin repeat1 + * #suffix = , _OO_O# + * #as_bool = 1, 0# + */ +NPY_NO_EXPORT void +OBJECT@suffix@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { + BINARY_LOOP { + PyObject *ret_obj; + PyObject *in1 = *(PyObject **)ip1; + PyObject *in2 = *(PyObject **)ip2; + + in1 = in1 ? in1 : Py_None; + in2 = in2 ? in2 : Py_None; + + /* + * Do not use RichCompareBool because it includes an identity check for + * == and !=. This is wrong for elementwise behaviour, since it means + * that NaN can be equal to NaN and an array is equal to itself. + */ + ret_obj = PyObject_RichCompare(in1, in2, Py_@OP@); + if (ret_obj == NULL) { + return; + } +#if @as_bool@ + { + int ret = PyObject_IsTrue(ret_obj); + Py_DECREF(ret_obj); + if (ret == -1) { + return; + } + *((npy_bool *)op1) = (npy_bool)ret; + } +#else + *((PyObject **)op1) = ret_obj; +#endif + } +} +/**end repeat1**/ +/**end repeat**/ + +NPY_NO_EXPORT void +OBJECT_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + PyObject *zero = PyLong_FromLong(0); + + UNARY_LOOP { + PyObject *in1 = *(PyObject **)ip1; + PyObject **out = (PyObject **)op1; + PyObject *ret = NULL; + int v; + + if (in1 == NULL) { + in1 = Py_None; + } + + if ((v = PyObject_RichCompareBool(in1, zero, Py_LT)) == 1) { + ret = PyLong_FromLong(-1); + } + else if (v == 0 && + (v = PyObject_RichCompareBool(in1, zero, Py_GT)) == 1) { + ret = PyLong_FromLong(1); + } + else if (v == 0 && + (v = PyObject_RichCompareBool(in1, zero, Py_EQ)) == 1) { + ret = PyLong_FromLong(0); + } + else if (v == 0) { + /* in1 is NaN */ + PyErr_SetString(PyExc_TypeError, + "unorderable types for comparison"); + } + + if (ret == NULL) { + break; + } + Py_XDECREF(*out); + *out = ret; + } + Py_XDECREF(zero); +} + +/* + ***************************************************************************** + ** END LOOPS ** + ***************************************************************************** + */ diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/loops.h.src b/contrib/python/numpy/py2/numpy/core/src/umath/loops.h.src new file mode 100644 index 0000000000..5264a6533e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/loops.h.src @@ -0,0 +1,525 @@ +/* -*- c -*- */ +/* + * vim:syntax=c + */ + +#ifndef _NPY_UMATH_LOOPS_H_ +#define _NPY_UMATH_LOOPS_H_ + +#define BOOL_invert BOOL_logical_not +#define BOOL_negative BOOL_logical_not +#define BOOL_add BOOL_logical_or +#define BOOL_bitwise_and BOOL_logical_and +#define BOOL_bitwise_or BOOL_logical_or +#define BOOL_logical_xor BOOL_not_equal +#define BOOL_bitwise_xor BOOL_logical_xor +#define BOOL_multiply BOOL_logical_and +#define BOOL_subtract BOOL_logical_xor +#define BOOL_maximum BOOL_logical_or +#define BOOL_minimum BOOL_logical_and +#define BOOL_fmax BOOL_maximum +#define BOOL_fmin BOOL_minimum + + +/* + ***************************************************************************** + ** BOOLEAN LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * #kind = equal, not_equal, greater, greater_equal, less, less_equal, + * logical_and, logical_or, absolute, logical_not# + **/ +NPY_NO_EXPORT void +BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat**/ + +NPY_NO_EXPORT void +BOOL__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +/* + ***************************************************************************** + ** INTEGER LOOPS + 
***************************************************************************** + */ + +/**begin repeat + * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG# + */ + +/**begin repeat1 + * both signed and unsigned integer types + * #s = , u# + * #S = , U# + */ + +#define @S@@TYPE@_floor_divide @S@@TYPE@_divide +#define @S@@TYPE@_fmax @S@@TYPE@_maximum +#define @S@@TYPE@_fmin @S@@TYPE@_minimum + +NPY_NO_EXPORT void +@S@@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +@S@@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat2 + * #isa = , _avx2# + */ + +NPY_NO_EXPORT void +@S@@TYPE@_square@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +@S@@TYPE@_reciprocal@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +@S@@TYPE@_conjugate@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_negative@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_logical_not@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_invert@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat3 + * Arithmetic + * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor, + * left_shift, right_shift# + * #OP = +, -,*, &, |, ^, <<, >># + */ +NPY_NO_EXPORT void +@S@@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**end repeat3**/ + +/**begin repeat3 + * #kind = equal, not_equal, greater, greater_equal, less, less_equal, + * logical_and, logical_or# + * #OP = ==, !=, >, >=, <, <=, &&, ||# + */ +NPY_NO_EXPORT void +@S@@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**end repeat3**/ + +NPY_NO_EXPORT void +@S@@TYPE@_logical_xor@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat2**/ + +/**begin repeat2 + * #kind = maximum, minimum# + * #OP = >, <# + **/ +NPY_NO_EXPORT void +@S@@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat2**/ + +NPY_NO_EXPORT void +@S@@TYPE@_power(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_fmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_gcd(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@S@@TYPE@_lcm(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**end repeat1**/ + +/**end repeat**/ + +/* + 
***************************************************************************** + ** FLOAT LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * #TYPE = FLOAT, DOUBLE# + */ +NPY_NO_EXPORT void +@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat**/ + +/**begin repeat + * Float types + * #TYPE = HALF, FLOAT, DOUBLE, LONGDOUBLE# + * #c = f, f, , l# + * #C = F, F, , L# + */ + + +/**begin repeat1 + * Arithmetic + * # kind = add, subtract, multiply, divide# + * # OP = +, -, *, /# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * #kind = equal, not_equal, less, less_equal, greater, greater_equal, + * logical_and, logical_or# + * #OP = ==, !=, <, <=, >, >=, &&, ||# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = isnan, isinf, isfinite, signbit, copysign, nextafter, spacing# + * #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit, npy_copysign, nextafter, spacing# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = >=, <=# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * #kind = fmax, fmin# + * #OP = >=, <=# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +NPY_NO_EXPORT void +@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + + +NPY_NO_EXPORT void +@TYPE@_modf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_frexp(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@_ldexp(char **args, 
npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +@TYPE@_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +#define @TYPE@_true_divide @TYPE@_divide + +/**end repeat**/ + + +/* + ***************************************************************************** + ** COMPLEX LOOPS ** + ***************************************************************************** + */ + +#define CGE(xr,xi,yr,yi) (xr > yr || (xr == yr && xi >= yi)); +#define CLE(xr,xi,yr,yi) (xr < yr || (xr == yr && xi <= yi)); +#define CGT(xr,xi,yr,yi) (xr > yr || (xr == yr && xi > yi)); +#define CLT(xr,xi,yr,yi) (xr < yr || (xr == yr && xi < yi)); +#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); +#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi); + +/**begin repeat + * complex types + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# + * #c = f, , l# + * #C = F, , L# + */ + +/**begin repeat1 + * arithmetic + * #kind = add, subtract# + * #OP = +, -# + */ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**end repeat1**/ + +NPY_NO_EXPORT void +C@TYPE@_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind= greater, greater_equal, less, less_equal, equal, not_equal# + * #OP = CGT, CGE, CLT, CLE, CEQ, CNE# + */ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + #kind = logical_and, logical_or# + #OP1 = ||, ||# + #OP2 = &&, ||# +*/ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +NPY_NO_EXPORT void +C@TYPE@_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**begin repeat1 + * #kind = isnan, isinf, isfinite# + * #func = npy_isnan, npy_isinf, npy_isfinite# + * #OP = ||, ||, &&# + **/ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +NPY_NO_EXPORT void +C@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +C@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +C@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +C@TYPE@_conjugate(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +C@TYPE@_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = CGE, CLE# + */ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * #kind = fmax, fmin# + * #OP = CGE, 
CLE# + */ +NPY_NO_EXPORT void +C@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +#define C@TYPE@_true_divide C@TYPE@_divide + +/**end repeat**/ + +#undef CGE +#undef CLE +#undef CGT +#undef CLT +#undef CEQ +#undef CNE + +/* + ***************************************************************************** + ** DATETIME LOOPS ** + ***************************************************************************** + */ + +NPY_NO_EXPORT void +TIMEDELTA_negative(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_positive(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/**begin repeat + * #TYPE = DATETIME, TIMEDELTA# + */ + +NPY_NO_EXPORT void +@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +@TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +/**begin repeat1 + * #kind = equal, not_equal, greater, greater_equal, less, less_equal# + * #OP = ==, !=, >, >=, <, <=# + */ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**begin repeat1 + * #kind = maximum, minimum# + * #OP = >, <# + **/ +NPY_NO_EXPORT void +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ + +/**end repeat**/ + +NPY_NO_EXPORT void +DATETIME_Mm_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); + +NPY_NO_EXPORT void +DATETIME_mM_M_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +DATETIME_Mm_M_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +DATETIME_MM_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mq_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_qm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_md_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_dm_m_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mq_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_md_m_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_q_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void 
*NPY_UNUSED(func)); + +NPY_NO_EXPORT void +TIMEDELTA_mm_qm_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/* Special case equivalents to above functions */ + +#define TIMEDELTA_mq_m_true_divide TIMEDELTA_mq_m_divide +#define TIMEDELTA_md_m_true_divide TIMEDELTA_md_m_divide +#define TIMEDELTA_mm_d_true_divide TIMEDELTA_mm_d_divide +#define TIMEDELTA_mq_m_floor_divide TIMEDELTA_mq_m_divide +#define TIMEDELTA_md_m_floor_divide TIMEDELTA_md_m_divide +/* #define TIMEDELTA_mm_d_floor_divide TIMEDELTA_mm_d_divide */ +#define TIMEDELTA_fmin TIMEDELTA_minimum +#define TIMEDELTA_fmax TIMEDELTA_maximum +#define DATETIME_fmin DATETIME_minimum +#define DATETIME_fmax DATETIME_maximum + +/* + ***************************************************************************** + ** OBJECT LOOPS ** + ***************************************************************************** + */ + +/**begin repeat + * #kind = equal, not_equal, greater, greater_equal, less, less_equal# + * #OP = EQ, NE, GT, GE, LT, LE# + */ +/**begin repeat1 + * #suffix = , _OO_O# + */ +NPY_NO_EXPORT void +OBJECT@suffix@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat1**/ +/**end repeat**/ + +NPY_NO_EXPORT void +OBJECT_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +/* + ***************************************************************************** + ** END LOOPS ** + ***************************************************************************** + */ + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/matmul.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/matmul.c.src new file mode 100644 index 0000000000..bc00d3562d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/matmul.c.src @@ -0,0 +1,503 @@ +/* -*- c -*- */ + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" + +#include "npy_config.h" +#include "numpy/npy_common.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/npy_math.h" +#include "numpy/halffloat.h" +#include "lowlevel_strided_loops.h" + +#include "npy_pycompat.h" + +#include "npy_cblas.h" +#include "arraytypes.h" /* For TYPE_dot functions */ + +#include <assert.h> + +/* + ***************************************************************************** + ** BASICS ** + ***************************************************************************** + */ + +/* + * -1 to be conservative, in case blas internally uses a for loop with an + * inclusive upper bound + */ +#define BLAS_MAXSIZE (NPY_MAX_INT - 1) + +/* + * Determine if a 2d matrix can be used by BLAS + * 1. Strides must not alias or overlap + * 2. The faster (second) axis must be contiguous + * 3. 
The slower (first) axis stride, in unit steps, must be larger than + * the faster axis dimension + */ +static NPY_INLINE npy_bool +is_blasable2d(npy_intp byte_stride1, npy_intp byte_stride2, + npy_intp d1, npy_intp d2, npy_intp itemsize) +{ + npy_intp unit_stride1 = byte_stride1 / itemsize; + if (byte_stride2 != itemsize) { + return NPY_FALSE; + } + if ((byte_stride1 % itemsize ==0) && + (unit_stride1 >= d2) && + (unit_stride1 <= BLAS_MAXSIZE)) + { + return NPY_TRUE; + } + return NPY_FALSE; +} + +#if defined(HAVE_CBLAS) +static const npy_cdouble oneD = {1.0, 0.0}, zeroD = {0.0, 0.0}; +static const npy_cfloat oneF = {1.0, 0.0}, zeroF = {0.0, 0.0}; + +/**begin repeat + * + * #name = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + * #ctype = npy_float, npy_double, npy_cfloat, npy_cdouble# + * #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + * #prefix = s, d, c, z# + * #step1 = 1.F, 1., &oneF, &oneD# + * #step0 = 0.F, 0., &zeroF, &zeroD# + */ +NPY_NO_EXPORT void +@name@_gemv(void *ip1, npy_intp is1_m, npy_intp is1_n, + void *ip2, npy_intp is2_n, npy_intp NPY_UNUSED(is2_p), + void *op, npy_intp op_m, npy_intp NPY_UNUSED(op_p), + npy_intp m, npy_intp n, npy_intp NPY_UNUSED(p)) +{ + /* + * Vector matrix multiplication -- Level 2 BLAS + * arguments + * ip1: contiguous data, m*n shape + * ip2: data in c order, n*1 shape + * op: data in c order, m shape + */ + enum CBLAS_ORDER order; + int M, N, lda; + + assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE); + assert (is_blasable2d(is2_n, sizeof(@typ@), n, 1, sizeof(@typ@))); + M = (int)m; + N = (int)n; + + if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) { + order = CblasColMajor; + lda = (int)(is1_m / sizeof(@typ@)); + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + order = CblasRowMajor; + assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@))); + lda = (int)(is1_n / sizeof(@typ@)); + } + cblas_@prefix@gemv(order, CblasTrans, N, M, @step1@, ip1, lda, ip2, + is2_n / sizeof(@typ@), @step0@, op, op_m / sizeof(@typ@)); +} + +NPY_NO_EXPORT void +@name@_matmul_matrixmatrix(void *ip1, npy_intp is1_m, npy_intp is1_n, + void *ip2, npy_intp is2_n, npy_intp is2_p, + void *op, npy_intp os_m, npy_intp os_p, + npy_intp m, npy_intp n, npy_intp p) +{ + /* + * matrix matrix multiplication -- Level 3 BLAS + */ + enum CBLAS_ORDER order = CblasRowMajor; + enum CBLAS_TRANSPOSE trans1, trans2; + int M, N, P, lda, ldb, ldc; + assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE && p <= BLAS_MAXSIZE); + M = (int)m; + N = (int)n; + P = (int)p; + + assert(is_blasable2d(os_m, os_p, m, p, sizeof(@typ@))); + ldc = (int)(os_m / sizeof(@typ@)); + + if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) { + trans1 = CblasNoTrans; + lda = (int)(is1_m / sizeof(@typ@)); + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@))); + trans1 = CblasTrans; + lda = (int)(is1_n / sizeof(@typ@)); + } + + if (is_blasable2d(is2_n, is2_p, n, p, sizeof(@typ@))) { + trans2 = CblasNoTrans; + ldb = (int)(is2_n / sizeof(@typ@)); + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + assert(is_blasable2d(is2_p, is2_n, p, n, sizeof(@typ@))); + trans2 = CblasTrans; + ldb = (int)(is2_p / sizeof(@typ@)); + } + /* + * Use syrk if we have a case of a matrix times its transpose. + * Otherwise, use gemm for all other cases. 
+ */ + if ( + (ip1 == ip2) && + (m == p) && + (is1_m == is2_p) && + (is1_n == is2_n) && + (trans1 != trans2) + ) { + npy_intp i,j; + if (trans1 == CblasNoTrans) { + cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@, + ip1, lda, @step0@, op, ldc); + } + else { + cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@, + ip1, ldb, @step0@, op, ldc); + } + /* Copy the triangle */ + for (i = 0; i < P; i++) { + for (j = i + 1; j < P; j++) { + ((@typ@*)op)[j * ldc + i] = ((@typ@*)op)[i * ldc + j]; + } + } + + } + else { + cblas_@prefix@gemm(order, trans1, trans2, M, P, N, @step1@, ip1, lda, + ip2, ldb, @step0@, op, ldc); + } +} + +/**end repeat**/ +#endif + +/* + * matmul loops + * signature is (m?,n),(n,p?)->(m?,p?) + */ + +/**begin repeat + * #TYPE = LONGDOUBLE, + * FLOAT, DOUBLE, HALF, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG# + * #typ = npy_longdouble, + * npy_float,npy_double,npy_half, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong# + * #IS_COMPLEX = 0, 0, 0, 0, 1, 1, 1, 0*10# + * #IS_HALF = 0, 0, 0, 1, 0*13# + */ + +NPY_NO_EXPORT void +@TYPE@_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, + void *_ip2, npy_intp is2_n, npy_intp is2_p, + void *_op, npy_intp os_m, npy_intp os_p, + npy_intp dm, npy_intp dn, npy_intp dp) +{ + npy_intp m, n, p; + npy_intp ib1_n, ib2_n, ib2_p, ob_p; + char *ip1 = (char *)_ip1, *ip2 = (char *)_ip2, *op = (char *)_op; + + ib1_n = is1_n * dn; + ib2_n = is2_n * dn; + ib2_p = is2_p * dp; + ob_p = os_p * dp; + + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { +#if @IS_COMPLEX@ == 1 + (*(@typ@ *)op).real = 0; + (*(@typ@ *)op).imag = 0; +#elif @IS_HALF@ + float sum = 0; +#else + *(@typ@ *)op = 0; +#endif + for (n = 0; n < dn; n++) { + @typ@ val1 = (*(@typ@ *)ip1); + @typ@ val2 = (*(@typ@ *)ip2); +#if @IS_HALF@ + sum += npy_half_to_float(val1) * npy_half_to_float(val2); +#elif @IS_COMPLEX@ == 1 + (*(@typ@ *)op).real += (val1.real * val2.real) - + (val1.imag * val2.imag); + (*(@typ@ *)op).imag += (val1.real * val2.imag) + + (val1.imag * val2.real); +#else + *(@typ@ *)op += val1 * val2; +#endif + ip2 += is2_n; + ip1 += is1_n; + } +#if @IS_HALF@ + *(@typ@ *)op = npy_float_to_half(sum); +#endif + ip1 -= ib1_n; + ip2 -= ib2_n; + op += os_p; + ip2 += is2_p; + } + op -= ob_p; + ip2 -= ib2_p; + ip1 += is1_m; + op += os_m; + } +} + +/**end repeat**/ +NPY_NO_EXPORT void +BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, + void *_ip2, npy_intp is2_n, npy_intp is2_p, + void *_op, npy_intp os_m, npy_intp os_p, + npy_intp dm, npy_intp dn, npy_intp dp) +{ + npy_intp m, n, p; + npy_intp ib2_p, ob_p; + char *ip1 = (char *)_ip1, *ip2 = (char *)_ip2, *op = (char *)_op; + + ib2_p = is2_p * dp; + ob_p = os_p * dp; + + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { + char *ip1tmp = ip1; + char *ip2tmp = ip2; + *(npy_bool *)op = NPY_FALSE; + for (n = 0; n < dn; n++) { + npy_bool val1 = (*(npy_bool *)ip1tmp); + npy_bool val2 = (*(npy_bool *)ip2tmp); + if (val1 != 0 && val2 != 0) { + *(npy_bool *)op = NPY_TRUE; + break; + } + ip2tmp += is2_n; + ip1tmp += is1_n; + } + op += os_p; + ip2 += is2_p; + } + op -= ob_p; + ip2 -= ib2_p; + ip1 += is1_m; + op += os_m; + } +} + +NPY_NO_EXPORT void +OBJECT_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, + void *_ip2, npy_intp is2_n, npy_intp is2_p, + void *_op, npy_intp os_m, npy_intp 
os_p, + npy_intp dm, npy_intp dn, npy_intp dp) +{ + char *ip1 = (char *)_ip1, *ip2 = (char *)_ip2, *op = (char *)_op; + + npy_intp ib1_n = is1_n * dn; + npy_intp ib2_n = is2_n * dn; + npy_intp ib2_p = is2_p * dp; + npy_intp ob_p = os_p * dp; + npy_intp m, p, n; + + PyObject *product, *sum_of_products = NULL; + + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { + if ( 0 == dn ) { + sum_of_products = PyLong_FromLong(0); + if (sum_of_products == NULL) { + return; + } + } + + for (n = 0; n < dn; n++) { + PyObject *obj1 = *(PyObject**)ip1, *obj2 = *(PyObject**)ip2; + if (obj1 == NULL) { + obj1 = Py_None; + } + if (obj2 == NULL) { + obj2 = Py_None; + } + + product = PyNumber_Multiply(obj1, obj2); + if (product == NULL) { + Py_XDECREF(sum_of_products); + return; + } + + if (n == 0) { + sum_of_products = product; + } + else { + Py_SETREF(sum_of_products, PyNumber_Add(sum_of_products, product)); + Py_DECREF(product); + if (sum_of_products == NULL) { + return; + } + } + + ip2 += is2_n; + ip1 += is1_n; + } + + *((PyObject **)op) = sum_of_products; + ip1 -= ib1_n; + ip2 -= ib2_n; + op += os_p; + ip2 += is2_p; + } + op -= ob_p; + ip2 -= ib2_p; + ip1 += is1_m; + op += os_m; + } +} + + +/**begin repeat + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * BOOL, OBJECT# + * #typ = npy_float,npy_double,npy_longdouble, npy_half, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_bool,npy_object# + * #IS_COMPLEX = 0, 0, 0, 0, 1, 1, 1, 0*12# + * #USEBLAS = 1, 1, 0, 0, 1, 1, 0*13# + */ + + +NPY_NO_EXPORT void +@TYPE@_matmul(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + npy_intp dOuter = *dimensions++; + npy_intp iOuter; + npy_intp s0 = *steps++; + npy_intp s1 = *steps++; + npy_intp s2 = *steps++; + npy_intp dm = dimensions[0]; + npy_intp dn = dimensions[1]; + npy_intp dp = dimensions[2]; + npy_intp is1_m=steps[0], is1_n=steps[1], is2_n=steps[2], is2_p=steps[3], + os_m=steps[4], os_p=steps[5]; +#if @USEBLAS@ && defined(HAVE_CBLAS) + npy_intp sz = sizeof(@typ@); + npy_bool special_case = (dm == 1 || dn == 1 || dp == 1); + npy_bool any_zero_dim = (dm == 0 || dn == 0 || dp == 0); + npy_bool scalar_out = (dm == 1 && dp == 1); + npy_bool scalar_vec = (dn == 1 && (dp == 1 || dm == 1)); + npy_bool too_big_for_blas = (dm > BLAS_MAXSIZE || dn > BLAS_MAXSIZE || + dp > BLAS_MAXSIZE); + npy_bool i1_c_blasable = is_blasable2d(is1_m, is1_n, dm, dn, sz); + npy_bool i2_c_blasable = is_blasable2d(is2_n, is2_p, dn, dp, sz); + npy_bool i1_f_blasable = is_blasable2d(is1_n, is1_m, dn, dm, sz); + npy_bool i2_f_blasable = is_blasable2d(is2_p, is2_n, dp, dn, sz); + npy_bool i1blasable = i1_c_blasable || i1_f_blasable; + npy_bool i2blasable = i2_c_blasable || i2_f_blasable; + npy_bool o_c_blasable = is_blasable2d(os_m, os_p, dm, dp, sz); + npy_bool o_f_blasable = is_blasable2d(os_p, os_m, dp, dm, sz); + npy_bool vector_matrix = ((dm == 1) && i2blasable && + is_blasable2d(is1_n, sz, dn, 1, sz)); + npy_bool matrix_vector = ((dp == 1) && i1blasable && + is_blasable2d(is2_n, sz, dn, 1, sz)); +#endif + + for (iOuter = 0; iOuter < dOuter; iOuter++, + args[0] += s0, args[1] += s1, args[2] += s2) { + void *ip1=args[0], *ip2=args[1], *op=args[2]; +#if @USEBLAS@ && defined(HAVE_CBLAS) + /* + * TODO: refactor this out to a inner_loop_selector, in + * 
PyUFunc_MatmulLoopSelector. But that call does not have access to + * n, m, p and strides. + */ + if (too_big_for_blas || any_zero_dim) { + @TYPE@_matmul_inner_noblas(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, dm, dn, dp); + } + else if (special_case) { + /* Special case variants that have a 1 in the core dimensions */ + if (scalar_out) { + /* row @ column, 1,1 output */ + @TYPE@_dot(ip1, is1_n, ip2, is2_n, op, dn, NULL); + } else if (scalar_vec){ + /* + * 1,1d @ vector or vector @ 1,1d + * could use cblas_Xaxy, but that requires 0ing output + * and would not be faster (XXX prove it) + */ + @TYPE@_matmul_inner_noblas(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, dm, dn, dp); + } else if (vector_matrix) { + /* vector @ matrix, switch ip1, ip2, p and m */ + @TYPE@_gemv(ip2, is2_p, is2_n, ip1, is1_n, is1_m, + op, os_p, os_m, dp, dn, dm); + } else if (matrix_vector) { + /* matrix @ vector */ + @TYPE@_gemv(ip1, is1_m, is1_n, ip2, is2_n, is2_p, + + op, os_m, os_p, dm, dn, dp); + } else { + /* column @ row, 2d output, no blas needed or non-blas-able input */ + @TYPE@_matmul_inner_noblas(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, dm, dn, dp); + } + } else { + /* matrix @ matrix */ + if (i1blasable && i2blasable && o_c_blasable) { + @TYPE@_matmul_matrixmatrix(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, + dm, dn, dp); + } else if (i1blasable && i2blasable && o_f_blasable) { + /* + * Use transpose equivalence: + * matmul(a, b, o) == matmul(b.T, a.T, o.T) + */ + @TYPE@_matmul_matrixmatrix(ip2, is2_p, is2_n, + ip1, is1_n, is1_m, + op, os_p, os_m, + dp, dn, dm); + } else { + /* + * If parameters are castable to int and we copy the + * non-blasable (or non-ccontiguous output) + * we could still use BLAS, see gh-12365. + */ + @TYPE@_matmul_inner_noblas(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, dm, dn, dp); + } + } +#else + @TYPE@_matmul_inner_noblas(ip1, is1_m, is1_n, + ip2, is2_n, is2_p, + op, os_m, os_p, dm, dn, dp); + +#endif + } +} + +/**end repeat**/ diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/matmul.h.src b/contrib/python/numpy/py2/numpy/core/src/umath/matmul.h.src new file mode 100644 index 0000000000..a664b1b4e1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/matmul.h.src @@ -0,0 +1,12 @@ +/**begin repeat + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * BOOL, OBJECT# + **/ +NPY_NO_EXPORT void +@TYPE@_matmul(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat**/ + + diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/scalarmath.c.src b/contrib/python/numpy/py2/numpy/core/src/umath/scalarmath.c.src new file mode 100644 index 0000000000..a7987acda0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/scalarmath.c.src @@ -0,0 +1,1704 @@ +/* -*- c -*- */ + +/* The purpose of this module is to add faster math for array scalars + that does not go through the ufunc machinery + + but still supports error-modes. 
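   As an illustration of typical use (an example, not an exhaustive
   description): an expression such as np.float64(1.5) + np.float64(2.0)
   is handled by the nb_add slot this module installs on the scalar type,
   so no ufunc dispatch or broadcasting setup runs, while overflow and
   divide-by-zero still respect the error state configured via np.seterr.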
+*/ + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" +#include "npy_config.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/arrayscalars.h" + +#include "npy_pycompat.h" + +#include "numpy/halffloat.h" +#include "templ_common.h" + +#include "binop_override.h" +#include "npy_longdouble.h" + +/* Basic operations: + * + * BINARY: + * + * add, subtract, multiply, divide, remainder, divmod, power, + * floor_divide, true_divide + * + * lshift, rshift, and, or, xor (integers only) + * + * UNARY: + * + * negative, positive, absolute, nonzero, invert, int, long, float, oct, hex + * + */ + +/**begin repeat + * #name = byte, short, int, long, longlong# + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong# + */ +static void +@name@_ctype_add(@type@ a, @type@ b, @type@ *out) { + *out = a + b; + if ((*out^a) >= 0 || (*out^b) >= 0) { + return; + } + npy_set_floatstatus_overflow(); + return; +} +static void +@name@_ctype_subtract(@type@ a, @type@ b, @type@ *out) { + *out = a - b; + if ((*out^a) >= 0 || (*out^~b) >= 0) { + return; + } + npy_set_floatstatus_overflow(); + return; +} +/**end repeat**/ + +/**begin repeat + * #name = ubyte, ushort, uint, ulong, ulonglong# + * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong# + */ +static void +@name@_ctype_add(@type@ a, @type@ b, @type@ *out) { + *out = a + b; + if (*out >= a && *out >= b) { + return; + } + npy_set_floatstatus_overflow(); + return; +} +static void +@name@_ctype_subtract(@type@ a, @type@ b, @type@ *out) { + *out = a - b; + if (a >= b) { + return; + } + npy_set_floatstatus_overflow(); + return; +} +/**end repeat**/ + +#ifndef NPY_SIZEOF_BYTE +#define NPY_SIZEOF_BYTE 1 +#endif + +/**begin repeat + * + * #name = byte, ubyte, short, ushort, + * int, uint, long, ulong# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, + * npy_int, npy_uint, npy_long, npy_ulong# + * #big = npy_int, npy_uint, npy_int, npy_uint, + * npy_longlong, npy_ulonglong, npy_longlong, npy_ulonglong# + * #NAME = BYTE, UBYTE, SHORT, USHORT, + * INT, UINT, LONG, ULONG# + * #SIZENAME = BYTE*2, SHORT*2, INT*2, LONG*2# + * #SIZE = INT*4,LONGLONG*4# + * #neg = (1,0)*4# + */ +#if NPY_SIZEOF_@SIZE@ > NPY_SIZEOF_@SIZENAME@ +static void +@name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) { + @big@ temp; + temp = ((@big@) a) * ((@big@) b); + *out = (@type@) temp; +#if @neg@ + if (temp > NPY_MAX_@NAME@ || temp < NPY_MIN_@NAME@) +#else + if (temp > NPY_MAX_@NAME@) +#endif + npy_set_floatstatus_overflow(); + return; +} +#endif +/**end repeat**/ + +/**begin repeat + * + * #name = int, uint, long, ulong, + * longlong, ulonglong# + * #type = npy_int, npy_uint, npy_long, npy_ulong, + * npy_longlong, npy_ulonglong# + * #SIZE = INT*2, LONG*2, LONGLONG*2# + */ +#if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_@SIZE@ +static void +@name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) { + if (npy_mul_with_overflow_@name@(out, a, b)) { + npy_set_floatstatus_overflow(); + } + return; +} +#endif +/**end repeat**/ + +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #neg = (1,0)*5# + */ +static void +@name@_ctype_divide(@type@ a, @type@ b, @type@ *out) { + if (b == 0) { + npy_set_floatstatus_divbyzero(); + *out = 0; + } +#if @neg@ + else if (b == -1 && a < 0 && a == -a) { + 
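        /*
         * a < 0 together with a == -a can only hold for the most negative
         * value of the type (for example INT_MIN), because negating that
         * value overflows back onto itself in two's complement.  MIN / -1
         * is the one quotient that cannot be represented, so the overflow
         * status is raised for this case.
         */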
npy_set_floatstatus_overflow(); + *out = a / b; + } +#endif + else { +#if @neg@ + @type@ tmp; + tmp = a / b; + if (((a > 0) != (b > 0)) && (a % b != 0)) { + tmp--; + } + *out = tmp; +#else + *out = a / b; +#endif + } +} + +#define @name@_ctype_floor_divide @name@_ctype_divide + +static void +@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) { + if (a == 0 || b == 0) { + if (b == 0) npy_set_floatstatus_divbyzero(); + *out = 0; + return; + } +#if @neg@ + else if ((a > 0) == (b > 0)) { + *out = a % b; + } + else { + /* handled like Python does */ + *out = a % b; + if (*out) *out += b; + } +#else + *out = a % b; +#endif +} +/**end repeat**/ + +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, uint, long, + * ulong, longlong, ulonglong# + * #otyp = npy_float*4, npy_double*6# + */ +#define @name@_ctype_true_divide(a, b, out) \ + *(out) = ((@otyp@) (a)) / ((@otyp@) (b)); +/**end repeat**/ + +/* b will always be positive in this call */ +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #upc = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + */ +static void +@name@_ctype_power(@type@ a, @type@ b, @type@ *out) { + @type@ tmp; + + if (b == 0) { + *out = 1; + return; + } + if (a == 1) { + *out = 1; + return; + } + + tmp = b & 1 ? a : 1; + b >>= 1; + while (b > 0) { + a *= a; + if (b & 1) { + tmp *= a; + } + b >>= 1; + } + *out = tmp; +} +/**end repeat**/ + + + +/* QUESTION: Should we check for overflow / underflow in (l,r)shift? */ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + */ + +/**begin repeat1 + * #oper = and, xor, or, lshift, rshift# + * #op = &, ^, |, <<, >># + */ + +#define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2) + +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * #name = float, double, longdouble# + * #type = npy_float, npy_double, npy_longdouble# + * #c = f, , l# + */ +#define @name@_ctype_add(a, b, outp) *(outp) = (a) + (b) +#define @name@_ctype_subtract(a, b, outp) *(outp) = (a) - (b) +#define @name@_ctype_multiply(a, b, outp) *(outp) = (a) * (b) +#define @name@_ctype_divide(a, b, outp) *(outp) = (a) / (b) +#define @name@_ctype_true_divide @name@_ctype_divide + + +static void +@name@_ctype_floor_divide(@type@ a, @type@ b, @type@ *out) { + @type@ mod; + + *out = npy_divmod@c@(a, b, &mod); +} + + +static void +@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) { + npy_divmod@c@(a, b, out); +} + + +static void +@name@_ctype_divmod(@type@ a, @type@ b, @type@ *out1, @type@ *out2) { + *out1 = npy_divmod@c@(a, b, out2); +} + + +/**end repeat**/ + +#define half_ctype_add(a, b, outp) *(outp) = \ + npy_float_to_half(npy_half_to_float(a) + npy_half_to_float(b)) +#define half_ctype_subtract(a, b, outp) *(outp) = \ + npy_float_to_half(npy_half_to_float(a) - npy_half_to_float(b)) +#define half_ctype_multiply(a, b, outp) *(outp) = \ + npy_float_to_half(npy_half_to_float(a) * npy_half_to_float(b)) +#define half_ctype_divide(a, b, outp) *(outp) = \ + npy_float_to_half(npy_half_to_float(a) / npy_half_to_float(b)) +#define half_ctype_true_divide half_ctype_divide + + +static void +half_ctype_floor_divide(npy_half a, npy_half b, npy_half 
*out) { + npy_half mod; + + *out = npy_half_divmod(a, b, &mod); +} + + +static void +half_ctype_remainder(npy_half a, npy_half b, npy_half *out) { + npy_half_divmod(a, b, out); +} + + +static void +half_ctype_divmod(npy_half a, npy_half b, npy_half *out1, npy_half *out2) { + *out1 = npy_half_divmod(a, b, out2); +} + +/**begin repeat + * #name = cfloat, cdouble, clongdouble# + * #rname = float, double, longdouble# + * #rtype = npy_float, npy_double, npy_longdouble# + * #c = f,,l# + */ +#define @name@_ctype_add(a, b, outp) do{ \ + (outp)->real = (a).real + (b).real; \ + (outp)->imag = (a).imag + (b).imag; \ + } while(0) +#define @name@_ctype_subtract(a, b, outp) do{ \ + (outp)->real = (a).real - (b).real; \ + (outp)->imag = (a).imag - (b).imag; \ + } while(0) +#define @name@_ctype_multiply(a, b, outp) do{ \ + (outp)->real = (a).real * (b).real - (a).imag * (b).imag; \ + (outp)->imag = (a).real * (b).imag + (a).imag * (b).real; \ + } while(0) +/* Algorithm identical to that in loops.c.src, for consistency */ +#define @name@_ctype_divide(a, b, outp) do{ \ + @rtype@ in1r = (a).real; \ + @rtype@ in1i = (a).imag; \ + @rtype@ in2r = (b).real; \ + @rtype@ in2i = (b).imag; \ + @rtype@ in2r_abs = npy_fabs@c@(in2r); \ + @rtype@ in2i_abs = npy_fabs@c@(in2i); \ + if (in2r_abs >= in2i_abs) { \ + if (in2r_abs == 0 && in2i_abs == 0) { \ + /* divide by zero should yield a complex inf or nan */ \ + (outp)->real = in1r/in2r_abs; \ + (outp)->imag = in1i/in2i_abs; \ + } \ + else { \ + @rtype@ rat = in2i/in2r; \ + @rtype@ scl = 1.0@c@/(in2r + in2i*rat); \ + (outp)->real = (in1r + in1i*rat)*scl; \ + (outp)->imag = (in1i - in1r*rat)*scl; \ + } \ + } \ + else { \ + @rtype@ rat = in2r/in2i; \ + @rtype@ scl = 1.0@c@/(in2i + in2r*rat); \ + (outp)->real = (in1r*rat + in1i)*scl; \ + (outp)->imag = (in1i*rat - in1r)*scl; \ + } \ + } while(0) + +#define @name@_ctype_true_divide @name@_ctype_divide + +#define @name@_ctype_floor_divide(a, b, outp) do { \ + @rname@_ctype_floor_divide( \ + ((a).real*(b).real + (a).imag*(b).imag), \ + ((b).real*(b).real + (b).imag*(b).imag), \ + &((outp)->real)); \ + (outp)->imag = 0; \ + } while(0) +/**end repeat**/ + + + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, cfloat, cdouble, clongdouble# + */ +#define @name@_ctype_divmod(a, b, out, out2) { \ + @name@_ctype_floor_divide(a, b, out); \ + @name@_ctype_remainder(a, b, out2); \ + } +/**end repeat**/ + + +/**begin repeat + * #name = float, double, longdouble# + * #type = npy_float, npy_double, npy_longdouble# + */ +static npy_@name@ (*_basic_@name@_pow)(@type@ a, @type@ b); + +static void +@name@_ctype_power(@type@ a, @type@ b, @type@ *out) +{ + *out = _basic_@name@_pow(a, b); +} +/**end repeat**/ +static void +half_ctype_power(npy_half a, npy_half b, npy_half *out) +{ + const npy_float af = npy_half_to_float(a); + const npy_float bf = npy_half_to_float(b); + const npy_float outf = _basic_float_pow(af,bf); + *out = npy_float_to_half(outf); +} + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * float, double, longdouble# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_float, npy_double, npy_longdouble# + * #uns = (0,1)*5,0*3# + */ +static void +@name@_ctype_negative(@type@ a, @type@ *out) +{ +#if @uns@ + npy_set_floatstatus_overflow(); +#endif + *out = -a; +} +/**end repeat**/ + +static void +half_ctype_negative(npy_half a, npy_half 
*out) +{ + *out = a^0x8000u; +} + + +/**begin repeat + * #name = cfloat, cdouble, clongdouble# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + */ +static void +@name@_ctype_negative(@type@ a, @type@ *out) +{ + out->real = -a.real; + out->imag = -a.imag; +} +/**end repeat**/ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble# + */ +static void +@name@_ctype_positive(@type@ a, @type@ *out) +{ + *out = a; +} +/**end repeat**/ + +/* + * Get the nc_powf, nc_pow, and nc_powl functions from + * the data area of the power ufunc in umathmodule. + */ + +/**begin repeat + * #name = cfloat, cdouble, clongdouble# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + */ +static void +@name@_ctype_positive(@type@ a, @type@ *out) +{ + out->real = a.real; + out->imag = a.imag; +} + +static void (*_basic_@name@_pow)(@type@ *, @type@ *, @type@ *); + +static void +@name@_ctype_power(@type@ a, @type@ b, @type@ *out) +{ + _basic_@name@_pow(&a, &b, out); +} +/**end repeat**/ + + +/**begin repeat + * #name = ubyte, ushort, uint, ulong, ulonglong# + */ + +#define @name@_ctype_absolute @name@_ctype_positive + +/**end repeat**/ + + +/**begin repeat + * #name = byte, short, int, long, longlong# + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong# + */ +static void +@name@_ctype_absolute(@type@ a, @type@ *out) +{ + *out = (a < 0 ? -a : a); +} +/**end repeat**/ + +/**begin repeat + * #name = float, double, longdouble# + * #type = npy_float, npy_double, npy_longdouble# + * #c = f,,l# + */ +static void +@name@_ctype_absolute(@type@ a, @type@ *out) +{ + *out = npy_fabs@c@(a); +} +/**end repeat**/ + +static void +half_ctype_absolute(npy_half a, npy_half *out) +{ + *out = a&0x7fffu; +} + +/**begin repeat + * #name = cfloat, cdouble, clongdouble# + * #type = npy_cfloat, npy_cdouble, npy_clongdouble# + * #rtype = npy_float, npy_double, npy_longdouble# + * #c = f,,l# + */ +static void +@name@_ctype_absolute(@type@ a, @rtype@ *out) +{ + *out = npy_cabs@c@(a); +} +/**end repeat**/ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, long, + * ulong, longlong, ulonglong# + */ + +#define @name@_ctype_invert(a, out) *(out) = ~a; + +/**end repeat**/ + +/*** END OF BASIC CODE **/ + + +/* The general strategy for commutative binary operators is to + * + * 1) Convert the types to the common type if both are scalars (0 return) + * 2) If both are not scalars use ufunc machinery (-2 return) + * 3) If both are scalars but cannot be cast to the right type + * return NotImplmented (-1 return) + * + * 4) Perform the function on the C-type. + * 5) If an error condition occurred, check to see + * what the current error-handling is and handle the error. + * + * 6) Construct and return the output scalar. 
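 *
 * As a concrete illustration (hypothetical values, the flow is what
 * matters): multiplying two np.int16 scalars (the Short scalar on common
 * platforms) lands in short_multiply, which converts both operands with
 * _short_convert2_to_ctypes (step 1), calls short_ctype_multiply
 * (step 4), inspects the floating point status flags for the overflow
 * that helper may have raised (step 5) and wraps the result with
 * PyArrayScalar_New(Short) (step 6).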
+ */ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #Name = Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, LongDouble, + * CFloat, CDouble, CLongDouble# + * #TYPE = NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, + * NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, + * NPY_HALF, NPY_FLOAT, NPY_LONGDOUBLE, + * NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE# + */ + +static int +_@name@_convert_to_ctype(PyObject *a, @type@ *arg1) +{ + PyObject *temp; + + if (PyArray_IsScalar(a, @Name@)) { + *arg1 = PyArrayScalar_VAL(a, @Name@); + return 0; + } + else if (PyArray_IsScalar(a, Generic)) { + PyArray_Descr *descr1; + + if (!PyArray_IsScalar(a, Number)) { + return -1; + } + descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a)); + if (PyArray_CanCastSafely(descr1->type_num, @TYPE@)) { + PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@); + Py_DECREF(descr1); + return 0; + } + else { + Py_DECREF(descr1); + return -1; + } + } + else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) { + return -2; + } + else if ((temp = PyArray_ScalarFromObject(a)) != NULL) { + int retval = _@name@_convert_to_ctype(temp, arg1); + + Py_DECREF(temp); + return retval; + } + return -2; +} + +/**end repeat**/ + + +/* Same as above but added exact checks against known python types for speed */ + +/**begin repeat + * #name = double# + * #type = npy_double# + * #Name = Double# + * #TYPE = NPY_DOUBLE# + * #PYCHECKEXACT = PyFloat_CheckExact# + * #PYEXTRACTCTYPE = PyFloat_AS_DOUBLE# + */ + +static int +_@name@_convert_to_ctype(PyObject *a, @type@ *arg1) +{ + PyObject *temp; + + if (@PYCHECKEXACT@(a)){ + *arg1 = @PYEXTRACTCTYPE@(a); + return 0; + } + + if (PyArray_IsScalar(a, @Name@)) { + *arg1 = PyArrayScalar_VAL(a, @Name@); + return 0; + } + else if (PyArray_IsScalar(a, Generic)) { + PyArray_Descr *descr1; + + if (!PyArray_IsScalar(a, Number)) { + return -1; + } + descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a)); + if (PyArray_CanCastSafely(descr1->type_num, @TYPE@)) { + PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@); + Py_DECREF(descr1); + return 0; + } + else { + Py_DECREF(descr1); + return -1; + } + } + else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) { + return -2; + } + else if ((temp = PyArray_ScalarFromObject(a)) != NULL) { + int retval = _@name@_convert_to_ctype(temp, arg1); + + Py_DECREF(temp); + return retval; + } + return -2; +} + +/**end repeat**/ + + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, cfloat, cdouble# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_cfloat, npy_cdouble# + */ +static int +_@name@_convert2_to_ctypes(PyObject *a, @type@ *arg1, + PyObject *b, @type@ *arg2) +{ + int ret; + ret = _@name@_convert_to_ctype(a, arg1); + if (ret < 0) { + return ret; + } + ret = _@name@_convert_to_ctype(b, arg2); + if (ret < 0) { + return ret; + } + return 0; +} +/**end repeat**/ + +/**begin repeat + * #name = longdouble, clongdouble# + * #type = npy_longdouble, npy_clongdouble# + 
*/ + +static int +_@name@_convert2_to_ctypes(PyObject *a, @type@ *arg1, + PyObject *b, @type@ *arg2) +{ + int ret; + ret = _@name@_convert_to_ctype(a, arg1); + if (ret < 0) { + return ret; + } + ret = _@name@_convert_to_ctype(b, arg2); + if (ret == -2) { + ret = -3; + } + if (ret < 0) { + return ret; + } + return 0; +} + +/**end repeat**/ + + +#if defined(NPY_PY3K) +#define CODEGEN_SKIP_divide_FLAG +#endif + +/**begin repeat + * + * #name = (byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong)*13, + * (half, float, double, longdouble, + * cfloat, cdouble, clongdouble)*6, + * (half, float, double, longdouble)*2# + * #Name = (Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong,LongLong,ULongLong)*13, + * (Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble)*6, + * (Half, Float, Double, LongDouble)*2# + * #type = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong)*13, + * (npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble)*6, + * (npy_half, npy_float, npy_double, npy_longdouble)*2# + * + * #oper = add*10, subtract*10, multiply*10, divide*10, remainder*10, + * divmod*10, floor_divide*10, lshift*10, rshift*10, and*10, + * or*10, xor*10, true_divide*10, + * add*7, subtract*7, multiply*7, divide*7, floor_divide*7, true_divide*7, + * divmod*4, remainder*4# + * + * #fperr = 1*70,0*50,1*10, + * 1*42, + * 1*8# + * #twoout = 0*50,1*10,0*70, + * 0*42, + * 1*4,0*4# + * #otype = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong)*12, + * npy_float*4, npy_double*6, + * (npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble)*6, + * (npy_half, npy_float, npy_double, npy_longdouble)*2# + * #OName = (Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong)*12, + * Float*4, Double*6, + * (Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble)*6, + * (Half, Float, Double, LongDouble)*2# + */ + +#if !defined(CODEGEN_SKIP_@oper@_FLAG) + +static PyObject * +@name@_@oper@(PyObject *a, PyObject *b) +{ + PyObject *ret; + @type@ arg1, arg2; + /* + * NOTE: In gcc >= 4.1, the compiler will reorder floating point + * operations and floating point error state checks. In + * particular, the arithmetic operations were being reordered + * so that the errors weren't caught. Declaring this output + * variable volatile was the minimal fix for the issue. + * (Ticket #1671) + */ + volatile @otype@ out; +#if @twoout@ + @otype@ out2; + PyObject *obj; +#endif + +#if @fperr@ + int retstatus; + int first; +#endif + + BINOP_GIVE_UP_IF_NEEDED(a, b, nb_@oper@, @name@_@oper@); + + switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) { + case 0: + break; + case -1: + /* one of them can't be cast safely must be mixed-types*/ + return PyArray_Type.tp_as_number->nb_@oper@(a,b); + case -2: + /* use default handling */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_as_number->nb_@oper@(a,b); + case -3: + /* + * special case for longdouble and clongdouble + * because they have a recursive getitem in their dtype + */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + +#if @fperr@ + npy_clear_floatstatus_barrier((char*)&out); +#endif + + /* + * here we do the actual calculation with arg1 and arg2 + * as a function call. 
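 * The clear/get floatstatus "barrier" variants used around it are passed
 * the address of out so the compiler cannot move the arithmetic across
 * the status-flag accesses; this complements the volatile declaration
 * explained in the NOTE above.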
+ */ +#if @twoout@ + @name@_ctype_@oper@(arg1, arg2, (@otype@ *)&out, &out2); +#else + @name@_ctype_@oper@(arg1, arg2, (@otype@ *)&out); +#endif + +#if @fperr@ + /* Check status flag. If it is set, then look up what to do */ + retstatus = npy_get_floatstatus_barrier((char*)&out); + if (retstatus) { + int bufsize, errmask; + PyObject *errobj; + + if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, + &errobj) < 0) { + return NULL; + } + first = 1; + if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { + Py_XDECREF(errobj); + return NULL; + } + Py_XDECREF(errobj); + } +#endif + + +#if @twoout@ + ret = PyTuple_New(2); + if (ret == NULL) { + return NULL; + } + obj = PyArrayScalar_New(@OName@); + if (obj == NULL) { + Py_DECREF(ret); + return NULL; + } + PyArrayScalar_ASSIGN(obj, @OName@, out); + PyTuple_SET_ITEM(ret, 0, obj); + obj = PyArrayScalar_New(@OName@); + if (obj == NULL) { + Py_DECREF(ret); + return NULL; + } + PyArrayScalar_ASSIGN(obj, @OName@, out2); + PyTuple_SET_ITEM(ret, 1, obj); +#else + ret = PyArrayScalar_New(@OName@); + if (ret == NULL) { + return NULL; + } + PyArrayScalar_ASSIGN(ret, @OName@, out); +#endif + return ret; +} +#endif + +/**end repeat**/ + +#undef CODEGEN_SKIP_divide_FLAG + +#define _IS_ZERO(x) (x == 0) + +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * + * #Name = Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + * + * #oname = float*4, double*6, half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * + * #otype = npy_float*4, npy_double*6, npy_half, npy_float, + * npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * + * #OName = Float*4, Double*6, Half, Float, + * Double, LongDouble, + * CFloat, CDouble, CLongDouble# + * + * #isint = (1,0)*5,0*7# + * #cmplx = 0*14,1*3# + * #iszero = _IS_ZERO*10, npy_half_iszero, _IS_ZERO*6# + * #zero = 0*10, NPY_HALF_ZERO, 0*6# + * #one = 1*10, NPY_HALF_ONE, 1*6# + */ + +#if @cmplx@ +static PyObject * +@name@_power(PyObject *a, PyObject *b, PyObject *modulo) +{ + PyObject *ret; + @type@ arg1, arg2; + int retstatus; + int first; + @type@ out = {@zero@, @zero@}; + + BINOP_GIVE_UP_IF_NEEDED(a, b, nb_power, @name@_power); + + switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) { + case 0: + break; + case -1: + /* can't cast both safely mixed-types? */ + return PyArray_Type.tp_as_number->nb_power(a,b,modulo); + case -2: + /* use default handling */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_as_number->nb_power(a,b,modulo); + case -3: + default: + /* + * special case for longdouble and clongdouble + * because they have a recursive getitem in their dtype + */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + if (modulo != Py_None) { + /* modular exponentiation is not implemented (gh-8804) */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + npy_clear_floatstatus_barrier((char*)&out); + + /* + * here we do the actual calculation with arg1 and arg2 + * as a function call. 
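 * A zero exponent is short-circuited below: the result is set to 1 with
 * a zero imaginary part regardless of the base, without calling the
 * pow helper.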
+ */ + if (@iszero@(arg2.real) && @iszero@(arg2.imag)) { + out.real = @one@; + out.imag = @zero@; + } + else { + @name@_ctype_power(arg1, arg2, &out); + } + + /* Check status flag. If it is set, then look up what to do */ + retstatus = npy_get_floatstatus_barrier((char*)&out); + if (retstatus) { + int bufsize, errmask; + PyObject *errobj; + + if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, + &errobj) < 0) { + return NULL; + } + first = 1; + if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { + Py_XDECREF(errobj); + return NULL; + } + Py_XDECREF(errobj); + } + + ret = PyArrayScalar_New(@Name@); + if (ret == NULL) { + return NULL; + } + PyArrayScalar_ASSIGN(ret, @Name@, out); + + return ret; +} + +#elif @isint@ + +static PyObject * +@name@_power(PyObject *a, PyObject *b, PyObject *modulo) +{ + PyObject *ret; + @type@ arg1, arg2, out; + + BINOP_GIVE_UP_IF_NEEDED(a, b, nb_power, @name@_power); + + switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) { + case 0: + break; + case -1: + /* can't cast both safely mixed-types? */ + return PyArray_Type.tp_as_number->nb_power(a,b,modulo); + case -2: + /* use default handling */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_as_number->nb_power(a,b,modulo); + case -3: + default: + /* + * special case for longdouble and clongdouble + * because they have a recursive getitem in their dtype + */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + if (modulo != Py_None) { + /* modular exponentiation is not implemented (gh-8804) */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + npy_clear_floatstatus_barrier((char*)&out); + + /* + * here we do the actual calculation with arg1 and arg2 + * as a function call. + */ + if (arg2 < 0) { + PyErr_SetString(PyExc_ValueError, + "Integers to negative integer powers are not allowed."); + return NULL; + } + @name@_ctype_power(arg1, arg2, &out); + + ret = PyArrayScalar_New(@Name@); + if (ret == NULL) { + return NULL; + } + PyArrayScalar_ASSIGN(ret, @Name@, out); + + return ret; +} + +#else + +static PyObject * +@name@_power(PyObject *a, PyObject *b, PyObject *modulo) +{ + PyObject *ret; + @type@ arg1, arg2; + int retstatus; + int first; + + @type@ out = @zero@; + + BINOP_GIVE_UP_IF_NEEDED(a, b, nb_power, @name@_power); + + switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) { + case 0: + break; + case -1: + /* can't cast both safely mixed-types? */ + return PyArray_Type.tp_as_number->nb_power(a,b,modulo); + case -2: + /* use default handling */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_as_number->nb_power(a,b,modulo); + case -3: + default: + /* + * special case for longdouble and clongdouble + * because they have a recursive getitem in their dtype + */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + if (modulo != Py_None) { + /* modular exponentiation is not implemented (gh-8804) */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + npy_clear_floatstatus_barrier((char*)&out); + + /* + * here we do the actual calculation with arg1 and arg2 + * as a function call. + */ + if (@iszero@(arg2)) { + out = @one@; + } + else { + @name@_ctype_power(arg1, arg2, &out); + } + + /* Check status flag. 
If it is set, then look up what to do */ + retstatus = npy_get_floatstatus_barrier((char*)&out); + if (retstatus) { + int bufsize, errmask; + PyObject *errobj; + + if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, + &errobj) < 0) { + return NULL; + } + first = 1; + if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { + Py_XDECREF(errobj); + return NULL; + } + Py_XDECREF(errobj); + } + + ret = PyArrayScalar_New(@Name@); + if (ret == NULL) { + return NULL; + } + PyArrayScalar_ASSIGN(ret, @Name@, out); + + return ret; +} + +#endif + +/**end repeat**/ +#undef _IS_ZERO + + +/**begin repeat + * + * #name = cfloat, cdouble, clongdouble# + * + */ + +/**begin repeat1 + * + * #oper = divmod, remainder# + * + */ + +#define @name@_@oper@ NULL + +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * + * #name = half, float, double, longdouble, cfloat, cdouble, clongdouble# + * + */ + +/**begin repeat1 + * + * #oper = lshift, rshift, and, or, xor# + * + */ + +#define @name@_@oper@ NULL + +/**end repeat1**/ + +/**end repeat**/ + +/**begin repeat + * #name = (byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble)*3, + * + * byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + * + * #type = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble)*3, + * + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * + * #otype = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble)*2, + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble, + * + * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * + * #OName = (Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble)*2, + * Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * Float, Double, LongDouble, + * + * Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong# + * + * #oper = negative*17, positive*17, absolute*17, invert*10# + */ +static PyObject * +@name@_@oper@(PyObject *a) +{ + @type@ arg1; + @otype@ out; + PyObject *ret; + + switch(_@name@_convert_to_ctype(a, &arg1)) { + case 0: + break; + case -1: + /* can't cast both safely use different add function */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + case -2: + /* use default handling */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_as_number->nb_@oper@(a); + } + + /* + * here we do the actual calculation with arg1 and arg2 + * make it a function call. 
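 * For the absolute variants the complex scalar types map to a real
 * result type (CFloat to Float, CDouble to Double, CLongDouble to
 * LongDouble), matching the cabs based ctype helpers defined earlier.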
+ */ + + @name@_ctype_@oper@(arg1, &out); + + ret = PyArrayScalar_New(@OName@); + PyArrayScalar_ASSIGN(ret, @OName@, out); + + return ret; +} +/**end repeat**/ + +/**begin repeat + * + * #name = half, float, double, longdouble, cfloat, cdouble, clongdouble# + */ + +#define @name@_invert NULL + +/**end repeat**/ + +#if defined(NPY_PY3K) +#define NONZERO_NAME(prefix) prefix##bool +#else +#define NONZERO_NAME(prefix) prefix##nonzero +#endif + +#define _IS_NONZERO(x) (x != 0) +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, + * uint, long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, + * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #simp = 1*14, 0*3# + * #nonzero = _IS_NONZERO*10, !npy_half_iszero, _IS_NONZERO*6# + */ +static int +NONZERO_NAME(@name@_)(PyObject *a) +{ + int ret; + @type@ arg1; + + if (_@name@_convert_to_ctype(a, &arg1) < 0) { + if (PyErr_Occurred()) { + return -1; + } + return PyGenericArrType_Type.tp_as_number->NONZERO_NAME(nb_)(a); + } + + /* + * here we do the actual calculation with arg1 and arg2 + * make it a function call. + */ + +#if @simp@ + ret = @nonzero@(arg1); +#else + ret = (@nonzero@(arg1.real) || @nonzero@(arg1.imag)); +#endif + + return ret; +} +/**end repeat**/ +#undef _IS_NONZERO + + +static int +emit_complexwarning(void) +{ + static PyObject *cls = NULL; + if (cls == NULL) { + PyObject *mod; + mod = PyImport_ImportModule("numpy.core"); + assert(mod != NULL); + cls = PyObject_GetAttrString(mod, "ComplexWarning"); + assert(cls != NULL); + Py_DECREF(mod); + } + return PyErr_WarnEx(cls, + "Casting complex values to real discards the imaginary part", 1); +} + +/**begin repeat + * + * #name = byte, ubyte, short, ushort, int, + * uint, long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * + * #Name = Byte, UByte, Short, UShort, Int, + * UInt, Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + * + * #cmplx = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1# + * #sign = (signed, unsigned)*5, , , , , , , # + * #ctype = long*8, PY_LONG_LONG*2, + * double*3, npy_longdouble, double*2, npy_longdouble# + * #to_ctype = , , , , , , , , , , npy_half_to_double, , , , , , # + * #func = (PyLong_FromLong, PyLong_FromUnsignedLong)*4, + * PyLong_FromLongLong, PyLong_FromUnsignedLongLong, + * PyLong_FromDouble*3, npy_longdouble_to_PyLong, + * PyLong_FromDouble*2, npy_longdouble_to_PyLong# + */ +static PyObject * +@name@_int(PyObject *obj) +{ + PyObject *long_result; + +#if @cmplx@ + @sign@ @ctype@ x = @to_ctype@(PyArrayScalar_VAL(obj, @Name@).real); +#else + @sign@ @ctype@ x = @to_ctype@(PyArrayScalar_VAL(obj, @Name@)); +#endif + +#if @cmplx@ + if (emit_complexwarning() < 0) { + return NULL; + } +#endif + + long_result = @func@(x); + if (long_result == NULL){ + return NULL; + } + +#ifndef NPY_PY3K + /* Invoke long.__int__ to try to downcast */ + { + PyObject *before_downcast = long_result; + long_result = Py_TYPE(long_result)->tp_as_number->nb_int(long_result); + Py_DECREF(before_downcast); + } +#endif + + return long_result; +} +/**end repeat**/ + +/**begin repeat + * + * #name = (byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, 
clongdouble)*2# + * #Name = (Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble)*2# + * #cmplx = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1)*2# + * #to_ctype = (, , , , , , , , , , npy_half_to_double, , , , , , )*2# + * #which = long*17, float*17# + * #func = (PyLong_FromLongLong, PyLong_FromUnsignedLongLong)*5, + * PyLong_FromDouble*3, npy_longdouble_to_PyLong, + * PyLong_FromDouble*2, npy_longdouble_to_PyLong, + * PyFloat_FromDouble*17# + */ +static NPY_INLINE PyObject * +@name@_@which@(PyObject *obj) +{ +#if @cmplx@ + if (emit_complexwarning() < 0) { + return NULL; + } + return @func@(@to_ctype@(PyArrayScalar_VAL(obj, @Name@).real)); +#else + return @func@(@to_ctype@(PyArrayScalar_VAL(obj, @Name@))); +#endif +} +/**end repeat**/ + +#if !defined(NPY_PY3K) + +/**begin repeat + * + * #name = (byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble)*2# + * #oper = oct*17, hex*17# + * #kind = (int*5, long*5, int*2, long*2, int, long*2)*2# + * #cap = (Int*5, Long*5, Int*2, Long*2, Int, Long*2)*2# + */ +static PyObject * +@name@_@oper@(PyObject *obj) +{ + PyObject *pyint; + pyint = @name@_@kind@(obj); + if (pyint == NULL) { + return NULL; + } + return Py@cap@_Type.tp_as_number->nb_@oper@(pyint); +} +/**end repeat**/ + +#endif + +/**begin repeat + * #oper = le, ge, lt, gt, eq, ne# + * #op = <=, >=, <, >, ==, !=# + * #halfop = npy_half_le, npy_half_ge, npy_half_lt, + * npy_half_gt, npy_half_eq, npy_half_ne# + */ +#define def_cmp_@oper@(arg1, arg2) (arg1 @op@ arg2) +#define cmplx_cmp_@oper@(arg1, arg2) ((arg1.real == arg2.real) ? \ + arg1.imag @op@ arg2.imag : \ + arg1.real @op@ arg2.real) +#define def_half_cmp_@oper@(arg1, arg2) @halfop@(arg1, arg2) +/**end repeat**/ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #simp = def*10, def_half, def*3, cmplx*3# + */ +static PyObject* +@name@_richcompare(PyObject *self, PyObject *other, int cmp_op) +{ + npy_@name@ arg1, arg2; + int out=0; + + RICHCMP_GIVE_UP_IF_NEEDED(self, other); + + switch(_@name@_convert2_to_ctypes(self, &arg1, other, &arg2)) { + case 0: + break; + case -1: + /* can't cast both safely use different add function */ + case -2: + /* use ufunc */ + if (PyErr_Occurred()) { + return NULL; + } + return PyGenericArrType_Type.tp_richcompare(self, other, cmp_op); + case -3: + /* + * special case for longdouble and clongdouble + * because they have a recursive getitem in their dtype + */ + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + /* here we do the actual calculation with arg1 and arg2 */ + switch (cmp_op) { + case Py_EQ: + out = @simp@_cmp_eq(arg1, arg2); + break; + case Py_NE: + out = @simp@_cmp_ne(arg1, arg2); + break; + case Py_LE: + out = @simp@_cmp_le(arg1, arg2); + break; + case Py_GE: + out = @simp@_cmp_ge(arg1, arg2); + break; + case Py_LT: + out = @simp@_cmp_lt(arg1, arg2); + break; + case Py_GT: + out = @simp@_cmp_gt(arg1, arg2); + break; + } + + if (out) { + PyArrayScalar_RETURN_TRUE; + } + else { + PyArrayScalar_RETURN_FALSE; + } +} +/**end repeat**/ + +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# +**/ +static PyNumberMethods @name@_as_number = { + 
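    /*
     * Positional initializer: the entries must follow the field order of
     * CPython's PyNumberMethods for the targeted Python version, which is
     * why several slots are wrapped in NPY_PY3K conditionals.  Operations
     * defined to NULL earlier (for example lshift for the floating point
     * scalars, or divmod for the complex ones) leave their slot empty, so
     * Python treats the operation as unsupported for that scalar type.
     */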
(binaryfunc)@name@_add, /*nb_add*/ + (binaryfunc)@name@_subtract, /*nb_subtract*/ + (binaryfunc)@name@_multiply, /*nb_multiply*/ +#if !defined(NPY_PY3K) + (binaryfunc)@name@_divide, /*nb_divide*/ +#endif + (binaryfunc)@name@_remainder, /*nb_remainder*/ + (binaryfunc)@name@_divmod, /*nb_divmod*/ + (ternaryfunc)@name@_power, /*nb_power*/ + (unaryfunc)@name@_negative, + (unaryfunc)@name@_positive, /*nb_pos*/ + (unaryfunc)@name@_absolute, /*nb_abs*/ +#if defined(NPY_PY3K) + (inquiry)@name@_bool, /*nb_bool*/ +#else + (inquiry)@name@_nonzero, /*nb_nonzero*/ +#endif + (unaryfunc)@name@_invert, /*nb_invert*/ + (binaryfunc)@name@_lshift, /*nb_lshift*/ + (binaryfunc)@name@_rshift, /*nb_rshift*/ + (binaryfunc)@name@_and, /*nb_and*/ + (binaryfunc)@name@_xor, /*nb_xor*/ + (binaryfunc)@name@_or, /*nb_or*/ +#if !defined(NPY_PY3K) + 0, /*nb_coerce*/ +#endif + (unaryfunc)@name@_int, /*nb_int*/ +#if defined(NPY_PY3K) + (unaryfunc)0, /*nb_reserved*/ +#else + (unaryfunc)@name@_long, /*nb_long*/ +#endif + (unaryfunc)@name@_float, /*nb_float*/ +#if !defined(NPY_PY3K) + (unaryfunc)@name@_oct, /*nb_oct*/ + (unaryfunc)@name@_hex, /*nb_hex*/ +#endif + 0, /*inplace_add*/ + 0, /*inplace_subtract*/ + 0, /*inplace_multiply*/ +#if !defined(NPY_PY3K) + 0, /*inplace_divide*/ +#endif + 0, /*inplace_remainder*/ + 0, /*inplace_power*/ + 0, /*inplace_lshift*/ + 0, /*inplace_rshift*/ + 0, /*inplace_and*/ + 0, /*inplace_xor*/ + 0, /*inplace_or*/ + (binaryfunc)@name@_floor_divide, /*nb_floor_divide*/ + (binaryfunc)@name@_true_divide, /*nb_true_divide*/ + 0, /*nb_inplace_floor_divide*/ + 0, /*nb_inplace_true_divide*/ + (unaryfunc)NULL, /*nb_index*/ +#if PY_VERSION_HEX >= 0x03050000 + 0, /*nb_matrix_multiply*/ + 0, /*nb_inplace_matrix_multiply*/ +#endif +}; +/**end repeat**/ + +NPY_NO_EXPORT void +add_scalarmath(void) +{ + /**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #NAME = Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, + * CFloat, CDouble, CLongDouble# + **/ + @name@_as_number.nb_index = Py@NAME@ArrType_Type.tp_as_number->nb_index; + Py@NAME@ArrType_Type.tp_as_number = &(@name@_as_number); + Py@NAME@ArrType_Type.tp_richcompare = @name@_richcompare; + /**end repeat**/ +} + +static int +get_functions(PyObject * mm) +{ + PyObject *obj; + void **funcdata; + char *signatures; + int i, j; + int ret = -1; + + /* Get the nc_pow functions */ + /* Get the pow functions */ + obj = PyObject_GetAttrString(mm, "power"); + if (obj == NULL) { + goto fail; + } + funcdata = ((PyUFuncObject *)obj)->data; + signatures = ((PyUFuncObject *)obj)->types; + + i = 0; + j = 0; + while (signatures[i] != NPY_FLOAT) { + i += 3; + j++; + } + _basic_float_pow = funcdata[j]; + _basic_double_pow = funcdata[j + 1]; + _basic_longdouble_pow = funcdata[j + 2]; + _basic_cfloat_pow = funcdata[j + 3]; + _basic_cdouble_pow = funcdata[j + 4]; + _basic_clongdouble_pow = funcdata[j + 5]; + Py_DECREF(obj); + + return ret = 0; + + fail: + Py_DECREF(mm); + return ret; +} + + +NPY_NO_EXPORT int initscalarmath(PyObject * m) +{ + if (get_functions(m) < 0) { + return -1; + } + + add_scalarmath(); + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/core/src/umath/simd.inc.src b/contrib/python/numpy/py2/numpy/core/src/umath/simd.inc.src new file mode 100644 index 0000000000..4bb8569bee --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/src/umath/simd.inc.src @@ -0,0 
+1,1219 @@ +/* -*- c -*- */ + +/* + * This file is for the definitions of simd vectorized operations. + * + * Currently contains sse2 functions that are built on amd64, x32 or + * non-generic builds (CFLAGS=-march=...) + * In future it may contain other instruction sets like AVX or NEON detected + * at runtime in which case it needs to be included indirectly via a file + * compiled with special options (or use gcc target attributes) so the binary + * stays portable. + */ + + +#ifndef __NPY_SIMD_INC +#define __NPY_SIMD_INC + +#include "lowlevel_strided_loops.h" +#include "numpy/npy_common.h" +#include "numpy/npy_math.h" +#ifdef NPY_HAVE_SSE2_INTRINSICS +#include <emmintrin.h> +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +#include <immintrin.h> +#else +#undef __AVX2__ +#undef __AVX512F__ +#endif +#endif +#include <assert.h> +#include <stdlib.h> +#include <float.h> +#include <string.h> /* for memcpy */ + +#define VECTOR_SIZE_BYTES 16 + +static NPY_INLINE npy_uintp +abs_ptrdiff(char *a, char *b) +{ + return (a > b) ? (a - b) : (b - a); +} + + +/* + * stride is equal to element size and input and destination are equal or + * don't overlap within one register. The check of the steps against + * esize also quarantees that steps are >= 0. + */ +#define IS_BLOCKABLE_UNARY(esize, vsize) \ + (steps[0] == (esize) && steps[0] == steps[1] && \ + (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \ + ((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \ + ((abs_ptrdiff(args[1], args[0]) == 0)))) + +#define IS_BLOCKABLE_REDUCE(esize, vsize) \ + (steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \ + npy_is_aligned(args[1], (esize)) && \ + npy_is_aligned(args[0], (esize))) + +#define IS_BLOCKABLE_BINARY(esize, vsize) \ + (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \ + npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ + npy_is_aligned(args[0], (esize)) && \ + (abs_ptrdiff(args[2], args[0]) >= (vsize) || \ + abs_ptrdiff(args[2], args[0]) == 0) && \ + (abs_ptrdiff(args[2], args[1]) >= (vsize) || \ + abs_ptrdiff(args[2], args[1]) >= 0)) + +#define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \ + (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \ + npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ + ((abs_ptrdiff(args[2], args[1]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[1]) == 0)) && \ + abs_ptrdiff(args[2], args[0]) >= (esize)) + +#define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \ + (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \ + npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \ + ((abs_ptrdiff(args[2], args[0]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[0]) == 0)) && \ + abs_ptrdiff(args[2], args[1]) >= (esize)) + +#undef abs_ptrdiff + +#define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \ + (steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \ + npy_is_aligned(args[1], (esize)) && \ + npy_is_aligned(args[0], (esize))) + +#define IS_BLOCKABLE_BINARY_SCALAR1_BOOL(esize, vsize) \ + (steps[0] == 0 && steps[1] == (esize) && steps[2] == (1) && \ + npy_is_aligned(args[1], (esize))) + +#define IS_BLOCKABLE_BINARY_SCALAR2_BOOL(esize, vsize) \ + (steps[0] == (esize) && steps[1] == 0 && steps[2] == (1) && \ + npy_is_aligned(args[0], (esize))) + +/* align var to alignment */ +#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\ + npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\ + alignment, n);\ + for(i = 0; i < peel; i++) + 
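/*
 * Hand-expanded sketch (illustrative only, not used by this file) of the
 * peel / blocked / remainder structure that LOOP_BLOCK_ALIGN_VAR and the
 * two macros defined next produce, assuming the 16-byte SSE2 vector width
 * used elsewhere here.  npy_aligned_block_offset and npy_blocked_end come
 * from lowlevel_strided_loops.h, included at the top of this file.
 */
static NPY_INLINE void
example_blocked_sqrt_FLOAT(npy_float *op, const npy_float *ip, npy_intp n)
{
    /* peel: scalar iterations until the output pointer is 16-byte aligned */
    npy_intp i, peel = npy_aligned_block_offset(op, sizeof(npy_float), 16, n);
    for (i = 0; i < peel; i++) {
        op[i] = npy_sqrtf(ip[i]);
    }
    /* blocked: whole 16-byte vectors, i.e. four floats per iteration
     * (a vector load / sqrt / store would replace this scalar body) */
    for (; i < npy_blocked_end(peel, sizeof(npy_float), 16, n); i += 4) {
        op[i + 0] = npy_sqrtf(ip[i + 0]);
        op[i + 1] = npy_sqrtf(ip[i + 1]);
        op[i + 2] = npy_sqrtf(ip[i + 2]);
        op[i + 3] = npy_sqrtf(ip[i + 3]);
    }
    /* remainder: scalar tail for whatever does not fill a full vector */
    for (; i < n; i++) {
        op[i] = npy_sqrtf(ip[i]);
    }
}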
+#define LOOP_BLOCKED(type, vsize)\ + for(; i < npy_blocked_end(peel, sizeof(type), vsize, n);\ + i += (vsize / sizeof(type))) + +#define LOOP_BLOCKED_END\ + for (; i < n; i++) + + +/* + * Dispatcher functions + * decide whether the operation can be vectorized and run it + * if it was run returns true and false if nothing was done + */ + +/* + ***************************************************************************** + ** FLOAT DISPATCHERS + ***************************************************************************** + */ + +/**begin repeat + * Float types + * #type = npy_float, npy_double, npy_longdouble# + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# + * #vector = 1, 1, 0# + */ + +/**begin repeat1 + * #func = sqrt, absolute, negative, minimum, maximum# + * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 # + * #name = unary*3, unary_reduce*2# + */ + +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + +/* prototypes */ +static void +sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n); + +#endif + +static NPY_INLINE int +run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + if (@check@(sizeof(@type@), VECTOR_SIZE_BYTES)) { + sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat1**/ + +/**begin repeat1 + * Arithmetic + * # kind = add, subtract, multiply, divide# + */ + +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + +/* prototypes */ +static void +sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, + npy_intp n); +static void +sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, + npy_intp n); +static void +sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, + npy_intp n); + +#endif + +static NPY_INLINE int +run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + @type@ * ip1 = (@type@ *)args[0]; + @type@ * ip2 = (@type@ *)args[1]; + @type@ * op = (@type@ *)args[2]; + npy_intp n = dimensions[0]; +#if defined __AVX512F__ + const npy_intp vector_size_bytes = 64; +#elif defined __AVX2__ + const npy_intp vector_size_bytes = 32; +#else + const npy_intp vector_size_bytes = 32; +#endif + /* argument one scalar */ + if (IS_BLOCKABLE_BINARY_SCALAR1(sizeof(@type@), vector_size_bytes)) { + sse2_binary_scalar1_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } + /* argument two scalar */ + else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), vector_size_bytes)) { + sse2_binary_scalar2_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } + else if (IS_BLOCKABLE_BINARY(sizeof(@type@), vector_size_bytes)) { + sse2_binary_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } +#endif + return 0; +} + +/**end repeat1**/ + +/**begin repeat1 + * #kind = equal, not_equal, less, less_equal, greater, greater_equal, + * logical_and, logical_or# + * #simd = 1, 1, 1, 1, 1, 1, 0, 0# + */ + +#if @vector@ && @simd@ && defined NPY_HAVE_SSE2_INTRINSICS + +/* prototypes */ +static void +sse2_binary_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, + npy_intp n); +static void +sse2_binary_scalar1_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, + npy_intp n); +static void +sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, + npy_intp n); + +#endif + +static NPY_INLINE int +run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if @vector@ && @simd@ && defined 
NPY_HAVE_SSE2_INTRINSICS + @type@ * ip1 = (@type@ *)args[0]; + @type@ * ip2 = (@type@ *)args[1]; + npy_bool * op = (npy_bool *)args[2]; + npy_intp n = dimensions[0]; + /* argument one scalar */ + if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { + sse2_binary_scalar1_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } + /* argument two scalar */ + else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { + sse2_binary_scalar2_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } + else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { + sse2_binary_@kind@_@TYPE@(op, ip1, ip2, n); + return 1; + } +#endif + return 0; +} + +/**end repeat1**/ + +/**begin repeat1 + * #kind = isnan, isfinite, isinf, signbit# + */ + +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + +static void +sse2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n); + +#endif + +static NPY_INLINE int +run_@kind@_simd_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS + if (steps[0] == sizeof(@type@) && steps[1] == 1 && + npy_is_aligned(args[0], sizeof(@type@))) { + sse2_@kind@_@TYPE@((npy_bool*)args[1], (@type@*)args[0], dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat1**/ + +/**end repeat**/ + +/* + ***************************************************************************** + ** BOOL DISPATCHERS + ***************************************************************************** + */ + +/**begin repeat + * # kind = logical_or, logical_and# + */ + +#if defined NPY_HAVE_SSE2_INTRINSICS +static void +sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, + npy_intp n); + +static void +sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, npy_intp n); +#endif + +static NPY_INLINE int +run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined NPY_HAVE_SSE2_INTRINSICS + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_BINARY(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { + sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0], + (npy_bool*)args[1], dimensions[0]); + return 1; + } +#endif + return 0; +} + + +static NPY_INLINE int +run_reduce_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined NPY_HAVE_SSE2_INTRINSICS + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_REDUCE(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { + sse2_reduce_@kind@_BOOL((npy_bool*)args[0], (npy_bool*)args[1], + dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat**/ + +/**begin repeat + * # kind = absolute, logical_not# + */ + +#if defined NPY_HAVE_SSE2_INTRINSICS +static void +sse2_@kind@_BOOL(npy_bool *, npy_bool *, const npy_intp n); +#endif + +static NPY_INLINE int +run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined NPY_HAVE_SSE2_INTRINSICS + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_UNARY(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { + sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat**/ + +#ifdef NPY_HAVE_SSE2_INTRINSICS + +/* + * Vectorized operations + */ +/* + ***************************************************************************** + ** FLOAT LOOPS + ***************************************************************************** + */ + +/**begin repeat +* horizontal reductions on a vector +* # VOP = min, max# +*/ + +static NPY_INLINE npy_float sse2_horizontal_@VOP@___m128(__m128 v) +{ + npy_float r; + 
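    /*
     * Reduce the four float lanes [a b c d] in two combine steps: first
     * fold the upper half onto the lower half, giving [m(ac) m(bd) . .],
     * then combine lane 0 with lane 1 so that lane 0 holds m(a,b,c,d);
     * only that lane is stored into r.
     */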
__m128 tmp = _mm_movehl_ps(v, v); /* c d ... */ + __m128 m = _mm_@VOP@_ps(v, tmp); /* m(ac) m(bd) ... */ + tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1));/* m(bd) m(bd) ... */ + _mm_store_ss(&r, _mm_@VOP@_ps(tmp, m)); /* m(acbd) ... */ + return r; +} + +static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v) +{ + npy_double r; + __m128d tmp = _mm_unpackhi_pd(v, v); /* b b */ + _mm_store_sd(&r, _mm_@VOP@_pd(tmp, v)); /* m(ab) m(bb) */ + return r; +} + +/**end repeat**/ + +/**begin repeat + * #type = npy_float, npy_double# + * #TYPE = FLOAT, DOUBLE# + * #scalarf = npy_sqrtf, npy_sqrt# + * #c = f, # + * #vtype = __m128, __m128d# + * #vtype256 = __m256, __m256d# + * #vtype512 = __m512, __m512d# + * #vpre = _mm, _mm# + * #vpre256 = _mm256, _mm256# + * #vpre512 = _mm512, _mm512# + * #vsuf = ps, pd# + * #vsufs = ss, sd# + * #nan = NPY_NANF, NPY_NAN# + * #double = 0, 1# + * #cast = _mm_castps_si128, _mm_castpd_si128# + */ + + +/**begin repeat1 +* Arithmetic +* # kind = add, subtract, multiply, divide# +* # OP = +, -, *, /# +* # VOP = add, sub, mul, div# +*/ + +static void +sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ +#ifdef __AVX512F__ + const npy_intp vector_size_bytes = 64; + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], vector_size_bytes) && npy_is_aligned(&ip2[i], vector_size_bytes)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } +#elif __AVX2__ + const npy_intp vector_size_bytes = 32; + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], vector_size_bytes) && + npy_is_aligned(&ip2[i], vector_size_bytes)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + 
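                /*
                 * Both inputs are 32-byte aligned here and point to
                 * different data, so a pair of aligned loads is used on
                 * every blocked iteration.
                 */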
@vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } +#else + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES) && + npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); + @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); + @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); + @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); + @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + } +#endif + LOOP_BLOCKED_END { + op[i] = ip1[i] @OP@ ip2[i]; + } +} + + +static void +sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ +#ifdef __AVX512F__ + const npy_intp vector_size_bytes = 64; + const @vtype512@ a = @vpre512@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + 
@vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + + +#elif __AVX2__ + const npy_intp vector_size_bytes = 32; + const @vtype256@ a = @vpre256@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else + const @vtype@ a = @vpre@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } +#endif + LOOP_BLOCKED_END { + op[i] = ip1[0] @OP@ ip2[i]; + } +} + + +static void +sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ +#ifdef __AVX512F__ + const npy_intp vector_size_bytes = 64; + const @vtype512@ b = @vpre512@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + +#elif __AVX2__ + const npy_intp vector_size_bytes = 32; + const @vtype256@ b = @vpre256@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, vector_size_bytes) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], vector_size_bytes)) { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, vector_size_bytes) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else + const @vtype@ b = @vpre@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); + @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); + @vpre@_store_@vsuf@(&op[i], c); + } + } +#endif + LOOP_BLOCKED_END { + op[i] = ip1[i] @OP@ ip2[0]; + } +} + +/**end repeat1**/ + +/* + * compress 4 vectors to 4/8 bytes in op with filled with 0 or 1 + * the last vector is passed as a pointer as MSVC 2010 is unable to ignore the + * calling convention 
leading to C2719 on 32 bit, see #4795 + */ +static NPY_INLINE void +sse2_compress4_to_byte_@TYPE@(@vtype@ r1, @vtype@ r2, @vtype@ r3, @vtype@ * r4, + npy_bool * op) +{ + const __m128i mask = @vpre@_set1_epi8(0x1); + __m128i ir1 = @vpre@_packs_epi32(@cast@(r1), @cast@(r2)); + __m128i ir2 = @vpre@_packs_epi32(@cast@(r3), @cast@(*r4)); + __m128i rr = @vpre@_packs_epi16(ir1, ir2); +#if @double@ + rr = @vpre@_packs_epi16(rr, rr); + rr = @vpre@_and_si128(rr, mask); + @vpre@_storel_epi64((__m128i*)op, rr); +#else + rr = @vpre@_and_si128(rr, mask); + @vpre@_storeu_si128((__m128i*)op, rr); +#endif +} + +static void +sse2_signbit_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { + op[i] = npy_signbit(ip1[i]) != 0; + } + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); + int r = @vpre@_movemask_@vsuf@(a); + if (sizeof(@type@) == 8) { + op[i] = r & 1; + op[i + 1] = (r >> 1); + } + else { + op[i] = r & 1; + op[i + 1] = (r >> 1) & 1; + op[i + 2] = (r >> 2) & 1; + op[i + 3] = (r >> 3); + } + } + LOOP_BLOCKED_END { + op[i] = npy_signbit(ip1[i]) != 0; + } +} + +/**begin repeat1 + * #kind = isnan, isfinite, isinf# + * #var = 0, 1, 2# + */ + +static void +sse2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n) +{ +#if @var@ != 0 /* isinf/isfinite */ + /* signbit mask 0x7FFFFFFF after andnot */ + const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@); + const @vtype@ ones = @vpre@_cmpeq_@vsuf@(@vpre@_setzero_@vsuf@(), + @vpre@_setzero_@vsuf@()); +#if @double@ + const @vtype@ fltmax = @vpre@_set1_@vsuf@(DBL_MAX); +#else + const @vtype@ fltmax = @vpre@_set1_@vsuf@(FLT_MAX); +#endif +#endif + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { + op[i] = npy_@kind@(ip1[i]) != 0; + } + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ r1, r2, r3, r4; +#if @var@ != 0 /* isinf/isfinite */ + /* fabs via masking of sign bit */ + r1 = @vpre@_andnot_@vsuf@(mask, a); + r2 = @vpre@_andnot_@vsuf@(mask, b); + r3 = @vpre@_andnot_@vsuf@(mask, c); + r4 = @vpre@_andnot_@vsuf@(mask, d); +#if @var@ == 1 /* isfinite */ + /* negative compare against max float, nan is always true */ + r1 = @vpre@_cmpnle_@vsuf@(r1, fltmax); + r2 = @vpre@_cmpnle_@vsuf@(r2, fltmax); + r3 = @vpre@_cmpnle_@vsuf@(r3, fltmax); + r4 = @vpre@_cmpnle_@vsuf@(r4, fltmax); +#else /* isinf */ + r1 = @vpre@_cmpnlt_@vsuf@(fltmax, r1); + r2 = @vpre@_cmpnlt_@vsuf@(fltmax, r2); + r3 = @vpre@_cmpnlt_@vsuf@(fltmax, r3); + r4 = @vpre@_cmpnlt_@vsuf@(fltmax, r4); +#endif + /* flip results to what we want (andnot as there is no sse not) */ + r1 = @vpre@_andnot_@vsuf@(r1, ones); + r2 = @vpre@_andnot_@vsuf@(r2, ones); + r3 = @vpre@_andnot_@vsuf@(r3, ones); + r4 = @vpre@_andnot_@vsuf@(r4, ones); +#endif +#if @var@ == 0 /* isnan */ + r1 = @vpre@_cmpneq_@vsuf@(a, a); + r2 = @vpre@_cmpneq_@vsuf@(b, b); + r3 = @vpre@_cmpneq_@vsuf@(c, c); + r4 = @vpre@_cmpneq_@vsuf@(d, d); +#endif + sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]); + } + LOOP_BLOCKED_END { + op[i] = npy_@kind@(ip1[i]) != 0; + } +} + +/**end repeat1**/ + +/**begin repeat1 + * #kind = equal, not_equal, less, less_equal, greater, greater_equal# + * #OP = ==, !=, <, <=, >, >=# + * #VOP 
= cmpeq, cmpneq, cmplt, cmple, cmpgt, cmpge# +*/ + +/* sets invalid fpu flag on QNaN for consistency with packed compare */ +static NPY_INLINE int +sse2_ordered_cmp_@kind@_@TYPE@(const @type@ a, const @type@ b) +{ + @vtype@ one = @vpre@_set1_@vsuf@(1); + @type@ tmp; + @vtype@ v = @vpre@_@VOP@_@vsufs@(@vpre@_load_@vsufs@(&a), + @vpre@_load_@vsufs@(&b)); + v = @vpre@_and_@vsuf@(v, one); + @vpre@_store_@vsufs@(&tmp, v); + return tmp; +} + +static void +sse2_binary_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[i]); + } + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a1 = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b1 = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c1 = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d1 = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ a2 = @vpre@_loadu_@vsuf@(&ip2[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b2 = @vpre@_loadu_@vsuf@(&ip2[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c2 = @vpre@_loadu_@vsuf@(&ip2[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d2 = @vpre@_loadu_@vsuf@(&ip2[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a1, a2); + @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b1, b2); + @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c1, c2); + @vtype@ r4 = @vpre@_@VOP@_@vsuf@(d1, d2); + sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]); + } + LOOP_BLOCKED_END { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[i]); + } +} + + +static void +sse2_binary_scalar1_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ + @vtype@ s = @vpre@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(ip2, @type@, VECTOR_SIZE_BYTES) { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[0], ip2[i]); + } + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip2[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip2[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip2[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip2[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ r1 = @vpre@_@VOP@_@vsuf@(s, a); + @vtype@ r2 = @vpre@_@VOP@_@vsuf@(s, b); + @vtype@ r3 = @vpre@_@VOP@_@vsuf@(s, c); + @vtype@ r4 = @vpre@_@VOP@_@vsuf@(s, d); + sse2_compress4_to_byte_@TYPE@(r1, r2, r3, &r4, &op[i]); + } + LOOP_BLOCKED_END { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[0], ip2[i]); + } +} + + +static void +sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) +{ + @vtype@ s = @vpre@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[0]); + } + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a, s); + @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b, s); + @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c, s); + @vtype@ r4 = @vpre@_@VOP@_@vsuf@(d, s); + sse2_compress4_to_byte_@TYPE@(r1, r2, r3, 
&r4, &op[i]); + } + LOOP_BLOCKED_END { + op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[0]); + } +} +/**end repeat1**/ + +static void +sse2_sqrt_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) +{ + /* align output to VECTOR_SIZE_BYTES bytes */ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) { + op[i] = @scalarf@(ip[i]); + } + assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) || + npy_is_aligned(&op[i], VECTOR_SIZE_BYTES)); + if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ d = @vpre@_load_@vsuf@(&ip[i]); + @vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d)); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ d = @vpre@_loadu_@vsuf@(&ip[i]); + @vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d)); + } + } + LOOP_BLOCKED_END { + op[i] = @scalarf@(ip[i]); + } +} + + +static NPY_INLINE +@type@ scalar_abs_@type@(@type@ v) +{ + /* add 0 to clear -0.0 */ + return (v > 0 ? v: -v) + 0; +} + +static NPY_INLINE +@type@ scalar_neg_@type@(@type@ v) +{ + return -v; +} + +/**begin repeat1 + * #kind = absolute, negative# + * #VOP = andnot, xor# + * #scalar = scalar_abs, scalar_neg# + **/ +static void +sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) +{ + /* + * get 0x7FFFFFFF mask (everything but signbit set) + * float & ~mask will remove the sign, float ^ mask flips the sign + * this is equivalent to how the compiler implements fabs on amd64 + */ + const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@); + + /* align output to VECTOR_SIZE_BYTES bytes */ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) { + op[i] = @scalar@_@type@(ip[i]); + } + assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) || + npy_is_aligned(&op[i], VECTOR_SIZE_BYTES)); + if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip[i]); + @vpre@_store_@vsuf@(&op[i], @vpre@_@VOP@_@vsuf@(mask, a)); + } + } + else { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_loadu_@vsuf@(&ip[i]); + @vpre@_store_@vsuf@(&op[i], @vpre@_@VOP@_@vsuf@(mask, a)); + } + } + LOOP_BLOCKED_END { + op[i] = @scalar@_@type@(ip[i]); + } +} +/**end repeat1**/ + + +/**begin repeat1 + * #kind = maximum, minimum# + * #VOP = max, min# + * #OP = >=, <=# + **/ +/* arguments swapped as unary reduce has the swapped compared to unary */ +static void +sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) +{ + const npy_intp stride = VECTOR_SIZE_BYTES / (npy_intp)sizeof(@type@); + LOOP_BLOCK_ALIGN_VAR(ip, @type@, VECTOR_SIZE_BYTES) { + /* Order of operations important for MSVC 2015 */ + *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? 
*op : ip[i]; + } + assert(n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)); + if (i + 3 * stride <= n) { + /* load the first elements */ + @vtype@ c1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); + @vtype@ c2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); + i += 2 * stride; + + /* minps/minpd will set invalid flag if nan is encountered */ + npy_clear_floatstatus_barrier((char*)&c1); + LOOP_BLOCKED(@type@, 2 * VECTOR_SIZE_BYTES) { + @vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); + @vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); + c1 = @vpre@_@VOP@_@vsuf@(c1, v1); + c2 = @vpre@_@VOP@_@vsuf@(c2, v2); + } + c1 = @vpre@_@VOP@_@vsuf@(c1, c2); + + if (npy_get_floatstatus_barrier((char*)&c1) & NPY_FPE_INVALID) { + *op = @nan@; + } + else { + @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1); + /* Order of operations important for MSVC 2015 */ + *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp; + } + } + LOOP_BLOCKED_END { + /* Order of operations important for MSVC 2015 */ + *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i]; + } + npy_clear_floatstatus_barrier((char*)op); +} +/**end repeat1**/ + +/**end repeat**/ + +/* + ***************************************************************************** + ** BOOL LOOPS + ***************************************************************************** + */ + +/**begin repeat + * # kind = logical_or, logical_and# + * # and = 0, 1# + * # op = ||, &&# + * # sc = !=, ==# + * # vpre = _mm*2# + * # vsuf = si128*2# + * # vtype = __m128i*2# + * # type = npy_bool*2# + * # vload = _mm_load_si128*2# + * # vloadu = _mm_loadu_si128*2# + * # vstore = _mm_store_si128*2# + */ + +/* + * convert any bit set to boolean true so vectorized and normal operations are + * consistent, should not be required if bool is used correctly everywhere but + * you never know + */ +#if !@and@ +static NPY_INLINE @vtype@ byte_to_true(@vtype@ v) +{ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + const @vtype@ truemask = @vpre@_set1_epi8(1 == 1); + /* get 0xFF for zeros */ + @vtype@ tmp = @vpre@_cmpeq_epi8(v, zero); + /* filled with 0xFF/0x00, negate and mask to boolean true */ + return @vpre@_andnot_@vsuf@(tmp, truemask); +} +#endif + +static void +sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) + op[i] = ip1[i] @op@ ip2[i]; + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vloadu@((@vtype@*)&ip1[i]); + @vtype@ b = @vloadu@((@vtype@*)&ip2[i]); +#if @and@ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + /* get 0xFF for non zeros*/ + @vtype@ tmp = @vpre@_cmpeq_epi8(a, zero); + /* andnot -> 0x00 for zeros xFF for non zeros, & with ip2 */ + tmp = @vpre@_andnot_@vsuf@(tmp, b); +#else + @vtype@ tmp = @vpre@_or_@vsuf@(a, b); +#endif + + @vstore@((@vtype@*)&op[i], byte_to_true(tmp)); + } + LOOP_BLOCKED_END { + op[i] = (ip1[i] @op@ ip2[i]); + } +} + + +static void +sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, const npy_intp n) +{ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + LOOP_BLOCK_ALIGN_VAR(ip, npy_bool, VECTOR_SIZE_BYTES) { + *op = *op @op@ ip[i]; + if (*op @sc@ 0) { + return; + } + } + /* unrolled once to replace a slow movmsk with a fast pmaxb */ + LOOP_BLOCKED(npy_bool, 2 * VECTOR_SIZE_BYTES) { + @vtype@ v = @vload@((@vtype@*)&ip[i]); + @vtype@ v2 = @vload@((@vtype@*)&ip[i + VECTOR_SIZE_BYTES]); + v = @vpre@_cmpeq_epi8(v, zero); + v2 = @vpre@_cmpeq_epi8(v2, zero); +#if @and@ + if ((@vpre@_movemask_epi8(@vpre@_max_epu8(v, v2)) != 0)) { + *op = 0; +#else + if 
((@vpre@_movemask_epi8(@vpre@_min_epu8(v, v2)) != 0xFFFF)) { + *op = 1; +#endif + return; + } + } + LOOP_BLOCKED_END { + *op = *op @op@ ip[i]; + if (*op @sc@ 0) { + return; + } + } +} + +/**end repeat**/ + +/**begin repeat + * # kind = absolute, logical_not# + * # op = !=, ==# + * # not = 0, 1# + * # vpre = _mm*2# + * # vsuf = si128*2# + * # vtype = __m128i*2# + * # type = npy_bool*2# + * # vloadu = _mm_loadu_si128*2# + * # vstore = _mm_store_si128*2# + */ + +static void +sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) + op[i] = (ip[i] @op@ 0); + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { + @vtype@ a = @vloadu@((@vtype@*)&ip[i]); +#if @not@ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + const @vtype@ truemask = @vpre@_set1_epi8(1 == 1); + /* equivalent to byte_to_true but can skip the negation */ + a = @vpre@_cmpeq_epi8(a, zero); + a = @vpre@_and_@vsuf@(a, truemask); +#else + /* abs is kind of pointless but maybe its used for byte_to_true */ + a = byte_to_true(a); +#endif + @vstore@((@vtype@*)&op[i], a); + } + LOOP_BLOCKED_END { + op[i] = (ip[i] @op@ 0); + } +} + +/**end repeat**/ + +#undef VECTOR_SIZE_BYTES + +#endif /* NPY_HAVE_SSE2_INTRINSICS */ + +#endif diff --git a/contrib/python/numpy/py2/numpy/core/tests/__init__.py b/contrib/python/numpy/py2/numpy/core/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/core/tests/_locales.py b/contrib/python/numpy/py2/numpy/core/tests/_locales.py new file mode 100644 index 0000000000..52e4ff36d5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/_locales.py @@ -0,0 +1,76 @@ +"""Provide class for testing in French locale + +""" +from __future__ import division, absolute_import, print_function + +import sys +import locale + +import pytest + +__ALL__ = ['CommaDecimalPointLocale'] + + +def find_comma_decimal_point_locale(): + """See if platform has a decimal point as comma locale. + + Find a locale that uses a comma instead of a period as the + decimal point. + + Returns + ------- + old_locale: str + Locale when the function was called. + new_locale: {str, None) + First French locale found, None if none found. + + """ + if sys.platform == 'win32': + locales = ['FRENCH'] + else: + locales = ['fr_FR', 'fr_FR.UTF-8', 'fi_FI', 'fi_FI.UTF-8'] + + old_locale = locale.getlocale(locale.LC_NUMERIC) + new_locale = None + try: + for loc in locales: + try: + locale.setlocale(locale.LC_NUMERIC, loc) + new_locale = loc + break + except locale.Error: + pass + finally: + locale.setlocale(locale.LC_NUMERIC, locale=old_locale) + return old_locale, new_locale + + +class CommaDecimalPointLocale(object): + """Sets LC_NUMERIC to a locale with comma as decimal point. + + Classes derived from this class have setup and teardown methods that run + tests with locale.LC_NUMERIC set to a locale where commas (',') are used as + the decimal point instead of periods ('.'). On exit the locale is restored + to the initial locale. It also serves as context manager with the same + effect. If no such locale is available, the test is skipped. + + .. 
versionadded:: 1.15.0 + + """ + (cur_locale, tst_locale) = find_comma_decimal_point_locale() + + def setup(self): + if self.tst_locale is None: + pytest.skip("No French locale available") + locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale) + + def teardown(self): + locale.setlocale(locale.LC_NUMERIC, locale=self.cur_locale) + + def __enter__(self): + if self.tst_locale is None: + pytest.skip("No French locale available") + locale.setlocale(locale.LC_NUMERIC, locale=self.tst_locale) + + def __exit__(self, type, value, traceback): + locale.setlocale(locale.LC_NUMERIC, locale=self.cur_locale) diff --git a/contrib/python/numpy/py2/numpy/core/tests/data/astype_copy.pkl b/contrib/python/numpy/py2/numpy/core/tests/data/astype_copy.pkl new file mode 100644 index 0000000000..7397c97829 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/core/tests/data/astype_copy.pkl differ diff --git a/contrib/python/numpy/py2/numpy/core/tests/data/recarray_from_file.fits b/contrib/python/numpy/py2/numpy/core/tests/data/recarray_from_file.fits new file mode 100644 index 0000000000..ca48ee8515 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/core/tests/data/recarray_from_file.fits differ diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_abc.py b/contrib/python/numpy/py2/numpy/core/tests/test_abc.py new file mode 100644 index 0000000000..d9c61b0c61 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_abc.py @@ -0,0 +1,56 @@ +from __future__ import division, absolute_import, print_function + +from numpy.testing import assert_ + +import numbers + +import numpy as np +from numpy.core.numerictypes import sctypes + +class TestABC(object): + def test_abstract(self): + assert_(issubclass(np.number, numbers.Number)) + + assert_(issubclass(np.inexact, numbers.Complex)) + assert_(issubclass(np.complexfloating, numbers.Complex)) + assert_(issubclass(np.floating, numbers.Real)) + + assert_(issubclass(np.integer, numbers.Integral)) + assert_(issubclass(np.signedinteger, numbers.Integral)) + assert_(issubclass(np.unsignedinteger, numbers.Integral)) + + def test_floats(self): + for t in sctypes['float']: + assert_(isinstance(t(), numbers.Real), + "{0} is not instance of Real".format(t.__name__)) + assert_(issubclass(t, numbers.Real), + "{0} is not subclass of Real".format(t.__name__)) + assert_(not isinstance(t(), numbers.Rational), + "{0} is instance of Rational".format(t.__name__)) + assert_(not issubclass(t, numbers.Rational), + "{0} is subclass of Rational".format(t.__name__)) + + def test_complex(self): + for t in sctypes['complex']: + assert_(isinstance(t(), numbers.Complex), + "{0} is not instance of Complex".format(t.__name__)) + assert_(issubclass(t, numbers.Complex), + "{0} is not subclass of Complex".format(t.__name__)) + assert_(not isinstance(t(), numbers.Real), + "{0} is instance of Real".format(t.__name__)) + assert_(not issubclass(t, numbers.Real), + "{0} is subclass of Real".format(t.__name__)) + + def test_int(self): + for t in sctypes['int']: + assert_(isinstance(t(), numbers.Integral), + "{0} is not instance of Integral".format(t.__name__)) + assert_(issubclass(t, numbers.Integral), + "{0} is not subclass of Integral".format(t.__name__)) + + def test_uint(self): + for t in sctypes['uint']: + assert_(isinstance(t(), numbers.Integral), + "{0} is not instance of Integral".format(t.__name__)) + assert_(issubclass(t, numbers.Integral), + "{0} is not subclass of Integral".format(t.__name__)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_api.py 
b/contrib/python/numpy/py2/numpy/core/tests/test_api.py new file mode 100644 index 0000000000..9755e7b36d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_api.py @@ -0,0 +1,516 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT + ) + +# Switch between new behaviour when NPY_RELAXED_STRIDES_CHECKING is set. +NPY_RELAXED_STRIDES_CHECKING = np.ones((10, 1), order='C').flags.f_contiguous + + +def test_array_array(): + tobj = type(object) + ones11 = np.ones((1, 1), np.float64) + tndarray = type(ones11) + # Test is_ndarray + assert_equal(np.array(ones11, dtype=np.float64), ones11) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(tndarray) + np.array(ones11) + assert_equal(old_refcount, sys.getrefcount(tndarray)) + + # test None + assert_equal(np.array(None, dtype=np.float64), + np.array(np.nan, dtype=np.float64)) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(tobj) + np.array(None, dtype=np.float64) + assert_equal(old_refcount, sys.getrefcount(tobj)) + + # test scalar + assert_equal(np.array(1.0, dtype=np.float64), + np.ones((), dtype=np.float64)) + if HAS_REFCOUNT: + old_refcount = sys.getrefcount(np.float64) + np.array(np.array(1.0, dtype=np.float64), dtype=np.float64) + assert_equal(old_refcount, sys.getrefcount(np.float64)) + + # test string + S2 = np.dtype((str, 2)) + S3 = np.dtype((str, 3)) + S5 = np.dtype((str, 5)) + assert_equal(np.array("1.0", dtype=np.float64), + np.ones((), dtype=np.float64)) + assert_equal(np.array("1.0").dtype, S3) + assert_equal(np.array("1.0", dtype=str).dtype, S3) + assert_equal(np.array("1.0", dtype=S2), np.array("1.")) + assert_equal(np.array("1", dtype=S5), np.ones((), dtype=S5)) + + # test unicode + _unicode = globals().get("unicode") + if _unicode: + U2 = np.dtype((_unicode, 2)) + U3 = np.dtype((_unicode, 3)) + U5 = np.dtype((_unicode, 5)) + assert_equal(np.array(_unicode("1.0"), dtype=np.float64), + np.ones((), dtype=np.float64)) + assert_equal(np.array(_unicode("1.0")).dtype, U3) + assert_equal(np.array(_unicode("1.0"), dtype=_unicode).dtype, U3) + assert_equal(np.array(_unicode("1.0"), dtype=U2), + np.array(_unicode("1."))) + assert_equal(np.array(_unicode("1"), dtype=U5), + np.ones((), dtype=U5)) + + builtins = getattr(__builtins__, '__dict__', __builtins__) + assert_(hasattr(builtins, 'get')) + + # test buffer + _buffer = builtins.get("buffer") + if _buffer and sys.version_info[:3] >= (2, 7, 5): + # This test fails for earlier versions of Python. + # Evidently a bug got fixed in 2.7.5. 
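+        # buffer('1.0') exposes the raw ASCII bytes 0x31, 0x2E, 0x30, so the
+        # conversions below yield their ordinal values 49, 46 and 48 in the
+        # requested dtype.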
+ dat = np.array(_buffer('1.0'), dtype=np.float64) + assert_equal(dat, [49.0, 46.0, 48.0]) + assert_(dat.dtype.type is np.float64) + + dat = np.array(_buffer(b'1.0')) + assert_equal(dat, [49, 46, 48]) + assert_(dat.dtype.type is np.uint8) + + # test memoryview, new version of buffer + _memoryview = builtins.get("memoryview") + if _memoryview: + dat = np.array(_memoryview(b'1.0'), dtype=np.float64) + assert_equal(dat, [49.0, 46.0, 48.0]) + assert_(dat.dtype.type is np.float64) + + dat = np.array(_memoryview(b'1.0')) + assert_equal(dat, [49, 46, 48]) + assert_(dat.dtype.type is np.uint8) + + # test array interface + a = np.array(100.0, dtype=np.float64) + o = type("o", (object,), + dict(__array_interface__=a.__array_interface__)) + assert_equal(np.array(o, dtype=np.float64), a) + + # test array_struct interface + a = np.array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], + dtype=[('f0', int), ('f1', float), ('f2', str)]) + o = type("o", (object,), + dict(__array_struct__=a.__array_struct__)) + ## wasn't what I expected... is np.array(o) supposed to equal a ? + ## instead we get a array([...], dtype=">V18") + assert_equal(bytes(np.array(o).data), bytes(a.data)) + + # test array + o = type("o", (object,), + dict(__array__=lambda *x: np.array(100.0, dtype=np.float64)))() + assert_equal(np.array(o, dtype=np.float64), np.array(100.0, np.float64)) + + # test recursion + nested = 1.5 + for i in range(np.MAXDIMS): + nested = [nested] + + # no error + np.array(nested) + + # Exceeds recursion limit + assert_raises(ValueError, np.array, [nested], dtype=np.float64) + + # Try with lists... + assert_equal(np.array([None] * 10, dtype=np.float64), + np.full((10,), np.nan, dtype=np.float64)) + assert_equal(np.array([[None]] * 10, dtype=np.float64), + np.full((10, 1), np.nan, dtype=np.float64)) + assert_equal(np.array([[None] * 10], dtype=np.float64), + np.full((1, 10), np.nan, dtype=np.float64)) + assert_equal(np.array([[None] * 10] * 10, dtype=np.float64), + np.full((10, 10), np.nan, dtype=np.float64)) + + assert_equal(np.array([1.0] * 10, dtype=np.float64), + np.ones((10,), dtype=np.float64)) + assert_equal(np.array([[1.0]] * 10, dtype=np.float64), + np.ones((10, 1), dtype=np.float64)) + assert_equal(np.array([[1.0] * 10], dtype=np.float64), + np.ones((1, 10), dtype=np.float64)) + assert_equal(np.array([[1.0] * 10] * 10, dtype=np.float64), + np.ones((10, 10), dtype=np.float64)) + + # Try with tuples + assert_equal(np.array((None,) * 10, dtype=np.float64), + np.full((10,), np.nan, dtype=np.float64)) + assert_equal(np.array([(None,)] * 10, dtype=np.float64), + np.full((10, 1), np.nan, dtype=np.float64)) + assert_equal(np.array([(None,) * 10], dtype=np.float64), + np.full((1, 10), np.nan, dtype=np.float64)) + assert_equal(np.array([(None,) * 10] * 10, dtype=np.float64), + np.full((10, 10), np.nan, dtype=np.float64)) + + assert_equal(np.array((1.0,) * 10, dtype=np.float64), + np.ones((10,), dtype=np.float64)) + assert_equal(np.array([(1.0,)] * 10, dtype=np.float64), + np.ones((10, 1), dtype=np.float64)) + assert_equal(np.array([(1.0,) * 10], dtype=np.float64), + np.ones((1, 10), dtype=np.float64)) + assert_equal(np.array([(1.0,) * 10] * 10, dtype=np.float64), + np.ones((10, 10), dtype=np.float64)) + + +def test_fastCopyAndTranspose(): + # 0D array + a = np.array(2) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + + # 1D array + a = np.array([3, 2, 7, 0]) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + + # 2D array + a = np.arange(6).reshape(2, 
3) + b = np.fastCopyAndTranspose(a) + assert_equal(b, a.T) + assert_(b.flags.owndata) + +def test_array_astype(): + a = np.arange(6, dtype='f4').reshape(2, 3) + # Default behavior: allows unsafe casts, keeps memory layout, + # always copies. + b = a.astype('i4') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('i4')) + assert_equal(a.strides, b.strides) + b = a.T.astype('i4') + assert_equal(a.T, b) + assert_equal(b.dtype, np.dtype('i4')) + assert_equal(a.T.strides, b.strides) + b = a.astype('f4') + assert_equal(a, b) + assert_(not (a is b)) + + # copy=False parameter can sometimes skip a copy + b = a.astype('f4', copy=False) + assert_(a is b) + + # order parameter allows overriding of the memory layout, + # forcing a copy if the layout is wrong + b = a.astype('f4', order='F', copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(b.flags.f_contiguous) + + b = a.astype('f4', order='C', copy=False) + assert_equal(a, b) + assert_(a is b) + assert_(b.flags.c_contiguous) + + # casting parameter allows catching bad casts + b = a.astype('c8', casting='safe') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('c8')) + + assert_raises(TypeError, a.astype, 'i4', casting='safe') + + # subok=False passes through a non-subclassed array + b = a.astype('f4', subok=0, copy=False) + assert_(a is b) + + class MyNDArray(np.ndarray): + pass + + a = np.array([[0, 1, 2], [3, 4, 5]], dtype='f4').view(MyNDArray) + + # subok=True passes through a subclass + b = a.astype('f4', subok=True, copy=False) + assert_(a is b) + + # subok=True is default, and creates a subtype on a cast + b = a.astype('i4', copy=False) + assert_equal(a, b) + assert_equal(type(b), MyNDArray) + + # subok=False never returns a subclass + b = a.astype('f4', subok=False, copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(type(b) is not MyNDArray) + + # Make sure converting from string object to fixed length string + # does not truncate. 
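+    # With no explicit length, astype('S')/astype('U') sizes the itemsize from
+    # the longest element, so the 100-character value below must give S100/U100.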
+ a = np.array([b'a'*100], dtype='O') + b = a.astype('S') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('S100')) + a = np.array([u'a'*100], dtype='O') + b = a.astype('U') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('U100')) + + # Same test as above but for strings shorter than 64 characters + a = np.array([b'a'*10], dtype='O') + b = a.astype('S') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('S10')) + a = np.array([u'a'*10], dtype='O') + b = a.astype('U') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('U10')) + + a = np.array(123456789012345678901234567890, dtype='O').astype('S') + assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30')) + a = np.array(123456789012345678901234567890, dtype='O').astype('U') + assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30')) + + a = np.array([123456789012345678901234567890], dtype='O').astype('S') + assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30')) + a = np.array([123456789012345678901234567890], dtype='O').astype('U') + assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30')) + + a = np.array(123456789012345678901234567890, dtype='S') + assert_array_equal(a, np.array(b'1234567890' * 3, dtype='S30')) + a = np.array(123456789012345678901234567890, dtype='U') + assert_array_equal(a, np.array(u'1234567890' * 3, dtype='U30')) + + a = np.array(u'a\u0140', dtype='U') + b = np.ndarray(buffer=a, dtype='uint32', shape=2) + assert_(b.size == 2) + + a = np.array([1000], dtype='i4') + assert_raises(TypeError, a.astype, 'S1', casting='safe') + + a = np.array(1000, dtype='i4') + assert_raises(TypeError, a.astype, 'U1', casting='safe') + +def test_copyto_fromscalar(): + a = np.arange(6, dtype='f4').reshape(2, 3) + + # Simple copy + np.copyto(a, 1.5) + assert_equal(a, 1.5) + np.copyto(a.T, 2.5) + assert_equal(a, 2.5) + + # Where-masked copy + mask = np.array([[0, 1, 0], [0, 0, 1]], dtype='?') + np.copyto(a, 3.5, where=mask) + assert_equal(a, [[2.5, 3.5, 2.5], [2.5, 2.5, 3.5]]) + mask = np.array([[0, 1], [1, 1], [1, 0]], dtype='?') + np.copyto(a.T, 4.5, where=mask) + assert_equal(a, [[2.5, 4.5, 4.5], [4.5, 4.5, 3.5]]) + +def test_copyto(): + a = np.arange(6, dtype='i4').reshape(2, 3) + + # Simple copy + np.copyto(a, [[3, 1, 5], [6, 2, 1]]) + assert_equal(a, [[3, 1, 5], [6, 2, 1]]) + + # Overlapping copy should work + np.copyto(a[:, :2], a[::-1, 1::-1]) + assert_equal(a, [[2, 6, 5], [1, 3, 1]]) + + # Defaults to 'same_kind' casting + assert_raises(TypeError, np.copyto, a, 1.5) + + # Force a copy with 'unsafe' casting, truncating 1.5 to 1 + np.copyto(a, 1.5, casting='unsafe') + assert_equal(a, 1) + + # Copying with a mask + np.copyto(a, 3, where=[True, False, True]) + assert_equal(a, [[3, 1, 3], [3, 1, 3]]) + + # Casting rule still applies with a mask + assert_raises(TypeError, np.copyto, a, 3.5, where=[True, False, True]) + + # Lists of integer 0's and 1's is ok too + np.copyto(a, 4.0, casting='unsafe', where=[[0, 1, 1], [1, 0, 0]]) + assert_equal(a, [[3, 4, 4], [4, 1, 3]]) + + # Overlapping copy with mask should work + np.copyto(a[:, :2], a[::-1, 1::-1], where=[[0, 1], [1, 1]]) + assert_equal(a, [[3, 4, 4], [4, 3, 3]]) + + # 'dst' must be an array + assert_raises(TypeError, np.copyto, [1, 2, 3], [2, 3, 4]) + +def test_copyto_permut(): + # test explicit overflow case + pad = 500 + l = [True] * pad + [True, True, True, True] + r = np.zeros(len(l)-pad) + d = np.ones(len(l)-pad) + mask = np.array(l)[pad:] + np.copyto(r, d, where=mask[::-1]) + + # test all permutation of possible masks, 9 should be 
sufficient for + # current 4 byte unrolled code + power = 9 + d = np.ones(power) + for i in range(2**power): + r = np.zeros(power) + l = [(i & x) != 0 for x in range(power)] + mask = np.array(l) + np.copyto(r, d, where=mask) + assert_array_equal(r == 1, l) + assert_equal(r.sum(), sum(l)) + + r = np.zeros(power) + np.copyto(r, d, where=mask[::-1]) + assert_array_equal(r == 1, l[::-1]) + assert_equal(r.sum(), sum(l)) + + r = np.zeros(power) + np.copyto(r[::2], d[::2], where=mask[::2]) + assert_array_equal(r[::2] == 1, l[::2]) + assert_equal(r[::2].sum(), sum(l[::2])) + + r = np.zeros(power) + np.copyto(r[::2], d[::2], where=mask[::-2]) + assert_array_equal(r[::2] == 1, l[::-2]) + assert_equal(r[::2].sum(), sum(l[::-2])) + + for c in [0xFF, 0x7F, 0x02, 0x10]: + r = np.zeros(power) + mask = np.array(l) + imask = np.array(l).view(np.uint8) + imask[mask != 0] = c + np.copyto(r, d, where=mask) + assert_array_equal(r == 1, l) + assert_equal(r.sum(), sum(l)) + + r = np.zeros(power) + np.copyto(r, d, where=True) + assert_equal(r.sum(), r.size) + r = np.ones(power) + d = np.zeros(power) + np.copyto(r, d, where=False) + assert_equal(r.sum(), r.size) + +def test_copy_order(): + a = np.arange(24).reshape(2, 1, 3, 4) + b = a.copy(order='F') + c = np.arange(24).reshape(2, 1, 4, 3).swapaxes(2, 3) + + def check_copy_result(x, y, ccontig, fcontig, strides=False): + assert_(not (x is y)) + assert_equal(x, y) + assert_equal(res.flags.c_contiguous, ccontig) + assert_equal(res.flags.f_contiguous, fcontig) + # This check is impossible only because + # NPY_RELAXED_STRIDES_CHECKING changes the strides actively + if not NPY_RELAXED_STRIDES_CHECKING: + if strides: + assert_equal(x.strides, y.strides) + else: + assert_(x.strides != y.strides) + + # Validate the initial state of a, b, and c + assert_(a.flags.c_contiguous) + assert_(not a.flags.f_contiguous) + assert_(not b.flags.c_contiguous) + assert_(b.flags.f_contiguous) + assert_(not c.flags.c_contiguous) + assert_(not c.flags.f_contiguous) + + # Copy with order='C' + res = a.copy(order='C') + check_copy_result(res, a, ccontig=True, fcontig=False, strides=True) + res = b.copy(order='C') + check_copy_result(res, b, ccontig=True, fcontig=False, strides=False) + res = c.copy(order='C') + check_copy_result(res, c, ccontig=True, fcontig=False, strides=False) + res = np.copy(a, order='C') + check_copy_result(res, a, ccontig=True, fcontig=False, strides=True) + res = np.copy(b, order='C') + check_copy_result(res, b, ccontig=True, fcontig=False, strides=False) + res = np.copy(c, order='C') + check_copy_result(res, c, ccontig=True, fcontig=False, strides=False) + + # Copy with order='F' + res = a.copy(order='F') + check_copy_result(res, a, ccontig=False, fcontig=True, strides=False) + res = b.copy(order='F') + check_copy_result(res, b, ccontig=False, fcontig=True, strides=True) + res = c.copy(order='F') + check_copy_result(res, c, ccontig=False, fcontig=True, strides=False) + res = np.copy(a, order='F') + check_copy_result(res, a, ccontig=False, fcontig=True, strides=False) + res = np.copy(b, order='F') + check_copy_result(res, b, ccontig=False, fcontig=True, strides=True) + res = np.copy(c, order='F') + check_copy_result(res, c, ccontig=False, fcontig=True, strides=False) + + # Copy with order='K' + res = a.copy(order='K') + check_copy_result(res, a, ccontig=True, fcontig=False, strides=True) + res = b.copy(order='K') + check_copy_result(res, b, ccontig=False, fcontig=True, strides=True) + res = c.copy(order='K') + check_copy_result(res, c, ccontig=False, fcontig=False, 
strides=True) + res = np.copy(a, order='K') + check_copy_result(res, a, ccontig=True, fcontig=False, strides=True) + res = np.copy(b, order='K') + check_copy_result(res, b, ccontig=False, fcontig=True, strides=True) + res = np.copy(c, order='K') + check_copy_result(res, c, ccontig=False, fcontig=False, strides=True) + +def test_contiguous_flags(): + a = np.ones((4, 4, 1))[::2,:,:] + if NPY_RELAXED_STRIDES_CHECKING: + a.strides = a.strides[:2] + (-123,) + b = np.ones((2, 2, 1, 2, 2)).swapaxes(3, 4) + + def check_contig(a, ccontig, fcontig): + assert_(a.flags.c_contiguous == ccontig) + assert_(a.flags.f_contiguous == fcontig) + + # Check if new arrays are correct: + check_contig(a, False, False) + check_contig(b, False, False) + if NPY_RELAXED_STRIDES_CHECKING: + check_contig(np.empty((2, 2, 0, 2, 2)), True, True) + check_contig(np.array([[[1], [2]]], order='F'), True, True) + else: + check_contig(np.empty((2, 2, 0, 2, 2)), True, False) + check_contig(np.array([[[1], [2]]], order='F'), False, True) + check_contig(np.empty((2, 2)), True, False) + check_contig(np.empty((2, 2), order='F'), False, True) + + # Check that np.array creates correct contiguous flags: + check_contig(np.array(a, copy=False), False, False) + check_contig(np.array(a, copy=False, order='C'), True, False) + check_contig(np.array(a, ndmin=4, copy=False, order='F'), False, True) + + if NPY_RELAXED_STRIDES_CHECKING: + # Check slicing update of flags and : + check_contig(a[0], True, True) + check_contig(a[None, ::4, ..., None], True, True) + check_contig(b[0, 0, ...], False, True) + check_contig(b[:,:, 0:0,:,:], True, True) + else: + # Check slicing update of flags: + check_contig(a[0], True, False) + # Would be nice if this was C-Contiguous: + check_contig(a[None, 0, ..., None], False, False) + check_contig(b[0, 0, 0, ...], False, True) + + # Test ravel and squeeze. 
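+    # Both results are 1-D contiguous arrays, which are flagged as C- and
+    # F-contiguous at the same time.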
+ check_contig(a.ravel(), True, True) + check_contig(np.ones((1, 3, 1)).squeeze(), True, True) + +def test_broadcast_arrays(): + # Test user defined dtypes + a = np.array([(1, 2, 3)], dtype='u4,u4,u4') + b = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype='u4,u4,u4') + result = np.broadcast_arrays(a, b) + assert_equal(result[0], np.array([(1, 2, 3), (1, 2, 3), (1, 2, 3)], dtype='u4,u4,u4')) + assert_equal(result[1], np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], dtype='u4,u4,u4')) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_arrayprint.py b/contrib/python/numpy/py2/numpy/core/tests/test_arrayprint.py new file mode 100644 index 0000000000..f2b8fdca71 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_arrayprint.py @@ -0,0 +1,893 @@ +# -*- coding: utf-8 -*- +from __future__ import division, absolute_import, print_function + +import sys +import gc +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns, HAS_REFCOUNT, + assert_raises_regex, + ) +import textwrap + +class TestArrayRepr(object): + def test_nan_inf(self): + x = np.array([np.nan, np.inf]) + assert_equal(repr(x), 'array([nan, inf])') + + def test_subclass(self): + class sub(np.ndarray): pass + + # one dimensional + x1d = np.array([1, 2]).view(sub) + assert_equal(repr(x1d), 'sub([1, 2])') + + # two dimensional + x2d = np.array([[1, 2], [3, 4]]).view(sub) + assert_equal(repr(x2d), + 'sub([[1, 2],\n' + ' [3, 4]])') + + # two dimensional with flexible dtype + xstruct = np.ones((2,2), dtype=[('a', '<i4')]).view(sub) + assert_equal(repr(xstruct), + "sub([[(1,), (1,)],\n" + " [(1,), (1,)]], dtype=[('a', '<i4')])" + ) + + @pytest.mark.xfail(reason="See gh-10544") + def test_object_subclass(self): + class sub(np.ndarray): + def __new__(cls, inp): + obj = np.asarray(inp).view(cls) + return obj + + def __getitem__(self, ind): + ret = super(sub, self).__getitem__(ind) + return sub(ret) + + # test that object + subclass is OK: + x = sub([None, None]) + assert_equal(repr(x), 'sub([None, None], dtype=object)') + assert_equal(str(x), '[None None]') + + x = sub([None, sub([None, None])]) + assert_equal(repr(x), + 'sub([None, sub([None, None], dtype=object)], dtype=object)') + assert_equal(str(x), '[None sub([None, None], dtype=object)]') + + def test_0d_object_subclass(self): + # make sure that subclasses which return 0ds instead + # of scalars don't cause infinite recursion in str + class sub(np.ndarray): + def __new__(cls, inp): + obj = np.asarray(inp).view(cls) + return obj + + def __getitem__(self, ind): + ret = super(sub, self).__getitem__(ind) + return sub(ret) + + x = sub(1) + assert_equal(repr(x), 'sub(1)') + assert_equal(str(x), '1') + + x = sub([1, 1]) + assert_equal(repr(x), 'sub([1, 1])') + assert_equal(str(x), '[1 1]') + + # check it works properly with object arrays too + x = sub(None) + assert_equal(repr(x), 'sub(None, dtype=object)') + assert_equal(str(x), 'None') + + # plus recursive object arrays (even depth > 1) + y = sub(None) + x[()] = y + y[()] = x + assert_equal(repr(x), + 'sub(sub(sub(..., dtype=object), dtype=object), dtype=object)') + assert_equal(str(x), '...') + x[()] = 0 # resolve circular references for garbage collector + + # nested 0d-subclass-object + x = sub(None) + x[()] = sub(None) + assert_equal(repr(x), 'sub(sub(None, dtype=object), dtype=object)') + assert_equal(str(x), 'None') + + # gh-10663 + class DuckCounter(np.ndarray): + def __getitem__(self, item): + result = super(DuckCounter, self).__getitem__(item) + if not 
isinstance(result, DuckCounter): + result = result[...].view(DuckCounter) + return result + + def to_string(self): + return {0: 'zero', 1: 'one', 2: 'two'}.get(self.item(), 'many') + + def __str__(self): + if self.shape == (): + return self.to_string() + else: + fmt = {'all': lambda x: x.to_string()} + return np.array2string(self, formatter=fmt) + + dc = np.arange(5).view(DuckCounter) + assert_equal(str(dc), "[zero one two many many]") + assert_equal(str(dc[0]), "zero") + + def test_self_containing(self): + arr0d = np.array(None) + arr0d[()] = arr0d + assert_equal(repr(arr0d), + 'array(array(..., dtype=object), dtype=object)') + arr0d[()] = 0 # resolve recursion for garbage collector + + arr1d = np.array([None, None]) + arr1d[1] = arr1d + assert_equal(repr(arr1d), + 'array([None, array(..., dtype=object)], dtype=object)') + arr1d[1] = 0 # resolve recursion for garbage collector + + first = np.array(None) + second = np.array(None) + first[()] = second + second[()] = first + assert_equal(repr(first), + 'array(array(array(..., dtype=object), dtype=object), dtype=object)') + first[()] = 0 # resolve circular references for garbage collector + + def test_containing_list(self): + # printing square brackets directly would be ambiguuous + arr1d = np.array([None, None]) + arr1d[0] = [1, 2] + arr1d[1] = [3] + assert_equal(repr(arr1d), + 'array([list([1, 2]), list([3])], dtype=object)') + + def test_void_scalar_recursion(self): + # gh-9345 + repr(np.void(b'test')) # RecursionError ? + + def test_fieldless_structured(self): + # gh-10366 + no_fields = np.dtype([]) + arr_no_fields = np.empty(4, dtype=no_fields) + assert_equal(repr(arr_no_fields), 'array([(), (), (), ()], dtype=[])') + + +class TestComplexArray(object): + def test_str(self): + rvals = [0, 1, -1, np.inf, -np.inf, np.nan] + cvals = [complex(rp, ip) for rp in rvals for ip in rvals] + dtypes = [np.complex64, np.cdouble, np.clongdouble] + actual = [str(np.array([c], dt)) for c in cvals for dt in dtypes] + wanted = [ + '[0.+0.j]', '[0.+0.j]', '[0.+0.j]', + '[0.+1.j]', '[0.+1.j]', '[0.+1.j]', + '[0.-1.j]', '[0.-1.j]', '[0.-1.j]', + '[0.+infj]', '[0.+infj]', '[0.+infj]', + '[0.-infj]', '[0.-infj]', '[0.-infj]', + '[0.+nanj]', '[0.+nanj]', '[0.+nanj]', + '[1.+0.j]', '[1.+0.j]', '[1.+0.j]', + '[1.+1.j]', '[1.+1.j]', '[1.+1.j]', + '[1.-1.j]', '[1.-1.j]', '[1.-1.j]', + '[1.+infj]', '[1.+infj]', '[1.+infj]', + '[1.-infj]', '[1.-infj]', '[1.-infj]', + '[1.+nanj]', '[1.+nanj]', '[1.+nanj]', + '[-1.+0.j]', '[-1.+0.j]', '[-1.+0.j]', + '[-1.+1.j]', '[-1.+1.j]', '[-1.+1.j]', + '[-1.-1.j]', '[-1.-1.j]', '[-1.-1.j]', + '[-1.+infj]', '[-1.+infj]', '[-1.+infj]', + '[-1.-infj]', '[-1.-infj]', '[-1.-infj]', + '[-1.+nanj]', '[-1.+nanj]', '[-1.+nanj]', + '[inf+0.j]', '[inf+0.j]', '[inf+0.j]', + '[inf+1.j]', '[inf+1.j]', '[inf+1.j]', + '[inf-1.j]', '[inf-1.j]', '[inf-1.j]', + '[inf+infj]', '[inf+infj]', '[inf+infj]', + '[inf-infj]', '[inf-infj]', '[inf-infj]', + '[inf+nanj]', '[inf+nanj]', '[inf+nanj]', + '[-inf+0.j]', '[-inf+0.j]', '[-inf+0.j]', + '[-inf+1.j]', '[-inf+1.j]', '[-inf+1.j]', + '[-inf-1.j]', '[-inf-1.j]', '[-inf-1.j]', + '[-inf+infj]', '[-inf+infj]', '[-inf+infj]', + '[-inf-infj]', '[-inf-infj]', '[-inf-infj]', + '[-inf+nanj]', '[-inf+nanj]', '[-inf+nanj]', + '[nan+0.j]', '[nan+0.j]', '[nan+0.j]', + '[nan+1.j]', '[nan+1.j]', '[nan+1.j]', + '[nan-1.j]', '[nan-1.j]', '[nan-1.j]', + '[nan+infj]', '[nan+infj]', '[nan+infj]', + '[nan-infj]', '[nan-infj]', '[nan-infj]', + '[nan+nanj]', '[nan+nanj]', '[nan+nanj]'] + + for res, val in zip(actual, wanted): + 
assert_equal(res, val) + +class TestArray2String(object): + def test_basic(self): + """Basic test of array2string.""" + a = np.arange(3) + assert_(np.array2string(a) == '[0 1 2]') + assert_(np.array2string(a, max_line_width=4, legacy='1.13') == '[0 1\n 2]') + assert_(np.array2string(a, max_line_width=4) == '[0\n 1\n 2]') + + def test_unexpected_kwarg(self): + # ensure than an appropriate TypeError + # is raised when array2string receives + # an unexpected kwarg + + with assert_raises_regex(TypeError, 'nonsense'): + np.array2string(np.array([1, 2, 3]), + nonsense=None) + + def test_format_function(self): + """Test custom format function for each element in array.""" + def _format_function(x): + if np.abs(x) < 1: + return '.' + elif np.abs(x) < 2: + return 'o' + else: + return 'O' + + x = np.arange(3) + if sys.version_info[0] >= 3: + x_hex = "[0x0 0x1 0x2]" + x_oct = "[0o0 0o1 0o2]" + else: + x_hex = "[0x0L 0x1L 0x2L]" + x_oct = "[0L 01L 02L]" + assert_(np.array2string(x, formatter={'all':_format_function}) == + "[. o O]") + assert_(np.array2string(x, formatter={'int_kind':_format_function}) == + "[. o O]") + assert_(np.array2string(x, formatter={'all':lambda x: "%.4f" % x}) == + "[0.0000 1.0000 2.0000]") + assert_equal(np.array2string(x, formatter={'int':lambda x: hex(x)}), + x_hex) + assert_equal(np.array2string(x, formatter={'int':lambda x: oct(x)}), + x_oct) + + x = np.arange(3.) + assert_(np.array2string(x, formatter={'float_kind':lambda x: "%.2f" % x}) == + "[0.00 1.00 2.00]") + assert_(np.array2string(x, formatter={'float':lambda x: "%.2f" % x}) == + "[0.00 1.00 2.00]") + + s = np.array(['abc', 'def']) + assert_(np.array2string(s, formatter={'numpystr':lambda s: s*2}) == + '[abcabc defdef]') + + # check for backcompat that using FloatFormat works and emits warning + with assert_warns(DeprecationWarning): + fmt = np.core.arrayprint.FloatFormat(x, 9, 'maxprec', False) + assert_equal(np.array2string(x, formatter={'float_kind': fmt}), + '[0. 1. 
2.]') + + def test_structure_format(self): + dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))]) + x = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt) + assert_equal(np.array2string(x), + "[('Sarah', [8., 7.]) ('John', [6., 7.])]") + + np.set_printoptions(legacy='1.13') + try: + # for issue #5692 + A = np.zeros(shape=10, dtype=[("A", "M8[s]")]) + A[5:].fill(np.datetime64('NaT')) + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('NaT',) ('NaT',) + ('NaT',) ('NaT',) ('NaT',)]""") + ) + finally: + np.set_printoptions(legacy=False) + + # same again, but with non-legacy behavior + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) + ('1970-01-01T00:00:00',) ( 'NaT',) + ( 'NaT',) ( 'NaT',) + ( 'NaT',) ( 'NaT',)]""") + ) + + # and again, with timedeltas + A = np.full(10, 123456, dtype=[("A", "m8[s]")]) + A[5:].fill(np.datetime64('NaT')) + assert_equal( + np.array2string(A), + textwrap.dedent("""\ + [(123456,) (123456,) (123456,) (123456,) (123456,) ( 'NaT',) ( 'NaT',) + ( 'NaT',) ( 'NaT',) ( 'NaT',)]""") + ) + + # See #8160 + struct_int = np.array([([1, -1],), ([123, 1],)], dtype=[('B', 'i4', 2)]) + assert_equal(np.array2string(struct_int), + "[([ 1, -1],) ([123, 1],)]") + struct_2dint = np.array([([[0, 1], [2, 3]],), ([[12, 0], [0, 0]],)], + dtype=[('B', 'i4', (2, 2))]) + assert_equal(np.array2string(struct_2dint), + "[([[ 0, 1], [ 2, 3]],) ([[12, 0], [ 0, 0]],)]") + + # See #8172 + array_scalar = np.array( + (1., 2.1234567890123456789, 3.), dtype=('f8,f8,f8')) + assert_equal(np.array2string(array_scalar), "(1., 2.12345679, 3.)") + + def test_unstructured_void_repr(self): + a = np.array([27, 91, 50, 75, 7, 65, 10, 8, + 27, 91, 51, 49,109, 82,101,100], dtype='u1').view('V8') + assert_equal(repr(a[0]), r"void(b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08')") + assert_equal(str(a[0]), r"b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'") + assert_equal(repr(a), + r"array([b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'," "\n" + r" b'\x1B\x5B\x33\x31\x6D\x52\x65\x64'], dtype='|V8')") + + assert_equal(eval(repr(a), vars(np)), a) + assert_equal(eval(repr(a[0]), vars(np)), a[0]) + + def test_edgeitems_kwarg(self): + # previously the global print options would be taken over the kwarg + arr = np.zeros(3, int) + assert_equal( + np.array2string(arr, edgeitems=1, threshold=0), + "[0 ... 0]" + ) + + def test_summarize_1d(self): + A = np.arange(1001) + strA = '[ 0 1 2 ... 998 999 1000]' + assert_equal(str(A), strA) + + reprA = 'array([ 0, 1, 2, ..., 998, 999, 1000])' + assert_equal(repr(A), reprA) + + def test_summarize_2d(self): + A = np.arange(1002).reshape(2, 501) + strA = '[[ 0 1 2 ... 498 499 500]\n' \ + ' [ 501 502 503 ... 
999 1000 1001]]' + assert_equal(str(A), strA) + + reprA = 'array([[ 0, 1, 2, ..., 498, 499, 500],\n' \ + ' [ 501, 502, 503, ..., 999, 1000, 1001]])' + assert_equal(repr(A), reprA) + + def test_linewidth(self): + a = np.full(6, 1) + + def make_str(a, width, **kw): + return np.array2string(a, separator="", max_line_width=width, **kw) + + assert_equal(make_str(a, 8, legacy='1.13'), '[111111]') + assert_equal(make_str(a, 7, legacy='1.13'), '[111111]') + assert_equal(make_str(a, 5, legacy='1.13'), '[1111\n' + ' 11]') + + assert_equal(make_str(a, 8), '[111111]') + assert_equal(make_str(a, 7), '[11111\n' + ' 1]') + assert_equal(make_str(a, 5), '[111\n' + ' 111]') + + b = a[None,None,:] + + assert_equal(make_str(b, 12, legacy='1.13'), '[[[111111]]]') + assert_equal(make_str(b, 9, legacy='1.13'), '[[[111111]]]') + assert_equal(make_str(b, 8, legacy='1.13'), '[[[11111\n' + ' 1]]]') + + assert_equal(make_str(b, 12), '[[[111111]]]') + assert_equal(make_str(b, 9), '[[[111\n' + ' 111]]]') + assert_equal(make_str(b, 8), '[[[11\n' + ' 11\n' + ' 11]]]') + + def test_wide_element(self): + a = np.array(['xxxxx']) + assert_equal( + np.array2string(a, max_line_width=5), + "['xxxxx']" + ) + assert_equal( + np.array2string(a, max_line_width=5, legacy='1.13'), + "[ 'xxxxx']" + ) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_refcount(self): + # make sure we do not hold references to the array due to a recursive + # closure (gh-10620) + gc.disable() + a = np.arange(2) + r1 = sys.getrefcount(a) + np.array2string(a) + np.array2string(a) + r2 = sys.getrefcount(a) + gc.collect() + gc.enable() + assert_(r1 == r2) + +class TestPrintOptions(object): + """Test getting and setting global print options.""" + + def setup(self): + self.oldopts = np.get_printoptions() + + def teardown(self): + np.set_printoptions(**self.oldopts) + + def test_basic(self): + x = np.array([1.5, 0, 1.234567890]) + assert_equal(repr(x), "array([1.5 , 0. , 1.23456789])") + np.set_printoptions(precision=4) + assert_equal(repr(x), "array([1.5 , 0. , 1.2346])") + + def test_precision_zero(self): + np.set_printoptions(precision=0) + for values, string in ( + ([0.], "0."), ([.3], "0."), ([-.3], "-0."), ([.7], "1."), + ([1.5], "2."), ([-1.5], "-2."), ([-15.34], "-15."), + ([100.], "100."), ([.2, -1, 122.51], " 0., -1., 123."), + ([0], "0"), ([-12], "-12"), ([complex(.3, -.7)], "0.-1.j")): + x = np.array(values) + assert_equal(repr(x), "array([%s])" % string) + + def test_formatter(self): + x = np.arange(3) + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + + def test_formatter_reset(self): + x = np.arange(3) + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'int':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + np.set_printoptions(formatter={'all':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'all':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + np.set_printoptions(formatter={'int':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1, 0, 1])") + np.set_printoptions(formatter={'int_kind':None}) + assert_equal(repr(x), "array([0, 1, 2])") + + x = np.arange(3.) 
+ np.set_printoptions(formatter={'float':lambda x: str(x-1)}) + assert_equal(repr(x), "array([-1.0, 0.0, 1.0])") + np.set_printoptions(formatter={'float_kind':None}) + assert_equal(repr(x), "array([0., 1., 2.])") + + def test_0d_arrays(self): + unicode = type(u'') + + assert_equal(unicode(np.array(u'café', '<U4')), u'café') + + if sys.version_info[0] >= 3: + assert_equal(repr(np.array('café', '<U4')), + "array('café', dtype='<U4')") + else: + assert_equal(repr(np.array(u'café', '<U4')), + "array(u'caf\\xe9', dtype='<U4')") + assert_equal(str(np.array('test', np.str_)), 'test') + + a = np.zeros(1, dtype=[('a', '<i4', (3,))]) + assert_equal(str(a[0]), '([0, 0, 0],)') + + assert_equal(repr(np.datetime64('2005-02-25')[...]), + "array('2005-02-25', dtype='datetime64[D]')") + + assert_equal(repr(np.timedelta64('10', 'Y')[...]), + "array(10, dtype='timedelta64[Y]')") + + # repr of 0d arrays is affected by printoptions + x = np.array(1) + np.set_printoptions(formatter={'all':lambda x: "test"}) + assert_equal(repr(x), "array(test)") + # str is unaffected + assert_equal(str(x), "1") + + # check `style` arg raises + assert_warns(DeprecationWarning, np.array2string, + np.array(1.), style=repr) + # but not in legacy mode + np.array2string(np.array(1.), style=repr, legacy='1.13') + # gh-10934 style was broken in legacy mode, check it works + np.array2string(np.array(1.), legacy='1.13') + + def test_float_spacing(self): + x = np.array([1., 2., 3.]) + y = np.array([1., 2., -10.]) + z = np.array([100., 2., -1.]) + w = np.array([-100., 2., 1.]) + + assert_equal(repr(x), 'array([1., 2., 3.])') + assert_equal(repr(y), 'array([ 1., 2., -10.])') + assert_equal(repr(np.array(y[0])), 'array(1.)') + assert_equal(repr(np.array(y[-1])), 'array(-10.)') + assert_equal(repr(z), 'array([100., 2., -1.])') + assert_equal(repr(w), 'array([-100., 2., 1.])') + + assert_equal(repr(np.array([np.nan, np.inf])), 'array([nan, inf])') + assert_equal(repr(np.array([np.nan, -np.inf])), 'array([ nan, -inf])') + + x = np.array([np.inf, 100000, 1.1234]) + y = np.array([np.inf, 100000, -1.1234]) + z = np.array([np.inf, 1.1234, -1e120]) + np.set_printoptions(precision=2) + assert_equal(repr(x), 'array([ inf, 1.00e+05, 1.12e+00])') + assert_equal(repr(y), 'array([ inf, 1.00e+05, -1.12e+00])') + assert_equal(repr(z), 'array([ inf, 1.12e+000, -1.00e+120])') + + def test_bool_spacing(self): + assert_equal(repr(np.array([True, True])), + 'array([ True, True])') + assert_equal(repr(np.array([True, False])), + 'array([ True, False])') + assert_equal(repr(np.array([True])), + 'array([ True])') + assert_equal(repr(np.array(True)), + 'array(True)') + assert_equal(repr(np.array(False)), + 'array(False)') + + def test_sign_spacing(self): + a = np.arange(4.) + b = np.array([1.234e9]) + c = np.array([1.0 + 1.0j, 1.123456789 + 1.123456789j], dtype='c16') + + assert_equal(repr(a), 'array([0., 1., 2., 3.])') + assert_equal(repr(np.array(1.)), 'array(1.)') + assert_equal(repr(b), 'array([1.234e+09])') + assert_equal(repr(np.array([0.])), 'array([0.])') + assert_equal(repr(c), + "array([1. +1.j , 1.12345679+1.12345679j])") + assert_equal(repr(np.array([0., -0.])), 'array([ 0., -0.])') + + np.set_printoptions(sign=' ') + assert_equal(repr(a), 'array([ 0., 1., 2., 3.])') + assert_equal(repr(np.array(1.)), 'array( 1.)') + assert_equal(repr(b), 'array([ 1.234e+09])') + assert_equal(repr(c), + "array([ 1. 
+1.j , 1.12345679+1.12345679j])") + assert_equal(repr(np.array([0., -0.])), 'array([ 0., -0.])') + + np.set_printoptions(sign='+') + assert_equal(repr(a), 'array([+0., +1., +2., +3.])') + assert_equal(repr(np.array(1.)), 'array(+1.)') + assert_equal(repr(b), 'array([+1.234e+09])') + assert_equal(repr(c), + "array([+1. +1.j , +1.12345679+1.12345679j])") + + np.set_printoptions(legacy='1.13') + assert_equal(repr(a), 'array([ 0., 1., 2., 3.])') + assert_equal(repr(b), 'array([ 1.23400000e+09])') + assert_equal(repr(-b), 'array([ -1.23400000e+09])') + assert_equal(repr(np.array(1.)), 'array(1.0)') + assert_equal(repr(np.array([0.])), 'array([ 0.])') + assert_equal(repr(c), + "array([ 1.00000000+1.j , 1.12345679+1.12345679j])") + # gh-10383 + assert_equal(str(np.array([-1., 10])), "[ -1. 10.]") + + assert_raises(TypeError, np.set_printoptions, wrongarg=True) + + def test_float_overflow_nowarn(self): + # make sure internal computations in FloatingFormat don't + # warn about overflow + repr(np.array([1e4, 0.1], dtype='f2')) + + def test_sign_spacing_structured(self): + a = np.ones(2, dtype='<f,<f') + assert_equal(repr(a), + "array([(1., 1.), (1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4')])") + assert_equal(repr(a[0]), "(1., 1.)") + + def test_floatmode(self): + x = np.array([0.6104, 0.922, 0.457, 0.0906, 0.3733, 0.007244, + 0.5933, 0.947, 0.2383, 0.4226], dtype=np.float16) + y = np.array([0.2918820979355541, 0.5064172631089138, + 0.2848750619642916, 0.4342965294660567, + 0.7326538397312751, 0.3459503329096204, + 0.0862072768214508, 0.39112753029631175], + dtype=np.float64) + z = np.arange(6, dtype=np.float16)/10 + c = np.array([1.0 + 1.0j, 1.123456789 + 1.123456789j], dtype='c16') + + # also make sure 1e23 is right (is between two fp numbers) + w = np.array(['1e{}'.format(i) for i in range(25)], dtype=np.float64) + # note: we construct w from the strings `1eXX` instead of doing + # `10.**arange(24)` because it turns out the two are not equivalent in + # python. On some architectures `1e23 != 10.**23`. + wp = np.array([1.234e1, 1e2, 1e123]) + + # unique mode + np.set_printoptions(floatmode='unique') + assert_equal(repr(x), + "array([0.6104 , 0.922 , 0.457 , 0.0906 , 0.3733 , 0.007244,\n" + " 0.5933 , 0.947 , 0.2383 , 0.4226 ], dtype=float16)") + assert_equal(repr(y), + "array([0.2918820979355541 , 0.5064172631089138 , 0.2848750619642916 ,\n" + " 0.4342965294660567 , 0.7326538397312751 , 0.3459503329096204 ,\n" + " 0.0862072768214508 , 0.39112753029631175])") + assert_equal(repr(z), + "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)") + assert_equal(repr(w), + "array([1.e+00, 1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06, 1.e+07,\n" + " 1.e+08, 1.e+09, 1.e+10, 1.e+11, 1.e+12, 1.e+13, 1.e+14, 1.e+15,\n" + " 1.e+16, 1.e+17, 1.e+18, 1.e+19, 1.e+20, 1.e+21, 1.e+22, 1.e+23,\n" + " 1.e+24])") + assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1. +1.j , 1.123456789+1.123456789j])") + + # maxprec mode, precision=8 + np.set_printoptions(floatmode='maxprec', precision=8) + assert_equal(repr(x), + "array([0.6104 , 0.922 , 0.457 , 0.0906 , 0.3733 , 0.007244,\n" + " 0.5933 , 0.947 , 0.2383 , 0.4226 ], dtype=float16)") + assert_equal(repr(y), + "array([0.2918821 , 0.50641726, 0.28487506, 0.43429653, 0.73265384,\n" + " 0.34595033, 0.08620728, 0.39112753])") + assert_equal(repr(z), + "array([0. 
, 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)") + assert_equal(repr(w[::5]), + "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])") + assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1. +1.j , 1.12345679+1.12345679j])") + + # fixed mode, precision=4 + np.set_printoptions(floatmode='fixed', precision=4) + assert_equal(repr(x), + "array([0.6104, 0.9219, 0.4570, 0.0906, 0.3733, 0.0072, 0.5933, 0.9468,\n" + " 0.2383, 0.4226], dtype=float16)") + assert_equal(repr(y), + "array([0.2919, 0.5064, 0.2849, 0.4343, 0.7327, 0.3460, 0.0862, 0.3911])") + assert_equal(repr(z), + "array([0.0000, 0.1000, 0.2000, 0.3000, 0.3999, 0.5000], dtype=float16)") + assert_equal(repr(w[::5]), + "array([1.0000e+00, 1.0000e+05, 1.0000e+10, 1.0000e+15, 1.0000e+20])") + assert_equal(repr(wp), "array([1.2340e+001, 1.0000e+002, 1.0000e+123])") + assert_equal(repr(np.zeros(3)), "array([0.0000, 0.0000, 0.0000])") + assert_equal(repr(c), + "array([1.0000+1.0000j, 1.1235+1.1235j])") + # for larger precision, representation error becomes more apparent: + np.set_printoptions(floatmode='fixed', precision=8) + assert_equal(repr(z), + "array([0.00000000, 0.09997559, 0.19995117, 0.30004883, 0.39990234,\n" + " 0.50000000], dtype=float16)") + + # maxprec_equal mode, precision=8 + np.set_printoptions(floatmode='maxprec_equal', precision=8) + assert_equal(repr(x), + "array([0.610352, 0.921875, 0.457031, 0.090576, 0.373291, 0.007244,\n" + " 0.593262, 0.946777, 0.238281, 0.422607], dtype=float16)") + assert_equal(repr(y), + "array([0.29188210, 0.50641726, 0.28487506, 0.43429653, 0.73265384,\n" + " 0.34595033, 0.08620728, 0.39112753])") + assert_equal(repr(z), + "array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)") + assert_equal(repr(w[::5]), + "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])") + assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])") + assert_equal(repr(c), + "array([1.00000000+1.00000000j, 1.12345679+1.12345679j])") + + def test_legacy_mode_scalars(self): + # in legacy mode, str of floats get truncated, and complex scalars + # use * for non-finite imaginary part + np.set_printoptions(legacy='1.13') + assert_equal(str(np.float64(1.123456789123456789)), '1.12345678912') + assert_equal(str(np.complex128(complex(1, np.nan))), '(1+nan*j)') + + np.set_printoptions(legacy=False) + assert_equal(str(np.float64(1.123456789123456789)), + '1.1234567891234568') + assert_equal(str(np.complex128(complex(1, np.nan))), '(1+nanj)') + + def test_legacy_stray_comma(self): + np.set_printoptions(legacy='1.13') + assert_equal(str(np.arange(10000)), '[ 0 1 2 ..., 9997 9998 9999]') + + np.set_printoptions(legacy=False) + assert_equal(str(np.arange(10000)), '[ 0 1 2 ... 
9997 9998 9999]') + + def test_dtype_linewidth_wrapping(self): + np.set_printoptions(linewidth=75) + assert_equal(repr(np.arange(10,20., dtype='f4')), + "array([10., 11., 12., 13., 14., 15., 16., 17., 18., 19.], dtype=float32)") + assert_equal(repr(np.arange(10,23., dtype='f4')), textwrap.dedent("""\ + array([10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22.], + dtype=float32)""")) + + styp = '<U4' if sys.version_info[0] >= 3 else '|S4' + assert_equal(repr(np.ones(3, dtype=styp)), + "array(['1', '1', '1'], dtype='{}')".format(styp)) + assert_equal(repr(np.ones(12, dtype=styp)), textwrap.dedent("""\ + array(['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1'], + dtype='{}')""".format(styp))) + + def test_linewidth_repr(self): + a = np.full(7, fill_value=2) + np.set_printoptions(linewidth=17) + assert_equal( + repr(a), + textwrap.dedent("""\ + array([2, 2, 2, + 2, 2, 2, + 2])""") + ) + np.set_printoptions(linewidth=17, legacy='1.13') + assert_equal( + repr(a), + textwrap.dedent("""\ + array([2, 2, 2, + 2, 2, 2, 2])""") + ) + + a = np.full(8, fill_value=2) + + np.set_printoptions(linewidth=18, legacy=False) + assert_equal( + repr(a), + textwrap.dedent("""\ + array([2, 2, 2, + 2, 2, 2, + 2, 2])""") + ) + + np.set_printoptions(linewidth=18, legacy='1.13') + assert_equal( + repr(a), + textwrap.dedent("""\ + array([2, 2, 2, 2, + 2, 2, 2, 2])""") + ) + + def test_linewidth_str(self): + a = np.full(18, fill_value=2) + np.set_printoptions(linewidth=18) + assert_equal( + str(a), + textwrap.dedent("""\ + [2 2 2 2 2 2 2 2 + 2 2 2 2 2 2 2 2 + 2 2]""") + ) + np.set_printoptions(linewidth=18, legacy='1.13') + assert_equal( + str(a), + textwrap.dedent("""\ + [2 2 2 2 2 2 2 2 2 + 2 2 2 2 2 2 2 2 2]""") + ) + + def test_edgeitems(self): + np.set_printoptions(edgeitems=1, threshold=1) + a = np.arange(27).reshape((3, 3, 3)) + assert_equal( + repr(a), + textwrap.dedent("""\ + array([[[ 0, ..., 2], + ..., + [ 6, ..., 8]], + + ..., + + [[18, ..., 20], + ..., + [24, ..., 26]]])""") + ) + + b = np.zeros((3, 3, 1, 1)) + assert_equal( + repr(b), + textwrap.dedent("""\ + array([[[[0.]], + + ..., + + [[0.]]], + + + ..., + + + [[[0.]], + + ..., + + [[0.]]]])""") + ) + + # 1.13 had extra trailing spaces, and was missing newlines + np.set_printoptions(legacy='1.13') + + assert_equal( + repr(a), + textwrap.dedent("""\ + array([[[ 0, ..., 2], + ..., + [ 6, ..., 8]], + + ..., + [[18, ..., 20], + ..., + [24, ..., 26]]])""") + ) + + assert_equal( + repr(b), + textwrap.dedent("""\ + array([[[[ 0.]], + + ..., + [[ 0.]]], + + + ..., + [[[ 0.]], + + ..., + [[ 0.]]]])""") + ) + + def test_bad_args(self): + assert_raises(ValueError, np.set_printoptions, threshold='nan') + assert_raises(ValueError, np.set_printoptions, threshold=u'1') + assert_raises(ValueError, np.set_printoptions, threshold=b'1') + +def test_unicode_object_array(): + import sys + if sys.version_info[0] >= 3: + expected = "array(['é'], dtype=object)" + else: + expected = "array([u'\\xe9'], dtype=object)" + x = np.array([u'\xe9'], dtype=object) + assert_equal(repr(x), expected) + + +class TestContextManager(object): + def test_ctx_mgr(self): + # test that context manager actuall works + with np.printoptions(precision=2): + s = str(np.array([2.0]) / 3) + assert_equal(s, '[0.67]') + + def test_ctx_mgr_restores(self): + # test that print options are actually restrored + opts = np.get_printoptions() + with np.printoptions(precision=opts['precision'] - 1, + linewidth=opts['linewidth'] - 4): + pass + assert_equal(np.get_printoptions(), opts) + + def 
test_ctx_mgr_exceptions(self): + # test that print options are restored even if an exception is raised + opts = np.get_printoptions() + try: + with np.printoptions(precision=2, linewidth=11): + raise ValueError + except ValueError: + pass + assert_equal(np.get_printoptions(), opts) + + def test_ctx_mgr_as_smth(self): + opts = {"precision": 2} + with np.printoptions(**opts) as ctx: + saved_opts = ctx.copy() + assert_equal({k: saved_opts[k] for k in opts}, opts) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_datetime.py b/contrib/python/numpy/py2/numpy/core/tests/test_datetime.py new file mode 100644 index 0000000000..170c52e9e3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_datetime.py @@ -0,0 +1,2228 @@ +from __future__ import division, absolute_import, print_function + + +import numpy +import numpy as np +import datetime +import pytest +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns, suppress_warnings, + assert_raises_regex, + ) +from numpy.core.numeric import pickle + +# Use pytz to test out various time zones if available +try: + from pytz import timezone as tz + _has_pytz = True +except ImportError: + _has_pytz = False + +try: + RecursionError +except NameError: + RecursionError = RuntimeError # python < 3.5 + + +class TestDateTime(object): + def test_datetime_dtype_creation(self): + for unit in ['Y', 'M', 'W', 'D', + 'h', 'm', 's', 'ms', 'us', + 'ns', 'ps', 'fs', 'as']: + dt1 = np.dtype('M8[750%s]' % unit) + assert_(dt1 == np.dtype('datetime64[750%s]' % unit)) + dt2 = np.dtype('m8[%s]' % unit) + assert_(dt2 == np.dtype('timedelta64[%s]' % unit)) + + # Generic units shouldn't add [] to the end + assert_equal(str(np.dtype("M8")), "datetime64") + + # Should be possible to specify the endianness + assert_equal(np.dtype("=M8"), np.dtype("M8")) + assert_equal(np.dtype("=M8[s]"), np.dtype("M8[s]")) + assert_(np.dtype(">M8") == np.dtype("M8") or + np.dtype("<M8") == np.dtype("M8")) + assert_(np.dtype(">M8[D]") == np.dtype("M8[D]") or + np.dtype("<M8[D]") == np.dtype("M8[D]")) + assert_(np.dtype(">M8") != np.dtype("<M8")) + + assert_equal(np.dtype("=m8"), np.dtype("m8")) + assert_equal(np.dtype("=m8[s]"), np.dtype("m8[s]")) + assert_(np.dtype(">m8") == np.dtype("m8") or + np.dtype("<m8") == np.dtype("m8")) + assert_(np.dtype(">m8[D]") == np.dtype("m8[D]") or + np.dtype("<m8[D]") == np.dtype("m8[D]")) + assert_(np.dtype(">m8") != np.dtype("<m8")) + + # Check that the parser rejects bad datetime types + assert_raises(TypeError, np.dtype, 'M8[badunit]') + assert_raises(TypeError, np.dtype, 'm8[badunit]') + assert_raises(TypeError, np.dtype, 'M8[YY]') + assert_raises(TypeError, np.dtype, 'm8[YY]') + assert_raises(TypeError, np.dtype, 'm4') + assert_raises(TypeError, np.dtype, 'M7') + assert_raises(TypeError, np.dtype, 'm7') + assert_raises(TypeError, np.dtype, 'M16') + assert_raises(TypeError, np.dtype, 'm16') + + def test_datetime_casting_rules(self): + # Cannot cast safely/same_kind between timedelta and datetime + assert_(not np.can_cast('m8', 'M8', casting='same_kind')) + assert_(not np.can_cast('M8', 'm8', casting='same_kind')) + assert_(not np.can_cast('m8', 'M8', casting='safe')) + assert_(not np.can_cast('M8', 'm8', casting='safe')) + + # Can cast safely/same_kind from integer to timedelta + assert_(np.can_cast('i8', 'm8', casting='same_kind')) + assert_(np.can_cast('i8', 'm8', casting='safe')) + + # Cannot cast safely/same_kind from float to timedelta + assert_(not np.can_cast('f4', 'm8', casting='same_kind')) + 
assert_(not np.can_cast('f4', 'm8', casting='safe')) + + # Cannot cast safely/same_kind from integer to datetime + assert_(not np.can_cast('i8', 'M8', casting='same_kind')) + assert_(not np.can_cast('i8', 'M8', casting='safe')) + + # Cannot cast safely/same_kind from bool to datetime + assert_(not np.can_cast('b1', 'M8', casting='same_kind')) + assert_(not np.can_cast('b1', 'M8', casting='safe')) + # Can cast safely/same_kind from bool to timedelta + assert_(np.can_cast('b1', 'm8', casting='same_kind')) + assert_(np.can_cast('b1', 'm8', casting='safe')) + + # Can cast datetime safely from months/years to days + assert_(np.can_cast('M8[M]', 'M8[D]', casting='safe')) + assert_(np.can_cast('M8[Y]', 'M8[D]', casting='safe')) + # Cannot cast timedelta safely from months/years to days + assert_(not np.can_cast('m8[M]', 'm8[D]', casting='safe')) + assert_(not np.can_cast('m8[Y]', 'm8[D]', casting='safe')) + # Can cast datetime same_kind from months/years to days + assert_(np.can_cast('M8[M]', 'M8[D]', casting='same_kind')) + assert_(np.can_cast('M8[Y]', 'M8[D]', casting='same_kind')) + # Can't cast timedelta same_kind from months/years to days + assert_(not np.can_cast('m8[M]', 'm8[D]', casting='same_kind')) + assert_(not np.can_cast('m8[Y]', 'm8[D]', casting='same_kind')) + # Can cast datetime same_kind across the date/time boundary + assert_(np.can_cast('M8[D]', 'M8[h]', casting='same_kind')) + # Can cast timedelta same_kind across the date/time boundary + assert_(np.can_cast('m8[D]', 'm8[h]', casting='same_kind')) + assert_(np.can_cast('m8[h]', 'm8[D]', casting='same_kind')) + + # Cannot cast safely if the integer multiplier doesn't divide + assert_(not np.can_cast('M8[7h]', 'M8[3h]', casting='safe')) + assert_(not np.can_cast('M8[3h]', 'M8[6h]', casting='safe')) + # But can cast same_kind + assert_(np.can_cast('M8[7h]', 'M8[3h]', casting='same_kind')) + # Can cast safely if the integer multiplier does divide + assert_(np.can_cast('M8[6h]', 'M8[3h]', casting='safe')) + + # We can always cast types with generic units (corresponding to NaT) to + # more specific types + assert_(np.can_cast('m8', 'm8[h]', casting='same_kind')) + assert_(np.can_cast('m8', 'm8[h]', casting='safe')) + assert_(np.can_cast('M8', 'M8[h]', casting='same_kind')) + assert_(np.can_cast('M8', 'M8[h]', casting='safe')) + # but not the other way around + assert_(not np.can_cast('m8[h]', 'm8', casting='same_kind')) + assert_(not np.can_cast('m8[h]', 'm8', casting='safe')) + assert_(not np.can_cast('M8[h]', 'M8', casting='same_kind')) + assert_(not np.can_cast('M8[h]', 'M8', casting='safe')) + + def test_compare_generic_nat(self): + # regression tests for gh-6452 + assert_(np.datetime64('NaT') != + np.datetime64('2000') + np.timedelta64('NaT')) + assert_(np.datetime64('NaT') != np.datetime64('NaT', 'us')) + assert_(np.datetime64('NaT', 'us') != np.datetime64('NaT')) + + def test_datetime_scalar_construction(self): + # Construct with different units + assert_equal(np.datetime64('1950-03-12', 'D'), + np.datetime64('1950-03-12')) + assert_equal(np.datetime64('1950-03-12T13', 's'), + np.datetime64('1950-03-12T13', 'm')) + + # Default construction means NaT + assert_equal(np.datetime64(), np.datetime64('NaT')) + + # Some basic strings and repr + assert_equal(str(np.datetime64('NaT')), 'NaT') + assert_equal(repr(np.datetime64('NaT')), + "numpy.datetime64('NaT')") + assert_equal(str(np.datetime64('2011-02')), '2011-02') + assert_equal(repr(np.datetime64('2011-02')), + "numpy.datetime64('2011-02')") + + # None gets constructed as NaT + 
assert_equal(np.datetime64(None), np.datetime64('NaT')) + + # Default construction of NaT is in generic units + assert_equal(np.datetime64().dtype, np.dtype('M8')) + assert_equal(np.datetime64('NaT').dtype, np.dtype('M8')) + + # Construction from integers requires a specified unit + assert_raises(ValueError, np.datetime64, 17) + + # When constructing from a scalar or zero-dimensional array, + # it either keeps the units or you can override them. + a = np.datetime64('2000-03-18T16', 'h') + b = np.array('2000-03-18T16', dtype='M8[h]') + + assert_equal(a.dtype, np.dtype('M8[h]')) + assert_equal(b.dtype, np.dtype('M8[h]')) + + assert_equal(np.datetime64(a), a) + assert_equal(np.datetime64(a).dtype, np.dtype('M8[h]')) + + assert_equal(np.datetime64(b), a) + assert_equal(np.datetime64(b).dtype, np.dtype('M8[h]')) + + assert_equal(np.datetime64(a, 's'), a) + assert_equal(np.datetime64(a, 's').dtype, np.dtype('M8[s]')) + + assert_equal(np.datetime64(b, 's'), a) + assert_equal(np.datetime64(b, 's').dtype, np.dtype('M8[s]')) + + # Construction from datetime.date + assert_equal(np.datetime64('1945-03-25'), + np.datetime64(datetime.date(1945, 3, 25))) + assert_equal(np.datetime64('2045-03-25', 'D'), + np.datetime64(datetime.date(2045, 3, 25), 'D')) + # Construction from datetime.datetime + assert_equal(np.datetime64('1980-01-25T14:36:22.5'), + np.datetime64(datetime.datetime(1980, 1, 25, + 14, 36, 22, 500000))) + + # Construction with time units from a date is okay + assert_equal(np.datetime64('1920-03-13', 'h'), + np.datetime64('1920-03-13T00')) + assert_equal(np.datetime64('1920-03', 'm'), + np.datetime64('1920-03-01T00:00')) + assert_equal(np.datetime64('1920', 's'), + np.datetime64('1920-01-01T00:00:00')) + assert_equal(np.datetime64(datetime.date(2045, 3, 25), 'ms'), + np.datetime64('2045-03-25T00:00:00.000')) + + # Construction with date units from a datetime is also okay + assert_equal(np.datetime64('1920-03-13T18', 'D'), + np.datetime64('1920-03-13')) + assert_equal(np.datetime64('1920-03-13T18:33:12', 'M'), + np.datetime64('1920-03')) + assert_equal(np.datetime64('1920-03-13T18:33:12.5', 'Y'), + np.datetime64('1920')) + + def test_datetime_scalar_construction_timezone(self): + # verify that supplying an explicit timezone works, but is deprecated + with assert_warns(DeprecationWarning): + assert_equal(np.datetime64('2000-01-01T00Z'), + np.datetime64('2000-01-01T00')) + with assert_warns(DeprecationWarning): + assert_equal(np.datetime64('2000-01-01T00-08'), + np.datetime64('2000-01-01T08')) + + def test_datetime_array_find_type(self): + dt = np.datetime64('1970-01-01', 'M') + arr = np.array([dt]) + assert_equal(arr.dtype, np.dtype('M8[M]')) + + # at the moment, we don't automatically convert these to datetime64 + + dt = datetime.date(1970, 1, 1) + arr = np.array([dt]) + assert_equal(arr.dtype, np.dtype('O')) + + dt = datetime.datetime(1970, 1, 1, 12, 30, 40) + arr = np.array([dt]) + assert_equal(arr.dtype, np.dtype('O')) + + # find "supertype" for non-dates and dates + + b = np.bool_(True) + dm = np.datetime64('1970-01-01', 'M') + d = datetime.date(1970, 1, 1) + dt = datetime.datetime(1970, 1, 1, 12, 30, 40) + + arr = np.array([b, dm]) + assert_equal(arr.dtype, np.dtype('O')) + + arr = np.array([b, d]) + assert_equal(arr.dtype, np.dtype('O')) + + arr = np.array([b, dt]) + assert_equal(arr.dtype, np.dtype('O')) + + arr = np.array([d, d]).astype('datetime64') + assert_equal(arr.dtype, np.dtype('M8[D]')) + + arr = np.array([dt, dt]).astype('datetime64') + assert_equal(arr.dtype, 
np.dtype('M8[us]')) + + @pytest.mark.parametrize("unit", [ + # test all date / time units and use + # "generic" to select generic unit + ("Y"), ("M"), ("W"), ("D"), ("h"), ("m"), + ("s"), ("ms"), ("us"), ("ns"), ("ps"), + ("fs"), ("as"), ("generic") ]) + def test_timedelta_np_int_construction(self, unit): + # regression test for gh-7617 + if unit != "generic": + assert_equal(np.timedelta64(np.int64(123), unit), + np.timedelta64(123, unit)) + else: + assert_equal(np.timedelta64(np.int64(123)), + np.timedelta64(123)) + + def test_timedelta_scalar_construction(self): + # Construct with different units + assert_equal(np.timedelta64(7, 'D'), + np.timedelta64(1, 'W')) + assert_equal(np.timedelta64(120, 's'), + np.timedelta64(2, 'm')) + + # Default construction means 0 + assert_equal(np.timedelta64(), np.timedelta64(0)) + + # None gets constructed as NaT + assert_equal(np.timedelta64(None), np.timedelta64('NaT')) + + # Some basic strings and repr + assert_equal(str(np.timedelta64('NaT')), 'NaT') + assert_equal(repr(np.timedelta64('NaT')), + "numpy.timedelta64('NaT')") + assert_equal(str(np.timedelta64(3, 's')), '3 seconds') + assert_equal(repr(np.timedelta64(-3, 's')), + "numpy.timedelta64(-3,'s')") + assert_equal(repr(np.timedelta64(12)), + "numpy.timedelta64(12)") + + # Construction from an integer produces generic units + assert_equal(np.timedelta64(12).dtype, np.dtype('m8')) + + # When constructing from a scalar or zero-dimensional array, + # it either keeps the units or you can override them. + a = np.timedelta64(2, 'h') + b = np.array(2, dtype='m8[h]') + + assert_equal(a.dtype, np.dtype('m8[h]')) + assert_equal(b.dtype, np.dtype('m8[h]')) + + assert_equal(np.timedelta64(a), a) + assert_equal(np.timedelta64(a).dtype, np.dtype('m8[h]')) + + assert_equal(np.timedelta64(b), a) + assert_equal(np.timedelta64(b).dtype, np.dtype('m8[h]')) + + assert_equal(np.timedelta64(a, 's'), a) + assert_equal(np.timedelta64(a, 's').dtype, np.dtype('m8[s]')) + + assert_equal(np.timedelta64(b, 's'), a) + assert_equal(np.timedelta64(b, 's').dtype, np.dtype('m8[s]')) + + # Construction from datetime.timedelta + assert_equal(np.timedelta64(5, 'D'), + np.timedelta64(datetime.timedelta(days=5))) + assert_equal(np.timedelta64(102347621, 's'), + np.timedelta64(datetime.timedelta(seconds=102347621))) + assert_equal(np.timedelta64(-10234760000, 'us'), + np.timedelta64(datetime.timedelta( + microseconds=-10234760000))) + assert_equal(np.timedelta64(10234760000, 'us'), + np.timedelta64(datetime.timedelta( + microseconds=10234760000))) + assert_equal(np.timedelta64(1023476, 'ms'), + np.timedelta64(datetime.timedelta(milliseconds=1023476))) + assert_equal(np.timedelta64(10, 'm'), + np.timedelta64(datetime.timedelta(minutes=10))) + assert_equal(np.timedelta64(281, 'h'), + np.timedelta64(datetime.timedelta(hours=281))) + assert_equal(np.timedelta64(28, 'W'), + np.timedelta64(datetime.timedelta(weeks=28))) + + # Cannot construct across nonlinear time unit boundaries + a = np.timedelta64(3, 's') + assert_raises(TypeError, np.timedelta64, a, 'M') + assert_raises(TypeError, np.timedelta64, a, 'Y') + a = np.timedelta64(6, 'M') + assert_raises(TypeError, np.timedelta64, a, 'D') + assert_raises(TypeError, np.timedelta64, a, 'h') + a = np.timedelta64(1, 'Y') + assert_raises(TypeError, np.timedelta64, a, 'D') + assert_raises(TypeError, np.timedelta64, a, 'm') + a = datetime.timedelta(seconds=3) + assert_raises(TypeError, np.timedelta64, a, 'M') + assert_raises(TypeError, np.timedelta64, a, 'Y') + a = datetime.timedelta(weeks=3) + 
assert_raises(TypeError, np.timedelta64, a, 'M') + assert_raises(TypeError, np.timedelta64, a, 'Y') + a = datetime.timedelta() + assert_raises(TypeError, np.timedelta64, a, 'M') + assert_raises(TypeError, np.timedelta64, a, 'Y') + + def test_timedelta_object_array_conversion(self): + # Regression test for gh-11096 + inputs = [datetime.timedelta(28), + datetime.timedelta(30), + datetime.timedelta(31)] + expected = np.array([28, 30, 31], dtype='timedelta64[D]') + actual = np.array(inputs, dtype='timedelta64[D]') + assert_equal(expected, actual) + + def test_timedelta_0_dim_object_array_conversion(self): + # Regression test for gh-11151 + test = np.array(datetime.timedelta(seconds=20)) + actual = test.astype(np.timedelta64) + # expected value from the array constructor workaround + # described in above issue + expected = np.array(datetime.timedelta(seconds=20), + np.timedelta64) + assert_equal(actual, expected) + + def test_timedelta_scalar_construction_units(self): + # String construction detecting units + assert_equal(np.datetime64('2010').dtype, + np.dtype('M8[Y]')) + assert_equal(np.datetime64('2010-03').dtype, + np.dtype('M8[M]')) + assert_equal(np.datetime64('2010-03-12').dtype, + np.dtype('M8[D]')) + assert_equal(np.datetime64('2010-03-12T17').dtype, + np.dtype('M8[h]')) + assert_equal(np.datetime64('2010-03-12T17:15').dtype, + np.dtype('M8[m]')) + assert_equal(np.datetime64('2010-03-12T17:15:08').dtype, + np.dtype('M8[s]')) + + assert_equal(np.datetime64('2010-03-12T17:15:08.1').dtype, + np.dtype('M8[ms]')) + assert_equal(np.datetime64('2010-03-12T17:15:08.12').dtype, + np.dtype('M8[ms]')) + assert_equal(np.datetime64('2010-03-12T17:15:08.123').dtype, + np.dtype('M8[ms]')) + + assert_equal(np.datetime64('2010-03-12T17:15:08.1234').dtype, + np.dtype('M8[us]')) + assert_equal(np.datetime64('2010-03-12T17:15:08.12345').dtype, + np.dtype('M8[us]')) + assert_equal(np.datetime64('2010-03-12T17:15:08.123456').dtype, + np.dtype('M8[us]')) + + assert_equal(np.datetime64('1970-01-01T00:00:02.1234567').dtype, + np.dtype('M8[ns]')) + assert_equal(np.datetime64('1970-01-01T00:00:02.12345678').dtype, + np.dtype('M8[ns]')) + assert_equal(np.datetime64('1970-01-01T00:00:02.123456789').dtype, + np.dtype('M8[ns]')) + + assert_equal(np.datetime64('1970-01-01T00:00:02.1234567890').dtype, + np.dtype('M8[ps]')) + assert_equal(np.datetime64('1970-01-01T00:00:02.12345678901').dtype, + np.dtype('M8[ps]')) + assert_equal(np.datetime64('1970-01-01T00:00:02.123456789012').dtype, + np.dtype('M8[ps]')) + + assert_equal(np.datetime64( + '1970-01-01T00:00:02.1234567890123').dtype, + np.dtype('M8[fs]')) + assert_equal(np.datetime64( + '1970-01-01T00:00:02.12345678901234').dtype, + np.dtype('M8[fs]')) + assert_equal(np.datetime64( + '1970-01-01T00:00:02.123456789012345').dtype, + np.dtype('M8[fs]')) + + assert_equal(np.datetime64( + '1970-01-01T00:00:02.1234567890123456').dtype, + np.dtype('M8[as]')) + assert_equal(np.datetime64( + '1970-01-01T00:00:02.12345678901234567').dtype, + np.dtype('M8[as]')) + assert_equal(np.datetime64( + '1970-01-01T00:00:02.123456789012345678').dtype, + np.dtype('M8[as]')) + + # Python date object + assert_equal(np.datetime64(datetime.date(2010, 4, 16)).dtype, + np.dtype('M8[D]')) + + # Python datetime object + assert_equal(np.datetime64( + datetime.datetime(2010, 4, 16, 13, 45, 18)).dtype, + np.dtype('M8[us]')) + + # 'today' special value + assert_equal(np.datetime64('today').dtype, + np.dtype('M8[D]')) + + # 'now' special value + assert_equal(np.datetime64('now').dtype, + 
np.dtype('M8[s]')) + + def test_datetime_nat_casting(self): + a = np.array('NaT', dtype='M8[D]') + b = np.datetime64('NaT', '[D]') + + # Arrays + assert_equal(a.astype('M8[s]'), np.array('NaT', dtype='M8[s]')) + assert_equal(a.astype('M8[ms]'), np.array('NaT', dtype='M8[ms]')) + assert_equal(a.astype('M8[M]'), np.array('NaT', dtype='M8[M]')) + assert_equal(a.astype('M8[Y]'), np.array('NaT', dtype='M8[Y]')) + assert_equal(a.astype('M8[W]'), np.array('NaT', dtype='M8[W]')) + + # Scalars -> Scalars + assert_equal(np.datetime64(b, '[s]'), np.datetime64('NaT', '[s]')) + assert_equal(np.datetime64(b, '[ms]'), np.datetime64('NaT', '[ms]')) + assert_equal(np.datetime64(b, '[M]'), np.datetime64('NaT', '[M]')) + assert_equal(np.datetime64(b, '[Y]'), np.datetime64('NaT', '[Y]')) + assert_equal(np.datetime64(b, '[W]'), np.datetime64('NaT', '[W]')) + + # Arrays -> Scalars + assert_equal(np.datetime64(a, '[s]'), np.datetime64('NaT', '[s]')) + assert_equal(np.datetime64(a, '[ms]'), np.datetime64('NaT', '[ms]')) + assert_equal(np.datetime64(a, '[M]'), np.datetime64('NaT', '[M]')) + assert_equal(np.datetime64(a, '[Y]'), np.datetime64('NaT', '[Y]')) + assert_equal(np.datetime64(a, '[W]'), np.datetime64('NaT', '[W]')) + + def test_days_creation(self): + assert_equal(np.array('1599', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3 - 365) + assert_equal(np.array('1600', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3) + assert_equal(np.array('1601', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)/4 + 3 + 366) + assert_equal(np.array('1900', dtype='M8[D]').astype('i8'), + (1900-1970)*365 - (1970-1900)//4) + assert_equal(np.array('1901', dtype='M8[D]').astype('i8'), + (1900-1970)*365 - (1970-1900)//4 + 365) + assert_equal(np.array('1967', dtype='M8[D]').astype('i8'), -3*365 - 1) + assert_equal(np.array('1968', dtype='M8[D]').astype('i8'), -2*365 - 1) + assert_equal(np.array('1969', dtype='M8[D]').astype('i8'), -1*365) + assert_equal(np.array('1970', dtype='M8[D]').astype('i8'), 0*365) + assert_equal(np.array('1971', dtype='M8[D]').astype('i8'), 1*365) + assert_equal(np.array('1972', dtype='M8[D]').astype('i8'), 2*365) + assert_equal(np.array('1973', dtype='M8[D]').astype('i8'), 3*365 + 1) + assert_equal(np.array('1974', dtype='M8[D]').astype('i8'), 4*365 + 1) + assert_equal(np.array('2000', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4) + assert_equal(np.array('2001', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 366) + assert_equal(np.array('2400', dtype='M8[D]').astype('i8'), + (2400 - 1970)*365 + (2400 - 1972)//4 - 3) + assert_equal(np.array('2401', dtype='M8[D]').astype('i8'), + (2400 - 1970)*365 + (2400 - 1972)//4 - 3 + 366) + + assert_equal(np.array('1600-02-29', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)//4 + 3 + 31 + 28) + assert_equal(np.array('1600-03-01', dtype='M8[D]').astype('i8'), + (1600-1970)*365 - (1972-1600)//4 + 3 + 31 + 29) + assert_equal(np.array('2000-02-29', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 31 + 28) + assert_equal(np.array('2000-03-01', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 31 + 29) + assert_equal(np.array('2001-03-22', dtype='M8[D]').astype('i8'), + (2000 - 1970)*365 + (2000 - 1972)//4 + 366 + 31 + 28 + 21) + + def test_days_to_pydate(self): + assert_equal(np.array('1599', dtype='M8[D]').astype('O'), + datetime.date(1599, 1, 1)) + assert_equal(np.array('1600', dtype='M8[D]').astype('O'), + 
datetime.date(1600, 1, 1)) + assert_equal(np.array('1601', dtype='M8[D]').astype('O'), + datetime.date(1601, 1, 1)) + assert_equal(np.array('1900', dtype='M8[D]').astype('O'), + datetime.date(1900, 1, 1)) + assert_equal(np.array('1901', dtype='M8[D]').astype('O'), + datetime.date(1901, 1, 1)) + assert_equal(np.array('2000', dtype='M8[D]').astype('O'), + datetime.date(2000, 1, 1)) + assert_equal(np.array('2001', dtype='M8[D]').astype('O'), + datetime.date(2001, 1, 1)) + assert_equal(np.array('1600-02-29', dtype='M8[D]').astype('O'), + datetime.date(1600, 2, 29)) + assert_equal(np.array('1600-03-01', dtype='M8[D]').astype('O'), + datetime.date(1600, 3, 1)) + assert_equal(np.array('2001-03-22', dtype='M8[D]').astype('O'), + datetime.date(2001, 3, 22)) + + def test_dtype_comparison(self): + assert_(not (np.dtype('M8[us]') == np.dtype('M8[ms]'))) + assert_(np.dtype('M8[us]') != np.dtype('M8[ms]')) + assert_(np.dtype('M8[2D]') != np.dtype('M8[D]')) + assert_(np.dtype('M8[D]') != np.dtype('M8[2D]')) + + def test_pydatetime_creation(self): + a = np.array(['1960-03-12', datetime.date(1960, 3, 12)], dtype='M8[D]') + assert_equal(a[0], a[1]) + a = np.array(['1999-12-31', datetime.date(1999, 12, 31)], dtype='M8[D]') + assert_equal(a[0], a[1]) + a = np.array(['2000-01-01', datetime.date(2000, 1, 1)], dtype='M8[D]') + assert_equal(a[0], a[1]) + # Will fail if the date changes during the exact right moment + a = np.array(['today', datetime.date.today()], dtype='M8[D]') + assert_equal(a[0], a[1]) + # datetime.datetime.now() returns local time, not UTC + #a = np.array(['now', datetime.datetime.now()], dtype='M8[s]') + #assert_equal(a[0], a[1]) + + # we can give a datetime.date time units + assert_equal(np.array(datetime.date(1960, 3, 12), dtype='M8[s]'), + np.array(np.datetime64('1960-03-12T00:00:00'))) + + def test_datetime_string_conversion(self): + a = ['2011-03-16', '1920-01-01', '2013-05-19'] + str_a = np.array(a, dtype='S') + uni_a = np.array(a, dtype='U') + dt_a = np.array(a, dtype='M') + + # String to datetime + assert_equal(dt_a, str_a.astype('M')) + assert_equal(dt_a.dtype, str_a.astype('M').dtype) + dt_b = np.empty_like(dt_a) + dt_b[...] = str_a + assert_equal(dt_a, dt_b) + + # Datetime to string + assert_equal(str_a, dt_a.astype('S0')) + str_b = np.empty_like(str_a) + str_b[...] = dt_a + assert_equal(str_a, str_b) + + # Unicode to datetime + assert_equal(dt_a, uni_a.astype('M')) + assert_equal(dt_a.dtype, uni_a.astype('M').dtype) + dt_b = np.empty_like(dt_a) + dt_b[...] = uni_a + assert_equal(dt_a, dt_b) + + # Datetime to unicode + assert_equal(uni_a, dt_a.astype('U')) + uni_b = np.empty_like(uni_a) + uni_b[...] = dt_a + assert_equal(uni_a, uni_b) + + # Datetime to long string - gh-9712 + assert_equal(str_a, dt_a.astype((np.string_, 128))) + str_b = np.empty(str_a.shape, dtype=(np.string_, 128)) + str_b[...] 
= dt_a + assert_equal(str_a, str_b) + + def test_datetime_array_str(self): + a = np.array(['2011-03-16', '1920-01-01', '2013-05-19'], dtype='M') + assert_equal(str(a), "['2011-03-16' '1920-01-01' '2013-05-19']") + + a = np.array(['2011-03-16T13:55', '1920-01-01T03:12'], dtype='M') + assert_equal(np.array2string(a, separator=', ', + formatter={'datetime': lambda x: + "'%s'" % np.datetime_as_string(x, timezone='UTC')}), + "['2011-03-16T13:55Z', '1920-01-01T03:12Z']") + + # Check that one NaT doesn't corrupt subsequent entries + a = np.array(['2010', 'NaT', '2030']).astype('M') + assert_equal(str(a), "['2010' 'NaT' '2030']") + + def test_timedelta_array_str(self): + a = np.array([-1, 0, 100], dtype='m') + assert_equal(str(a), "[ -1 0 100]") + a = np.array(['NaT', 'NaT'], dtype='m') + assert_equal(str(a), "['NaT' 'NaT']") + # Check right-alignment with NaTs + a = np.array([-1, 'NaT', 0], dtype='m') + assert_equal(str(a), "[ -1 'NaT' 0]") + a = np.array([-1, 'NaT', 1234567], dtype='m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + + # Test with other byteorder: + a = np.array([-1, 'NaT', 1234567], dtype='>m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + a = np.array([-1, 'NaT', 1234567], dtype='<m') + assert_equal(str(a), "[ -1 'NaT' 1234567]") + + def test_pickle(self): + # Check that pickle roundtripping works + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + dt = np.dtype('M8[7D]') + assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt) + dt = np.dtype('M8[W]') + assert_equal(pickle.loads(pickle.dumps(dt, protocol=proto)), dt) + scalar = np.datetime64('2016-01-01T00:00:00.000000000') + assert_equal(pickle.loads(pickle.dumps(scalar, protocol=proto)), + scalar) + delta = scalar - np.datetime64('2015-01-01T00:00:00.000000000') + assert_equal(pickle.loads(pickle.dumps(delta, protocol=proto)), + delta) + + # Check that loading pickles from 1.6 works + pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \ + b"(I4\nS'<'\np4\nNNNI-1\nI-1\nI0\n((dp5\n(S'D'\np6\n" + \ + b"I7\nI1\nI1\ntp7\ntp8\ntp9\nb." + assert_equal(pickle.loads(pkl), np.dtype('<M8[7D]')) + pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \ + b"(I4\nS'<'\np4\nNNNI-1\nI-1\nI0\n((dp5\n(S'W'\np6\n" + \ + b"I1\nI1\nI1\ntp7\ntp8\ntp9\nb." + assert_equal(pickle.loads(pkl), np.dtype('<M8[W]')) + pkl = b"cnumpy\ndtype\np0\n(S'M8'\np1\nI0\nI1\ntp2\nRp3\n" + \ + b"(I4\nS'>'\np4\nNNNI-1\nI-1\nI0\n((dp5\n(S'us'\np6\n" + \ + b"I1\nI1\nI1\ntp7\ntp8\ntp9\nb." 
+ assert_equal(pickle.loads(pkl), np.dtype('>M8[us]')) + + def test_setstate(self): + "Verify that datetime dtype __setstate__ can handle bad arguments" + dt = np.dtype('>M8[us]') + assert_raises(ValueError, dt.__setstate__, (4, '>', None, None, None, -1, -1, 0, 1)) + assert_(dt.__reduce__()[2] == np.dtype('>M8[us]').__reduce__()[2]) + assert_raises(TypeError, dt.__setstate__, (4, '>', None, None, None, -1, -1, 0, ({}, 'xxx'))) + assert_(dt.__reduce__()[2] == np.dtype('>M8[us]').__reduce__()[2]) + + def test_dtype_promotion(self): + # datetime <op> datetime computes the metadata gcd + # timedelta <op> timedelta computes the metadata gcd + for mM in ['m', 'M']: + assert_equal( + np.promote_types(np.dtype(mM+'8[2Y]'), np.dtype(mM+'8[2Y]')), + np.dtype(mM+'8[2Y]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[12Y]'), np.dtype(mM+'8[15Y]')), + np.dtype(mM+'8[3Y]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[62M]'), np.dtype(mM+'8[24M]')), + np.dtype(mM+'8[2M]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[1W]'), np.dtype(mM+'8[2D]')), + np.dtype(mM+'8[1D]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[W]'), np.dtype(mM+'8[13s]')), + np.dtype(mM+'8[s]')) + assert_equal( + np.promote_types(np.dtype(mM+'8[13W]'), np.dtype(mM+'8[49s]')), + np.dtype(mM+'8[7s]')) + # timedelta <op> timedelta raises when there is no reasonable gcd + assert_raises(TypeError, np.promote_types, + np.dtype('m8[Y]'), np.dtype('m8[D]')) + assert_raises(TypeError, np.promote_types, + np.dtype('m8[M]'), np.dtype('m8[W]')) + # timedelta <op> timedelta may overflow with big unit ranges + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[W]'), np.dtype('m8[fs]')) + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[s]'), np.dtype('m8[as]')) + + def test_cast_overflow(self): + # gh-4486 + def cast(): + numpy.datetime64("1971-01-01 00:00:00.000000000000000").astype("<M8[D]") + assert_raises(OverflowError, cast) + + def cast2(): + numpy.datetime64("2014").astype("<M8[fs]") + assert_raises(OverflowError, cast2) + + def test_pyobject_roundtrip(self): + # All datetime types should be able to roundtrip through object + a = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, + -1020040340, -2942398, -1, 0, 1, 234523453, 1199164176], + dtype=np.int64) + # With date units + for unit in ['M8[D]', 'M8[W]', 'M8[M]', 'M8[Y]']: + b = a.copy().view(dtype=unit) + b[0] = '-0001-01-01' + b[1] = '-0001-12-31' + b[2] = '0000-01-01' + b[3] = '0001-01-01' + b[4] = '1969-12-31' + b[5] = '1970-01-01' + b[6] = '9999-12-31' + b[7] = '10000-01-01' + b[8] = 'NaT' + + assert_equal(b.astype(object).astype(unit), b, + "Error roundtripping unit %s" % unit) + # With time units + for unit in ['M8[as]', 'M8[16fs]', 'M8[ps]', 'M8[us]', + 'M8[300as]', 'M8[20us]']: + b = a.copy().view(dtype=unit) + b[0] = '-0001-01-01T00' + b[1] = '-0001-12-31T00' + b[2] = '0000-01-01T00' + b[3] = '0001-01-01T00' + b[4] = '1969-12-31T23:59:59.999999' + b[5] = '1970-01-01T00' + b[6] = '9999-12-31T23:59:59.999999' + b[7] = '10000-01-01T00' + b[8] = 'NaT' + + assert_equal(b.astype(object).astype(unit), b, + "Error roundtripping unit %s" % unit) + + def test_month_truncation(self): + # Make sure that months are truncating correctly + assert_equal(np.array('1945-03-01', dtype='M8[M]'), + np.array('1945-03-31', dtype='M8[M]')) + assert_equal(np.array('1969-11-01', dtype='M8[M]'), + np.array('1969-11-30T23:59:59.99999', dtype='M').astype('M8[M]')) + assert_equal(np.array('1969-12-01', dtype='M8[M]'), + np.array('1969-12-31T23:59:59.99999', 
dtype='M').astype('M8[M]')) + assert_equal(np.array('1970-01-01', dtype='M8[M]'), + np.array('1970-01-31T23:59:59.99999', dtype='M').astype('M8[M]')) + assert_equal(np.array('1980-02-01', dtype='M8[M]'), + np.array('1980-02-29T23:59:59.99999', dtype='M').astype('M8[M]')) + + def test_different_unit_comparison(self): + # Check some years with date units + for unit1 in ['Y', 'M', 'D']: + dt1 = np.dtype('M8[%s]' % unit1) + for unit2 in ['Y', 'M', 'D']: + dt2 = np.dtype('M8[%s]' % unit2) + assert_equal(np.array('1945', dtype=dt1), + np.array('1945', dtype=dt2)) + assert_equal(np.array('1970', dtype=dt1), + np.array('1970', dtype=dt2)) + assert_equal(np.array('9999', dtype=dt1), + np.array('9999', dtype=dt2)) + assert_equal(np.array('10000', dtype=dt1), + np.array('10000-01-01', dtype=dt2)) + assert_equal(np.datetime64('1945', unit1), + np.datetime64('1945', unit2)) + assert_equal(np.datetime64('1970', unit1), + np.datetime64('1970', unit2)) + assert_equal(np.datetime64('9999', unit1), + np.datetime64('9999', unit2)) + assert_equal(np.datetime64('10000', unit1), + np.datetime64('10000-01-01', unit2)) + # Check some datetimes with time units + for unit1 in ['6h', 'h', 'm', 's', '10ms', 'ms', 'us']: + dt1 = np.dtype('M8[%s]' % unit1) + for unit2 in ['h', 'm', 's', 'ms', 'us']: + dt2 = np.dtype('M8[%s]' % unit2) + assert_equal(np.array('1945-03-12T18', dtype=dt1), + np.array('1945-03-12T18', dtype=dt2)) + assert_equal(np.array('1970-03-12T18', dtype=dt1), + np.array('1970-03-12T18', dtype=dt2)) + assert_equal(np.array('9999-03-12T18', dtype=dt1), + np.array('9999-03-12T18', dtype=dt2)) + assert_equal(np.array('10000-01-01T00', dtype=dt1), + np.array('10000-01-01T00', dtype=dt2)) + assert_equal(np.datetime64('1945-03-12T18', unit1), + np.datetime64('1945-03-12T18', unit2)) + assert_equal(np.datetime64('1970-03-12T18', unit1), + np.datetime64('1970-03-12T18', unit2)) + assert_equal(np.datetime64('9999-03-12T18', unit1), + np.datetime64('9999-03-12T18', unit2)) + assert_equal(np.datetime64('10000-01-01T00', unit1), + np.datetime64('10000-01-01T00', unit2)) + # Check some days with units that won't overflow + for unit1 in ['D', '12h', 'h', 'm', 's', '4s', 'ms', 'us']: + dt1 = np.dtype('M8[%s]' % unit1) + for unit2 in ['D', 'h', 'm', 's', 'ms', 'us']: + dt2 = np.dtype('M8[%s]' % unit2) + assert_(np.equal(np.array('1932-02-17', dtype='M').astype(dt1), + np.array('1932-02-17T00:00:00', dtype='M').astype(dt2), + casting='unsafe')) + assert_(np.equal(np.array('10000-04-27', dtype='M').astype(dt1), + np.array('10000-04-27T00:00:00', dtype='M').astype(dt2), + casting='unsafe')) + + # Shouldn't be able to compare datetime and timedelta + # TODO: Changing to 'same_kind' or 'safe' casting in the ufuncs by + # default is needed to properly catch this kind of thing... 
+ a = np.array('2012-12-21', dtype='M8[D]') + b = np.array(3, dtype='m8[D]') + #assert_raises(TypeError, np.less, a, b) + assert_raises(TypeError, np.less, a, b, casting='same_kind') + + def test_datetime_like(self): + a = np.array([3], dtype='m8[4D]') + b = np.array(['2012-12-21'], dtype='M8[D]') + + assert_equal(np.ones_like(a).dtype, a.dtype) + assert_equal(np.zeros_like(a).dtype, a.dtype) + assert_equal(np.empty_like(a).dtype, a.dtype) + assert_equal(np.ones_like(b).dtype, b.dtype) + assert_equal(np.zeros_like(b).dtype, b.dtype) + assert_equal(np.empty_like(b).dtype, b.dtype) + + def test_datetime_unary(self): + for tda, tdb, tdzero, tdone, tdmone in \ + [ + # One-dimensional arrays + (np.array([3], dtype='m8[D]'), + np.array([-3], dtype='m8[D]'), + np.array([0], dtype='m8[D]'), + np.array([1], dtype='m8[D]'), + np.array([-1], dtype='m8[D]')), + # NumPy scalars + (np.timedelta64(3, '[D]'), + np.timedelta64(-3, '[D]'), + np.timedelta64(0, '[D]'), + np.timedelta64(1, '[D]'), + np.timedelta64(-1, '[D]'))]: + # negative ufunc + assert_equal(-tdb, tda) + assert_equal((-tdb).dtype, tda.dtype) + assert_equal(np.negative(tdb), tda) + assert_equal(np.negative(tdb).dtype, tda.dtype) + + # positive ufunc + assert_equal(np.positive(tda), tda) + assert_equal(np.positive(tda).dtype, tda.dtype) + assert_equal(np.positive(tdb), tdb) + assert_equal(np.positive(tdb).dtype, tdb.dtype) + + # absolute ufunc + assert_equal(np.absolute(tdb), tda) + assert_equal(np.absolute(tdb).dtype, tda.dtype) + + # sign ufunc + assert_equal(np.sign(tda), tdone) + assert_equal(np.sign(tdb), tdmone) + assert_equal(np.sign(tdzero), tdzero) + assert_equal(np.sign(tda).dtype, tda.dtype) + + # The ufuncs always produce native-endian results + assert_ + + def test_datetime_add(self): + for dta, dtb, dtc, dtnat, tda, tdb, tdc in \ + [ + # One-dimensional arrays + (np.array(['2012-12-21'], dtype='M8[D]'), + np.array(['2012-12-24'], dtype='M8[D]'), + np.array(['2012-12-21T11'], dtype='M8[h]'), + np.array(['NaT'], dtype='M8[D]'), + np.array([3], dtype='m8[D]'), + np.array([11], dtype='m8[h]'), + np.array([3*24 + 11], dtype='m8[h]')), + # NumPy scalars + (np.datetime64('2012-12-21', '[D]'), + np.datetime64('2012-12-24', '[D]'), + np.datetime64('2012-12-21T11', '[h]'), + np.datetime64('NaT', '[D]'), + np.timedelta64(3, '[D]'), + np.timedelta64(11, '[h]'), + np.timedelta64(3*24 + 11, '[h]'))]: + # m8 + m8 + assert_equal(tda + tdb, tdc) + assert_equal((tda + tdb).dtype, np.dtype('m8[h]')) + # m8 + bool + assert_equal(tdb + True, tdb + 1) + assert_equal((tdb + True).dtype, np.dtype('m8[h]')) + # m8 + int + assert_equal(tdb + 3*24, tdc) + assert_equal((tdb + 3*24).dtype, np.dtype('m8[h]')) + # bool + m8 + assert_equal(False + tdb, tdb) + assert_equal((False + tdb).dtype, np.dtype('m8[h]')) + # int + m8 + assert_equal(3*24 + tdb, tdc) + assert_equal((3*24 + tdb).dtype, np.dtype('m8[h]')) + # M8 + bool + assert_equal(dta + True, dta + 1) + assert_equal(dtnat + True, dtnat) + assert_equal((dta + True).dtype, np.dtype('M8[D]')) + # M8 + int + assert_equal(dta + 3, dtb) + assert_equal(dtnat + 3, dtnat) + assert_equal((dta + 3).dtype, np.dtype('M8[D]')) + # bool + M8 + assert_equal(False + dta, dta) + assert_equal(False + dtnat, dtnat) + assert_equal((False + dta).dtype, np.dtype('M8[D]')) + # int + M8 + assert_equal(3 + dta, dtb) + assert_equal(3 + dtnat, dtnat) + assert_equal((3 + dta).dtype, np.dtype('M8[D]')) + # M8 + m8 + assert_equal(dta + tda, dtb) + assert_equal(dtnat + tda, dtnat) + assert_equal((dta + tda).dtype, np.dtype('M8[D]')) + 
# m8 + M8 + assert_equal(tda + dta, dtb) + assert_equal(tda + dtnat, dtnat) + assert_equal((tda + dta).dtype, np.dtype('M8[D]')) + + # In M8 + m8, the result goes to higher precision + assert_equal(np.add(dta, tdb, casting='unsafe'), dtc) + assert_equal(np.add(dta, tdb, casting='unsafe').dtype, + np.dtype('M8[h]')) + assert_equal(np.add(tdb, dta, casting='unsafe'), dtc) + assert_equal(np.add(tdb, dta, casting='unsafe').dtype, + np.dtype('M8[h]')) + + # M8 + M8 + assert_raises(TypeError, np.add, dta, dtb) + + def test_datetime_subtract(self): + for dta, dtb, dtc, dtd, dte, dtnat, tda, tdb, tdc in \ + [ + # One-dimensional arrays + (np.array(['2012-12-21'], dtype='M8[D]'), + np.array(['2012-12-24'], dtype='M8[D]'), + np.array(['1940-12-24'], dtype='M8[D]'), + np.array(['1940-12-24T00'], dtype='M8[h]'), + np.array(['1940-12-23T13'], dtype='M8[h]'), + np.array(['NaT'], dtype='M8[D]'), + np.array([3], dtype='m8[D]'), + np.array([11], dtype='m8[h]'), + np.array([3*24 - 11], dtype='m8[h]')), + # NumPy scalars + (np.datetime64('2012-12-21', '[D]'), + np.datetime64('2012-12-24', '[D]'), + np.datetime64('1940-12-24', '[D]'), + np.datetime64('1940-12-24T00', '[h]'), + np.datetime64('1940-12-23T13', '[h]'), + np.datetime64('NaT', '[D]'), + np.timedelta64(3, '[D]'), + np.timedelta64(11, '[h]'), + np.timedelta64(3*24 - 11, '[h]'))]: + # m8 - m8 + assert_equal(tda - tdb, tdc) + assert_equal((tda - tdb).dtype, np.dtype('m8[h]')) + assert_equal(tdb - tda, -tdc) + assert_equal((tdb - tda).dtype, np.dtype('m8[h]')) + # m8 - bool + assert_equal(tdc - True, tdc - 1) + assert_equal((tdc - True).dtype, np.dtype('m8[h]')) + # m8 - int + assert_equal(tdc - 3*24, -tdb) + assert_equal((tdc - 3*24).dtype, np.dtype('m8[h]')) + # int - m8 + assert_equal(False - tdb, -tdb) + assert_equal((False - tdb).dtype, np.dtype('m8[h]')) + # int - m8 + assert_equal(3*24 - tdb, tdc) + assert_equal((3*24 - tdb).dtype, np.dtype('m8[h]')) + # M8 - bool + assert_equal(dtb - True, dtb - 1) + assert_equal(dtnat - True, dtnat) + assert_equal((dtb - True).dtype, np.dtype('M8[D]')) + # M8 - int + assert_equal(dtb - 3, dta) + assert_equal(dtnat - 3, dtnat) + assert_equal((dtb - 3).dtype, np.dtype('M8[D]')) + # M8 - m8 + assert_equal(dtb - tda, dta) + assert_equal(dtnat - tda, dtnat) + assert_equal((dtb - tda).dtype, np.dtype('M8[D]')) + + # In M8 - m8, the result goes to higher precision + assert_equal(np.subtract(dtc, tdb, casting='unsafe'), dte) + assert_equal(np.subtract(dtc, tdb, casting='unsafe').dtype, + np.dtype('M8[h]')) + + # M8 - M8 with different goes to higher precision + assert_equal(np.subtract(dtc, dtd, casting='unsafe'), + np.timedelta64(0, 'h')) + assert_equal(np.subtract(dtc, dtd, casting='unsafe').dtype, + np.dtype('m8[h]')) + assert_equal(np.subtract(dtd, dtc, casting='unsafe'), + np.timedelta64(0, 'h')) + assert_equal(np.subtract(dtd, dtc, casting='unsafe').dtype, + np.dtype('m8[h]')) + + # m8 - M8 + assert_raises(TypeError, np.subtract, tda, dta) + # bool - M8 + assert_raises(TypeError, np.subtract, False, dta) + # int - M8 + assert_raises(TypeError, np.subtract, 3, dta) + + def test_datetime_multiply(self): + for dta, tda, tdb, tdc in \ + [ + # One-dimensional arrays + (np.array(['2012-12-21'], dtype='M8[D]'), + np.array([6], dtype='m8[h]'), + np.array([9], dtype='m8[h]'), + np.array([12], dtype='m8[h]')), + # NumPy scalars + (np.datetime64('2012-12-21', '[D]'), + np.timedelta64(6, '[h]'), + np.timedelta64(9, '[h]'), + np.timedelta64(12, '[h]'))]: + # m8 * int + assert_equal(tda * 2, tdc) + assert_equal((tda * 
2).dtype, np.dtype('m8[h]')) + # int * m8 + assert_equal(2 * tda, tdc) + assert_equal((2 * tda).dtype, np.dtype('m8[h]')) + # m8 * float + assert_equal(tda * 1.5, tdb) + assert_equal((tda * 1.5).dtype, np.dtype('m8[h]')) + # float * m8 + assert_equal(1.5 * tda, tdb) + assert_equal((1.5 * tda).dtype, np.dtype('m8[h]')) + + # m8 * m8 + assert_raises(TypeError, np.multiply, tda, tdb) + # m8 * M8 + assert_raises(TypeError, np.multiply, dta, tda) + # M8 * m8 + assert_raises(TypeError, np.multiply, tda, dta) + # M8 * int + assert_raises(TypeError, np.multiply, dta, 2) + # int * M8 + assert_raises(TypeError, np.multiply, 2, dta) + # M8 * float + assert_raises(TypeError, np.multiply, dta, 1.5) + # float * M8 + assert_raises(TypeError, np.multiply, 1.5, dta) + + # NaTs + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in multiply") + nat = np.timedelta64('NaT') + def check(a, b, res): + assert_equal(a * b, res) + assert_equal(b * a, res) + for tp in (int, float): + check(nat, tp(2), nat) + check(nat, tp(0), nat) + for f in (float('inf'), float('nan')): + check(np.timedelta64(1), f, nat) + check(np.timedelta64(0), f, nat) + check(nat, f, nat) + + @pytest.mark.parametrize("op1, op2, exp", [ + # m8 same units round down + (np.timedelta64(7, 's'), + np.timedelta64(4, 's'), + 1), + # m8 same units round down with negative + (np.timedelta64(7, 's'), + np.timedelta64(-4, 's'), + -2), + # m8 same units negative no round down + (np.timedelta64(8, 's'), + np.timedelta64(-4, 's'), + -2), + # m8 different units + (np.timedelta64(1, 'm'), + np.timedelta64(31, 's'), + 1), + # m8 generic units + (np.timedelta64(1890), + np.timedelta64(31), + 60), + # Y // M works + (np.timedelta64(2, 'Y'), + np.timedelta64('13', 'M'), + 1), + # handle 1D arrays + (np.array([1, 2, 3], dtype='m8'), + np.array([2], dtype='m8'), + np.array([0, 1, 1], dtype=np.int64)), + ]) + def test_timedelta_floor_divide(self, op1, op2, exp): + assert_equal(op1 // op2, exp) + + @pytest.mark.parametrize("op1, op2", [ + # div by 0 + (np.timedelta64(10, 'us'), + np.timedelta64(0, 'us')), + # div with NaT + (np.timedelta64('NaT'), + np.timedelta64(50, 'us')), + # special case for int64 min + # in integer floor division + (np.timedelta64(np.iinfo(np.int64).min), + np.timedelta64(-1)), + ]) + def test_timedelta_floor_div_warnings(self, op1, op2): + with assert_warns(RuntimeWarning): + actual = op1 // op2 + assert_equal(actual, 0) + assert_equal(actual.dtype, np.int64) + + @pytest.mark.parametrize("val1, val2", [ + # the smallest integer that can't be represented + # exactly in a double should be preserved if we avoid + # casting to double in floordiv operation + (9007199254740993, 1), + # stress the alternate floordiv code path where + # operand signs don't match and remainder isn't 0 + (9007199254740999, -2), + ]) + def test_timedelta_floor_div_precision(self, val1, val2): + op1 = np.timedelta64(val1) + op2 = np.timedelta64(val2) + actual = op1 // op2 + # Python reference integer floor + expected = val1 // val2 + assert_equal(actual, expected) + + @pytest.mark.parametrize("val1, val2", [ + # years and months sometimes can't be unambiguously + # divided for floor division operation + (np.timedelta64(7, 'Y'), + np.timedelta64(3, 's')), + (np.timedelta64(7, 'M'), + np.timedelta64(1, 'D')), + ]) + def test_timedelta_floor_div_error(self, val1, val2): + with assert_raises_regex(TypeError, "common metadata divisor"): + val1 // val2 + + @pytest.mark.parametrize("op1, op2", [ + # reuse the test cases from floordiv + 
(np.timedelta64(7, 's'), + np.timedelta64(4, 's')), + # m8 same units round down with negative + (np.timedelta64(7, 's'), + np.timedelta64(-4, 's')), + # m8 same units negative no round down + (np.timedelta64(8, 's'), + np.timedelta64(-4, 's')), + # m8 different units + (np.timedelta64(1, 'm'), + np.timedelta64(31, 's')), + # m8 generic units + (np.timedelta64(1890), + np.timedelta64(31)), + # Y // M works + (np.timedelta64(2, 'Y'), + np.timedelta64('13', 'M')), + # handle 1D arrays + (np.array([1, 2, 3], dtype='m8'), + np.array([2], dtype='m8')), + ]) + def test_timedelta_divmod(self, op1, op2): + expected = (op1 // op2, op1 % op2) + assert_equal(divmod(op1, op2), expected) + + @pytest.mark.parametrize("op1, op2", [ + # reuse cases from floordiv + # div by 0 + (np.timedelta64(10, 'us'), + np.timedelta64(0, 'us')), + # div with NaT + (np.timedelta64('NaT'), + np.timedelta64(50, 'us')), + # special case for int64 min + # in integer floor division + (np.timedelta64(np.iinfo(np.int64).min), + np.timedelta64(-1)), + ]) + def test_timedelta_divmod_warnings(self, op1, op2): + with assert_warns(RuntimeWarning): + expected = (op1 // op2, op1 % op2) + with assert_warns(RuntimeWarning): + actual = divmod(op1, op2) + assert_equal(actual, expected) + + def test_datetime_divide(self): + for dta, tda, tdb, tdc, tdd in \ + [ + # One-dimensional arrays + (np.array(['2012-12-21'], dtype='M8[D]'), + np.array([6], dtype='m8[h]'), + np.array([9], dtype='m8[h]'), + np.array([12], dtype='m8[h]'), + np.array([6], dtype='m8[m]')), + # NumPy scalars + (np.datetime64('2012-12-21', '[D]'), + np.timedelta64(6, '[h]'), + np.timedelta64(9, '[h]'), + np.timedelta64(12, '[h]'), + np.timedelta64(6, '[m]'))]: + # m8 / int + assert_equal(tdc / 2, tda) + assert_equal((tdc / 2).dtype, np.dtype('m8[h]')) + # m8 / float + assert_equal(tda / 0.5, tdc) + assert_equal((tda / 0.5).dtype, np.dtype('m8[h]')) + # m8 / m8 + assert_equal(tda / tdb, 6.0 / 9.0) + assert_equal(np.divide(tda, tdb), 6.0 / 9.0) + assert_equal(np.true_divide(tda, tdb), 6.0 / 9.0) + assert_equal(tdb / tda, 9.0 / 6.0) + assert_equal((tda / tdb).dtype, np.dtype('f8')) + assert_equal(tda / tdd, 60.0) + assert_equal(tdd / tda, 1.0 / 60.0) + + # int / m8 + assert_raises(TypeError, np.divide, 2, tdb) + # float / m8 + assert_raises(TypeError, np.divide, 0.5, tdb) + # m8 / M8 + assert_raises(TypeError, np.divide, dta, tda) + # M8 / m8 + assert_raises(TypeError, np.divide, tda, dta) + # M8 / int + assert_raises(TypeError, np.divide, dta, 2) + # int / M8 + assert_raises(TypeError, np.divide, 2, dta) + # M8 / float + assert_raises(TypeError, np.divide, dta, 1.5) + # float / M8 + assert_raises(TypeError, np.divide, 1.5, dta) + + # NaTs + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, r".*encountered in true\_divide") + nat = np.timedelta64('NaT') + for tp in (int, float): + assert_equal(np.timedelta64(1) / tp(0), nat) + assert_equal(np.timedelta64(0) / tp(0), nat) + assert_equal(nat / tp(0), nat) + assert_equal(nat / tp(2), nat) + # Division by inf + assert_equal(np.timedelta64(1) / float('inf'), np.timedelta64(0)) + assert_equal(np.timedelta64(0) / float('inf'), np.timedelta64(0)) + assert_equal(nat / float('inf'), nat) + # Division by nan + assert_equal(np.timedelta64(1) / float('nan'), nat) + assert_equal(np.timedelta64(0) / float('nan'), nat) + assert_equal(nat / float('nan'), nat) + + def test_datetime_compare(self): + # Test all the comparison operators + a = np.datetime64('2000-03-12T18:00:00.000000') + b = np.array(['2000-03-12T18:00:00.000000', + 
'2000-03-12T17:59:59.999999', + '2000-03-12T18:00:00.000001', + '1970-01-11T12:00:00.909090', + '2016-01-11T12:00:00.909090'], + dtype='datetime64[us]') + assert_equal(np.equal(a, b), [1, 0, 0, 0, 0]) + assert_equal(np.not_equal(a, b), [0, 1, 1, 1, 1]) + assert_equal(np.less(a, b), [0, 0, 1, 0, 1]) + assert_equal(np.less_equal(a, b), [1, 0, 1, 0, 1]) + assert_equal(np.greater(a, b), [0, 1, 0, 1, 0]) + assert_equal(np.greater_equal(a, b), [1, 1, 0, 1, 0]) + + def test_datetime_compare_nat(self): + dt_nat = np.datetime64('NaT', 'D') + dt_other = np.datetime64('2000-01-01') + td_nat = np.timedelta64('NaT', 'h') + td_other = np.timedelta64(1, 'h') + + for op in [np.equal, np.less, np.less_equal, + np.greater, np.greater_equal]: + assert_(not op(dt_nat, dt_nat)) + assert_(not op(dt_nat, dt_other)) + assert_(not op(dt_other, dt_nat)) + + assert_(not op(td_nat, td_nat)) + assert_(not op(td_nat, td_other)) + assert_(not op(td_other, td_nat)) + + assert_(np.not_equal(dt_nat, dt_nat)) + assert_(np.not_equal(dt_nat, dt_other)) + assert_(np.not_equal(dt_other, dt_nat)) + + assert_(np.not_equal(td_nat, td_nat)) + assert_(np.not_equal(td_nat, td_other)) + assert_(np.not_equal(td_other, td_nat)) + + def test_datetime_minmax(self): + # The metadata of the result should become the GCD + # of the operand metadata + a = np.array('1999-03-12T13', dtype='M8[2m]') + b = np.array('1999-03-12T12', dtype='M8[s]') + assert_equal(np.minimum(a, b), b) + assert_equal(np.minimum(a, b).dtype, np.dtype('M8[s]')) + assert_equal(np.fmin(a, b), b) + assert_equal(np.fmin(a, b).dtype, np.dtype('M8[s]')) + assert_equal(np.maximum(a, b), a) + assert_equal(np.maximum(a, b).dtype, np.dtype('M8[s]')) + assert_equal(np.fmax(a, b), a) + assert_equal(np.fmax(a, b).dtype, np.dtype('M8[s]')) + # Viewed as integers, the comparison is opposite because + # of the units chosen + assert_equal(np.minimum(a.view('i8'), b.view('i8')), a.view('i8')) + + # Interaction with NaT + a = np.array('1999-03-12T13', dtype='M8[2m]') + dtnat = np.array('NaT', dtype='M8[h]') + assert_equal(np.minimum(a, dtnat), a) + assert_equal(np.minimum(dtnat, a), a) + assert_equal(np.maximum(a, dtnat), a) + assert_equal(np.maximum(dtnat, a), a) + + # Also do timedelta + a = np.array(3, dtype='m8[h]') + b = np.array(3*3600 - 3, dtype='m8[s]') + assert_equal(np.minimum(a, b), b) + assert_equal(np.minimum(a, b).dtype, np.dtype('m8[s]')) + assert_equal(np.fmin(a, b), b) + assert_equal(np.fmin(a, b).dtype, np.dtype('m8[s]')) + assert_equal(np.maximum(a, b), a) + assert_equal(np.maximum(a, b).dtype, np.dtype('m8[s]')) + assert_equal(np.fmax(a, b), a) + assert_equal(np.fmax(a, b).dtype, np.dtype('m8[s]')) + # Viewed as integers, the comparison is opposite because + # of the units chosen + assert_equal(np.minimum(a.view('i8'), b.view('i8')), a.view('i8')) + + # should raise between datetime and timedelta + # + # TODO: Allowing unsafe casting by + # default in ufuncs strikes again... 
:( + a = np.array(3, dtype='m8[h]') + b = np.array('1999-03-12T12', dtype='M8[s]') + #assert_raises(TypeError, np.minimum, a, b) + #assert_raises(TypeError, np.maximum, a, b) + #assert_raises(TypeError, np.fmin, a, b) + #assert_raises(TypeError, np.fmax, a, b) + assert_raises(TypeError, np.minimum, a, b, casting='same_kind') + assert_raises(TypeError, np.maximum, a, b, casting='same_kind') + assert_raises(TypeError, np.fmin, a, b, casting='same_kind') + assert_raises(TypeError, np.fmax, a, b, casting='same_kind') + + def test_hours(self): + t = np.ones(3, dtype='M8[s]') + t[0] = 60*60*24 + 60*60*10 + assert_(t[0].item().hour == 10) + + def test_divisor_conversion_year(self): + assert_(np.dtype('M8[Y/4]') == np.dtype('M8[3M]')) + assert_(np.dtype('M8[Y/13]') == np.dtype('M8[4W]')) + assert_(np.dtype('M8[3Y/73]') == np.dtype('M8[15D]')) + + def test_divisor_conversion_month(self): + assert_(np.dtype('M8[M/2]') == np.dtype('M8[2W]')) + assert_(np.dtype('M8[M/15]') == np.dtype('M8[2D]')) + assert_(np.dtype('M8[3M/40]') == np.dtype('M8[54h]')) + + def test_divisor_conversion_week(self): + assert_(np.dtype('m8[W/7]') == np.dtype('m8[D]')) + assert_(np.dtype('m8[3W/14]') == np.dtype('m8[36h]')) + assert_(np.dtype('m8[5W/140]') == np.dtype('m8[360m]')) + + def test_divisor_conversion_day(self): + assert_(np.dtype('M8[D/12]') == np.dtype('M8[2h]')) + assert_(np.dtype('M8[D/120]') == np.dtype('M8[12m]')) + assert_(np.dtype('M8[3D/960]') == np.dtype('M8[270s]')) + + def test_divisor_conversion_hour(self): + assert_(np.dtype('m8[h/30]') == np.dtype('m8[2m]')) + assert_(np.dtype('m8[3h/300]') == np.dtype('m8[36s]')) + + def test_divisor_conversion_minute(self): + assert_(np.dtype('m8[m/30]') == np.dtype('m8[2s]')) + assert_(np.dtype('m8[3m/300]') == np.dtype('m8[600ms]')) + + def test_divisor_conversion_second(self): + assert_(np.dtype('m8[s/100]') == np.dtype('m8[10ms]')) + assert_(np.dtype('m8[3s/10000]') == np.dtype('m8[300us]')) + + def test_divisor_conversion_fs(self): + assert_(np.dtype('M8[fs/100]') == np.dtype('M8[10as]')) + assert_raises(ValueError, lambda: np.dtype('M8[3fs/10000]')) + + def test_divisor_conversion_as(self): + assert_raises(ValueError, lambda: np.dtype('M8[as/10]')) + + def test_string_parser_variants(self): + # Allow space instead of 'T' between date and time + assert_equal(np.array(['1980-02-29T01:02:03'], np.dtype('M8[s]')), + np.array(['1980-02-29 01:02:03'], np.dtype('M8[s]'))) + # Allow positive years + assert_equal(np.array(['+1980-02-29T01:02:03'], np.dtype('M8[s]')), + np.array(['+1980-02-29 01:02:03'], np.dtype('M8[s]'))) + # Allow negative years + assert_equal(np.array(['-1980-02-29T01:02:03'], np.dtype('M8[s]')), + np.array(['-1980-02-29 01:02:03'], np.dtype('M8[s]'))) + # UTC specifier + with assert_warns(DeprecationWarning): + assert_equal( + np.array(['+1980-02-29T01:02:03'], np.dtype('M8[s]')), + np.array(['+1980-02-29 01:02:03Z'], np.dtype('M8[s]'))) + with assert_warns(DeprecationWarning): + assert_equal( + np.array(['-1980-02-29T01:02:03'], np.dtype('M8[s]')), + np.array(['-1980-02-29 01:02:03Z'], np.dtype('M8[s]'))) + # Time zone offset + with assert_warns(DeprecationWarning): + assert_equal( + np.array(['1980-02-29T02:02:03'], np.dtype('M8[s]')), + np.array(['1980-02-29 00:32:03-0130'], np.dtype('M8[s]'))) + with assert_warns(DeprecationWarning): + assert_equal( + np.array(['1980-02-28T22:32:03'], np.dtype('M8[s]')), + np.array(['1980-02-29 00:02:03+01:30'], np.dtype('M8[s]'))) + with assert_warns(DeprecationWarning): + assert_equal( + 
np.array(['1980-02-29T02:32:03.506'], np.dtype('M8[s]')), + np.array(['1980-02-29 00:32:03.506-02'], np.dtype('M8[s]'))) + with assert_warns(DeprecationWarning): + assert_equal(np.datetime64('1977-03-02T12:30-0230'), + np.datetime64('1977-03-02T15:00')) + + def test_string_parser_error_check(self): + # Arbitrary bad string + assert_raises(ValueError, np.array, ['badvalue'], np.dtype('M8[us]')) + # Character after year must be '-' + assert_raises(ValueError, np.array, ['1980X'], np.dtype('M8[us]')) + # Cannot have trailing '-' + assert_raises(ValueError, np.array, ['1980-'], np.dtype('M8[us]')) + # Month must be in range [1,12] + assert_raises(ValueError, np.array, ['1980-00'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-13'], np.dtype('M8[us]')) + # Month must have two digits + assert_raises(ValueError, np.array, ['1980-1'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-1-02'], np.dtype('M8[us]')) + # 'Mor' is not a valid month + assert_raises(ValueError, np.array, ['1980-Mor'], np.dtype('M8[us]')) + # Cannot have trailing '-' + assert_raises(ValueError, np.array, ['1980-01-'], np.dtype('M8[us]')) + # Day must be in range [1,len(month)] + assert_raises(ValueError, np.array, ['1980-01-0'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-01-00'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-01-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1979-02-29'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-30'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-03-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-04-31'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-05-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-06-31'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-07-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-08-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-09-31'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-10-32'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-11-31'], np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-12-32'], np.dtype('M8[us]')) + # Cannot have trailing characters + assert_raises(ValueError, np.array, ['1980-02-03%'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03 q'], + np.dtype('M8[us]')) + + # Hours must be in range [0, 23] + assert_raises(ValueError, np.array, ['1980-02-03 25'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03T25'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03 24:01'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03T24:01'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03 -1'], + np.dtype('M8[us]')) + # No trailing ':' + assert_raises(ValueError, np.array, ['1980-02-03 01:'], + np.dtype('M8[us]')) + # Minutes must be in range [0, 59] + assert_raises(ValueError, np.array, ['1980-02-03 01:-1'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03 01:60'], + np.dtype('M8[us]')) + # No trailing ':' + assert_raises(ValueError, np.array, ['1980-02-03 01:60:'], + np.dtype('M8[us]')) + # Seconds must be in range [0, 59] + assert_raises(ValueError, np.array, ['1980-02-03 01:10:-1'], + np.dtype('M8[us]')) + assert_raises(ValueError, np.array, ['1980-02-03 01:01:60'], + np.dtype('M8[us]')) + # 
Timezone offset must within a reasonable range + with assert_warns(DeprecationWarning): + assert_raises(ValueError, np.array, ['1980-02-03 01:01:00+0661'], + np.dtype('M8[us]')) + with assert_warns(DeprecationWarning): + assert_raises(ValueError, np.array, ['1980-02-03 01:01:00+2500'], + np.dtype('M8[us]')) + with assert_warns(DeprecationWarning): + assert_raises(ValueError, np.array, ['1980-02-03 01:01:00-0070'], + np.dtype('M8[us]')) + with assert_warns(DeprecationWarning): + assert_raises(ValueError, np.array, ['1980-02-03 01:01:00-3000'], + np.dtype('M8[us]')) + with assert_warns(DeprecationWarning): + assert_raises(ValueError, np.array, ['1980-02-03 01:01:00-25:00'], + np.dtype('M8[us]')) + + def test_creation_overflow(self): + date = '1980-03-23 20:00:00' + timesteps = np.array([date], dtype='datetime64[s]')[0].astype(np.int64) + for unit in ['ms', 'us', 'ns']: + timesteps *= 1000 + x = np.array([date], dtype='datetime64[%s]' % unit) + + assert_equal(timesteps, x[0].astype(np.int64), + err_msg='Datetime conversion error for unit %s' % unit) + + assert_equal(x[0].astype(np.int64), 322689600000000000) + + # gh-13062 + with pytest.raises(OverflowError): + np.datetime64(2**64, 'D') + with pytest.raises(OverflowError): + np.timedelta64(2**64, 'D') + + def test_datetime_as_string(self): + # Check all the units with default string conversion + date = '1959-10-13' + datetime = '1959-10-13T12:34:56.789012345678901234' + + assert_equal(np.datetime_as_string(np.datetime64(date, 'Y')), + '1959') + assert_equal(np.datetime_as_string(np.datetime64(date, 'M')), + '1959-10') + assert_equal(np.datetime_as_string(np.datetime64(date, 'D')), + '1959-10-13') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'h')), + '1959-10-13T12') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'm')), + '1959-10-13T12:34') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 's')), + '1959-10-13T12:34:56') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ms')), + '1959-10-13T12:34:56.789') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'us')), + '1959-10-13T12:34:56.789012') + + datetime = '1969-12-31T23:34:56.789012345678901234' + + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ns')), + '1969-12-31T23:34:56.789012345') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ps')), + '1969-12-31T23:34:56.789012345678') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'fs')), + '1969-12-31T23:34:56.789012345678901') + + datetime = '1969-12-31T23:59:57.789012345678901234' + + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'as')), + datetime) + datetime = '1970-01-01T00:34:56.789012345678901234' + + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ns')), + '1970-01-01T00:34:56.789012345') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ps')), + '1970-01-01T00:34:56.789012345678') + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'fs')), + '1970-01-01T00:34:56.789012345678901') + + datetime = '1970-01-01T00:00:05.789012345678901234' + + assert_equal(np.datetime_as_string(np.datetime64(datetime, 'as')), + datetime) + + # String conversion with the unit= parameter + a = np.datetime64('2032-07-18T12:23:34.123456', 'us') + assert_equal(np.datetime_as_string(a, unit='Y', casting='unsafe'), + '2032') + assert_equal(np.datetime_as_string(a, unit='M', casting='unsafe'), + '2032-07') + assert_equal(np.datetime_as_string(a, unit='W', casting='unsafe'), + '2032-07-18') + 
assert_equal(np.datetime_as_string(a, unit='D', casting='unsafe'), + '2032-07-18') + assert_equal(np.datetime_as_string(a, unit='h'), '2032-07-18T12') + assert_equal(np.datetime_as_string(a, unit='m'), + '2032-07-18T12:23') + assert_equal(np.datetime_as_string(a, unit='s'), + '2032-07-18T12:23:34') + assert_equal(np.datetime_as_string(a, unit='ms'), + '2032-07-18T12:23:34.123') + assert_equal(np.datetime_as_string(a, unit='us'), + '2032-07-18T12:23:34.123456') + assert_equal(np.datetime_as_string(a, unit='ns'), + '2032-07-18T12:23:34.123456000') + assert_equal(np.datetime_as_string(a, unit='ps'), + '2032-07-18T12:23:34.123456000000') + assert_equal(np.datetime_as_string(a, unit='fs'), + '2032-07-18T12:23:34.123456000000000') + assert_equal(np.datetime_as_string(a, unit='as'), + '2032-07-18T12:23:34.123456000000000000') + + # unit='auto' parameter + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T12:23:34.123456', 'us'), unit='auto'), + '2032-07-18T12:23:34.123456') + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T12:23:34.12', 'us'), unit='auto'), + '2032-07-18T12:23:34.120') + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T12:23:34', 'us'), unit='auto'), + '2032-07-18T12:23:34') + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T12:23:00', 'us'), unit='auto'), + '2032-07-18T12:23') + # 'auto' doesn't split up hour and minute + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T12:00:00', 'us'), unit='auto'), + '2032-07-18T12:00') + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-18T00:00:00', 'us'), unit='auto'), + '2032-07-18') + # 'auto' doesn't split up the date + assert_equal(np.datetime_as_string( + np.datetime64('2032-07-01T00:00:00', 'us'), unit='auto'), + '2032-07-01') + assert_equal(np.datetime_as_string( + np.datetime64('2032-01-01T00:00:00', 'us'), unit='auto'), + '2032-01-01') + + @pytest.mark.skipif(not _has_pytz, reason="The pytz module is not available.") + def test_datetime_as_string_timezone(self): + # timezone='local' vs 'UTC' + a = np.datetime64('2010-03-15T06:30', 'm') + assert_equal(np.datetime_as_string(a), + '2010-03-15T06:30') + assert_equal(np.datetime_as_string(a, timezone='naive'), + '2010-03-15T06:30') + assert_equal(np.datetime_as_string(a, timezone='UTC'), + '2010-03-15T06:30Z') + assert_(np.datetime_as_string(a, timezone='local') != + '2010-03-15T06:30') + + b = np.datetime64('2010-02-15T06:30', 'm') + + assert_equal(np.datetime_as_string(a, timezone=tz('US/Central')), + '2010-03-15T01:30-0500') + assert_equal(np.datetime_as_string(a, timezone=tz('US/Eastern')), + '2010-03-15T02:30-0400') + assert_equal(np.datetime_as_string(a, timezone=tz('US/Pacific')), + '2010-03-14T23:30-0700') + + assert_equal(np.datetime_as_string(b, timezone=tz('US/Central')), + '2010-02-15T00:30-0600') + assert_equal(np.datetime_as_string(b, timezone=tz('US/Eastern')), + '2010-02-15T01:30-0500') + assert_equal(np.datetime_as_string(b, timezone=tz('US/Pacific')), + '2010-02-14T22:30-0800') + + # Dates to strings with a timezone attached is disabled by default + assert_raises(TypeError, np.datetime_as_string, a, unit='D', + timezone=tz('US/Pacific')) + # Check that we can print out the date in the specified time zone + assert_equal(np.datetime_as_string(a, unit='D', + timezone=tz('US/Pacific'), casting='unsafe'), + '2010-03-14') + assert_equal(np.datetime_as_string(b, unit='D', + timezone=tz('US/Central'), casting='unsafe'), + '2010-02-15') + + def test_datetime_arange(self): + # With two 
datetimes provided as strings + a = np.arange('2010-01-05', '2010-01-10', dtype='M8[D]') + assert_equal(a.dtype, np.dtype('M8[D]')) + assert_equal(a, + np.array(['2010-01-05', '2010-01-06', '2010-01-07', + '2010-01-08', '2010-01-09'], dtype='M8[D]')) + + a = np.arange('1950-02-10', '1950-02-06', -1, dtype='M8[D]') + assert_equal(a.dtype, np.dtype('M8[D]')) + assert_equal(a, + np.array(['1950-02-10', '1950-02-09', '1950-02-08', + '1950-02-07'], dtype='M8[D]')) + + # Unit should be detected as months here + a = np.arange('1969-05', '1970-05', 2, dtype='M8') + assert_equal(a.dtype, np.dtype('M8[M]')) + assert_equal(a, + np.datetime64('1969-05') + np.arange(12, step=2)) + + # datetime, integer|timedelta works as well + # produces arange (start, start + stop) in this case + a = np.arange('1969', 18, 3, dtype='M8') + assert_equal(a.dtype, np.dtype('M8[Y]')) + assert_equal(a, + np.datetime64('1969') + np.arange(18, step=3)) + a = np.arange('1969-12-19', 22, np.timedelta64(2), dtype='M8') + assert_equal(a.dtype, np.dtype('M8[D]')) + assert_equal(a, + np.datetime64('1969-12-19') + np.arange(22, step=2)) + + # Step of 0 is disallowed + assert_raises(ValueError, np.arange, np.datetime64('today'), + np.datetime64('today') + 3, 0) + # Promotion across nonlinear unit boundaries is disallowed + assert_raises(TypeError, np.arange, np.datetime64('2011-03-01', 'D'), + np.timedelta64(5, 'M')) + assert_raises(TypeError, np.arange, + np.datetime64('2012-02-03T14', 's'), + np.timedelta64(5, 'Y')) + + def test_datetime_arange_no_dtype(self): + d = np.array('2010-01-04', dtype="M8[D]") + assert_equal(np.arange(d, d + 1), d) + assert_raises(ValueError, np.arange, d) + + def test_timedelta_arange(self): + a = np.arange(3, 10, dtype='m8') + assert_equal(a.dtype, np.dtype('m8')) + assert_equal(a, np.timedelta64(0) + np.arange(3, 10)) + + a = np.arange(np.timedelta64(3, 's'), 10, 2, dtype='m8') + assert_equal(a.dtype, np.dtype('m8[s]')) + assert_equal(a, np.timedelta64(0, 's') + np.arange(3, 10, 2)) + + # Step of 0 is disallowed + assert_raises(ValueError, np.arange, np.timedelta64(0), + np.timedelta64(5), 0) + # Promotion across nonlinear unit boundaries is disallowed + assert_raises(TypeError, np.arange, np.timedelta64(0, 'D'), + np.timedelta64(5, 'M')) + assert_raises(TypeError, np.arange, np.timedelta64(0, 'Y'), + np.timedelta64(5, 'D')) + + @pytest.mark.parametrize("val1, val2, expected", [ + # case from gh-12092 + (np.timedelta64(7, 's'), + np.timedelta64(3, 's'), + np.timedelta64(1, 's')), + # negative value cases + (np.timedelta64(3, 's'), + np.timedelta64(-2, 's'), + np.timedelta64(-1, 's')), + (np.timedelta64(-3, 's'), + np.timedelta64(2, 's'), + np.timedelta64(1, 's')), + # larger value cases + (np.timedelta64(17, 's'), + np.timedelta64(22, 's'), + np.timedelta64(17, 's')), + (np.timedelta64(22, 's'), + np.timedelta64(17, 's'), + np.timedelta64(5, 's')), + # different units + (np.timedelta64(1, 'm'), + np.timedelta64(57, 's'), + np.timedelta64(3, 's')), + (np.timedelta64(1, 'us'), + np.timedelta64(727, 'ns'), + np.timedelta64(273, 'ns')), + # NaT is propagated + (np.timedelta64('NaT'), + np.timedelta64(50, 'ns'), + np.timedelta64('NaT')), + # Y % M works + (np.timedelta64(2, 'Y'), + np.timedelta64(22, 'M'), + np.timedelta64(2, 'M')), + ]) + def test_timedelta_modulus(self, val1, val2, expected): + assert_equal(val1 % val2, expected) + + @pytest.mark.parametrize("val1, val2", [ + # years and months sometimes can't be unambiguously + # divided for modulus operation + (np.timedelta64(7, 'Y'), + 
np.timedelta64(3, 's')), + (np.timedelta64(7, 'M'), + np.timedelta64(1, 'D')), + ]) + def test_timedelta_modulus_error(self, val1, val2): + with assert_raises_regex(TypeError, "common metadata divisor"): + val1 % val2 + + def test_timedelta_modulus_div_by_zero(self): + with assert_warns(RuntimeWarning): + actual = np.timedelta64(10, 's') % np.timedelta64(0, 's') + assert_equal(actual, np.timedelta64('NaT')) + + @pytest.mark.parametrize("val1, val2", [ + # cases where one operand is not + # timedelta64 + (np.timedelta64(7, 'Y'), + 15,), + (7.5, + np.timedelta64(1, 'D')), + ]) + def test_timedelta_modulus_type_resolution(self, val1, val2): + # NOTE: some of the operations may be supported + # in the future + with assert_raises_regex(TypeError, + "remainder cannot use operands with types"): + val1 % val2 + + def test_timedelta_arange_no_dtype(self): + d = np.array(5, dtype="m8[D]") + assert_equal(np.arange(d, d + 1), d) + assert_raises(ValueError, np.arange, d) + + def test_datetime_maximum_reduce(self): + a = np.array(['2010-01-02', '1999-03-14', '1833-03'], dtype='M8[D]') + assert_equal(np.maximum.reduce(a).dtype, np.dtype('M8[D]')) + assert_equal(np.maximum.reduce(a), + np.datetime64('2010-01-02')) + + a = np.array([1, 4, 0, 7, 2], dtype='m8[s]') + assert_equal(np.maximum.reduce(a).dtype, np.dtype('m8[s]')) + assert_equal(np.maximum.reduce(a), + np.timedelta64(7, 's')) + + def test_datetime_busday_offset(self): + # First Monday in June + assert_equal( + np.busday_offset('2011-06', 0, roll='forward', weekmask='Mon'), + np.datetime64('2011-06-06')) + # Last Monday in June + assert_equal( + np.busday_offset('2011-07', -1, roll='forward', weekmask='Mon'), + np.datetime64('2011-06-27')) + assert_equal( + np.busday_offset('2011-07', -1, roll='forward', weekmask='Mon'), + np.datetime64('2011-06-27')) + + # Default M-F business days, different roll modes + assert_equal(np.busday_offset('2010-08', 0, roll='backward'), + np.datetime64('2010-07-30')) + assert_equal(np.busday_offset('2010-08', 0, roll='preceding'), + np.datetime64('2010-07-30')) + assert_equal(np.busday_offset('2010-08', 0, roll='modifiedpreceding'), + np.datetime64('2010-08-02')) + assert_equal(np.busday_offset('2010-08', 0, roll='modifiedfollowing'), + np.datetime64('2010-08-02')) + assert_equal(np.busday_offset('2010-08', 0, roll='forward'), + np.datetime64('2010-08-02')) + assert_equal(np.busday_offset('2010-08', 0, roll='following'), + np.datetime64('2010-08-02')) + assert_equal(np.busday_offset('2010-10-30', 0, roll='following'), + np.datetime64('2010-11-01')) + assert_equal( + np.busday_offset('2010-10-30', 0, roll='modifiedfollowing'), + np.datetime64('2010-10-29')) + assert_equal( + np.busday_offset('2010-10-30', 0, roll='modifiedpreceding'), + np.datetime64('2010-10-29')) + assert_equal( + np.busday_offset('2010-10-16', 0, roll='modifiedfollowing'), + np.datetime64('2010-10-18')) + assert_equal( + np.busday_offset('2010-10-16', 0, roll='modifiedpreceding'), + np.datetime64('2010-10-15')) + # roll='raise' by default + assert_raises(ValueError, np.busday_offset, '2011-06-04', 0) + + # Bigger offset values + assert_equal(np.busday_offset('2006-02-01', 25), + np.datetime64('2006-03-08')) + assert_equal(np.busday_offset('2006-03-08', -25), + np.datetime64('2006-02-01')) + assert_equal(np.busday_offset('2007-02-25', 11, weekmask='SatSun'), + np.datetime64('2007-04-07')) + assert_equal(np.busday_offset('2007-04-07', -11, weekmask='SatSun'), + np.datetime64('2007-02-25')) + + # NaT values when roll is not raise + 
assert_equal(np.busday_offset(np.datetime64('NaT'), 1, roll='nat'), + np.datetime64('NaT')) + assert_equal(np.busday_offset(np.datetime64('NaT'), 1, roll='following'), + np.datetime64('NaT')) + assert_equal(np.busday_offset(np.datetime64('NaT'), 1, roll='preceding'), + np.datetime64('NaT')) + + def test_datetime_busdaycalendar(self): + # Check that it removes NaT, duplicates, and weekends + # and sorts the result. + bdd = np.busdaycalendar( + holidays=['NaT', '2011-01-17', '2011-03-06', 'NaT', + '2011-12-26', '2011-05-30', '2011-01-17']) + assert_equal(bdd.holidays, + np.array(['2011-01-17', '2011-05-30', '2011-12-26'], dtype='M8')) + # Default M-F weekmask + assert_equal(bdd.weekmask, np.array([1, 1, 1, 1, 1, 0, 0], dtype='?')) + + # Check string weekmask with varying whitespace. + bdd = np.busdaycalendar(weekmask="Sun TueWed Thu\tFri") + assert_equal(bdd.weekmask, np.array([0, 1, 1, 1, 1, 0, 1], dtype='?')) + + # Check length 7 0/1 string + bdd = np.busdaycalendar(weekmask="0011001") + assert_equal(bdd.weekmask, np.array([0, 0, 1, 1, 0, 0, 1], dtype='?')) + + # Check length 7 string weekmask. + bdd = np.busdaycalendar(weekmask="Mon Tue") + assert_equal(bdd.weekmask, np.array([1, 1, 0, 0, 0, 0, 0], dtype='?')) + + # All-zeros weekmask should raise + assert_raises(ValueError, np.busdaycalendar, weekmask=[0, 0, 0, 0, 0, 0, 0]) + # weekday names must be correct case + assert_raises(ValueError, np.busdaycalendar, weekmask="satsun") + # All-zeros weekmask should raise + assert_raises(ValueError, np.busdaycalendar, weekmask="") + # Invalid weekday name codes should raise + assert_raises(ValueError, np.busdaycalendar, weekmask="Mon Tue We") + assert_raises(ValueError, np.busdaycalendar, weekmask="Max") + assert_raises(ValueError, np.busdaycalendar, weekmask="Monday Tue") + + def test_datetime_busday_holidays_offset(self): + # With exactly one holiday + assert_equal( + np.busday_offset('2011-11-10', 1, holidays=['2011-11-11']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-04', 5, holidays=['2011-11-11']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-10', 5, holidays=['2011-11-11']), + np.datetime64('2011-11-18')) + assert_equal( + np.busday_offset('2011-11-14', -1, holidays=['2011-11-11']), + np.datetime64('2011-11-10')) + assert_equal( + np.busday_offset('2011-11-18', -5, holidays=['2011-11-11']), + np.datetime64('2011-11-10')) + assert_equal( + np.busday_offset('2011-11-14', -5, holidays=['2011-11-11']), + np.datetime64('2011-11-04')) + # With the holiday appearing twice + assert_equal( + np.busday_offset('2011-11-10', 1, + holidays=['2011-11-11', '2011-11-11']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-14', -1, + holidays=['2011-11-11', '2011-11-11']), + np.datetime64('2011-11-10')) + # With a NaT holiday + assert_equal( + np.busday_offset('2011-11-10', 1, + holidays=['2011-11-11', 'NaT']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-14', -1, + holidays=['NaT', '2011-11-11']), + np.datetime64('2011-11-10')) + # With another holiday after + assert_equal( + np.busday_offset('2011-11-10', 1, + holidays=['2011-11-11', '2011-11-24']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-14', -1, + holidays=['2011-11-11', '2011-11-24']), + np.datetime64('2011-11-10')) + # With another holiday before + assert_equal( + np.busday_offset('2011-11-10', 1, + holidays=['2011-10-10', '2011-11-11']), + np.datetime64('2011-11-14')) + assert_equal( + 
np.busday_offset('2011-11-14', -1, + holidays=['2011-10-10', '2011-11-11']), + np.datetime64('2011-11-10')) + # With another holiday before and after + assert_equal( + np.busday_offset('2011-11-10', 1, + holidays=['2011-10-10', '2011-11-11', '2011-11-24']), + np.datetime64('2011-11-14')) + assert_equal( + np.busday_offset('2011-11-14', -1, + holidays=['2011-10-10', '2011-11-11', '2011-11-24']), + np.datetime64('2011-11-10')) + + # A bigger forward jump across more than one week/holiday + holidays = ['2011-10-10', '2011-11-11', '2011-11-24', + '2011-12-25', '2011-05-30', '2011-02-21', + '2011-12-26', '2012-01-02'] + bdd = np.busdaycalendar(weekmask='1111100', holidays=holidays) + assert_equal( + np.busday_offset('2011-10-03', 4, holidays=holidays), + np.busday_offset('2011-10-03', 4)) + assert_equal( + np.busday_offset('2011-10-03', 5, holidays=holidays), + np.busday_offset('2011-10-03', 5 + 1)) + assert_equal( + np.busday_offset('2011-10-03', 27, holidays=holidays), + np.busday_offset('2011-10-03', 27 + 1)) + assert_equal( + np.busday_offset('2011-10-03', 28, holidays=holidays), + np.busday_offset('2011-10-03', 28 + 2)) + assert_equal( + np.busday_offset('2011-10-03', 35, holidays=holidays), + np.busday_offset('2011-10-03', 35 + 2)) + assert_equal( + np.busday_offset('2011-10-03', 36, holidays=holidays), + np.busday_offset('2011-10-03', 36 + 3)) + assert_equal( + np.busday_offset('2011-10-03', 56, holidays=holidays), + np.busday_offset('2011-10-03', 56 + 3)) + assert_equal( + np.busday_offset('2011-10-03', 57, holidays=holidays), + np.busday_offset('2011-10-03', 57 + 4)) + assert_equal( + np.busday_offset('2011-10-03', 60, holidays=holidays), + np.busday_offset('2011-10-03', 60 + 4)) + assert_equal( + np.busday_offset('2011-10-03', 61, holidays=holidays), + np.busday_offset('2011-10-03', 61 + 5)) + assert_equal( + np.busday_offset('2011-10-03', 61, busdaycal=bdd), + np.busday_offset('2011-10-03', 61 + 5)) + # A bigger backward jump across more than one week/holiday + assert_equal( + np.busday_offset('2012-01-03', -1, holidays=holidays), + np.busday_offset('2012-01-03', -1 - 1)) + assert_equal( + np.busday_offset('2012-01-03', -4, holidays=holidays), + np.busday_offset('2012-01-03', -4 - 1)) + assert_equal( + np.busday_offset('2012-01-03', -5, holidays=holidays), + np.busday_offset('2012-01-03', -5 - 2)) + assert_equal( + np.busday_offset('2012-01-03', -25, holidays=holidays), + np.busday_offset('2012-01-03', -25 - 2)) + assert_equal( + np.busday_offset('2012-01-03', -26, holidays=holidays), + np.busday_offset('2012-01-03', -26 - 3)) + assert_equal( + np.busday_offset('2012-01-03', -33, holidays=holidays), + np.busday_offset('2012-01-03', -33 - 3)) + assert_equal( + np.busday_offset('2012-01-03', -34, holidays=holidays), + np.busday_offset('2012-01-03', -34 - 4)) + assert_equal( + np.busday_offset('2012-01-03', -56, holidays=holidays), + np.busday_offset('2012-01-03', -56 - 4)) + assert_equal( + np.busday_offset('2012-01-03', -57, holidays=holidays), + np.busday_offset('2012-01-03', -57 - 5)) + assert_equal( + np.busday_offset('2012-01-03', -57, busdaycal=bdd), + np.busday_offset('2012-01-03', -57 - 5)) + + # Can't supply both a weekmask/holidays and busdaycal + assert_raises(ValueError, np.busday_offset, '2012-01-03', -15, + weekmask='1111100', busdaycal=bdd) + assert_raises(ValueError, np.busday_offset, '2012-01-03', -15, + holidays=holidays, busdaycal=bdd) + + # Roll with the holidays + assert_equal( + np.busday_offset('2011-12-25', 0, + roll='forward', holidays=holidays), + 
np.datetime64('2011-12-27')) + assert_equal( + np.busday_offset('2011-12-26', 0, + roll='forward', holidays=holidays), + np.datetime64('2011-12-27')) + assert_equal( + np.busday_offset('2011-12-26', 0, + roll='backward', holidays=holidays), + np.datetime64('2011-12-23')) + assert_equal( + np.busday_offset('2012-02-27', 0, + roll='modifiedfollowing', + holidays=['2012-02-27', '2012-02-26', '2012-02-28', + '2012-03-01', '2012-02-29']), + np.datetime64('2012-02-24')) + assert_equal( + np.busday_offset('2012-03-06', 0, + roll='modifiedpreceding', + holidays=['2012-03-02', '2012-03-03', '2012-03-01', + '2012-03-05', '2012-03-07', '2012-03-06']), + np.datetime64('2012-03-08')) + + def test_datetime_busday_holidays_count(self): + holidays = ['2011-01-01', '2011-10-10', '2011-11-11', '2011-11-24', + '2011-12-25', '2011-05-30', '2011-02-21', '2011-01-17', + '2011-12-26', '2012-01-02', '2011-02-21', '2011-05-30', + '2011-07-01', '2011-07-04', '2011-09-05', '2011-10-10'] + bdd = np.busdaycalendar(weekmask='1111100', holidays=holidays) + + # Validate against busday_offset broadcast against + # a range of offsets + dates = np.busday_offset('2011-01-01', np.arange(366), + roll='forward', busdaycal=bdd) + assert_equal(np.busday_count('2011-01-01', dates, busdaycal=bdd), + np.arange(366)) + # Returns negative value when reversed + assert_equal(np.busday_count(dates, '2011-01-01', busdaycal=bdd), + -np.arange(366)) + + dates = np.busday_offset('2011-12-31', -np.arange(366), + roll='forward', busdaycal=bdd) + assert_equal(np.busday_count(dates, '2011-12-31', busdaycal=bdd), + np.arange(366)) + # Returns negative value when reversed + assert_equal(np.busday_count('2011-12-31', dates, busdaycal=bdd), + -np.arange(366)) + + # Can't supply both a weekmask/holidays and busdaycal + assert_raises(ValueError, np.busday_offset, '2012-01-03', '2012-02-03', + weekmask='1111100', busdaycal=bdd) + assert_raises(ValueError, np.busday_offset, '2012-01-03', '2012-02-03', + holidays=holidays, busdaycal=bdd) + + # Number of Mondays in March 2011 + assert_equal(np.busday_count('2011-03', '2011-04', weekmask='Mon'), 4) + # Returns negative value when reversed + assert_equal(np.busday_count('2011-04', '2011-03', weekmask='Mon'), -4) + + def test_datetime_is_busday(self): + holidays = ['2011-01-01', '2011-10-10', '2011-11-11', '2011-11-24', + '2011-12-25', '2011-05-30', '2011-02-21', '2011-01-17', + '2011-12-26', '2012-01-02', '2011-02-21', '2011-05-30', + '2011-07-01', '2011-07-04', '2011-09-05', '2011-10-10', + 'NaT'] + bdd = np.busdaycalendar(weekmask='1111100', holidays=holidays) + + # Weekend/weekday tests + assert_equal(np.is_busday('2011-01-01'), False) + assert_equal(np.is_busday('2011-01-02'), False) + assert_equal(np.is_busday('2011-01-03'), True) + + # All the holidays are not business days + assert_equal(np.is_busday(holidays, busdaycal=bdd), + np.zeros(len(holidays), dtype='?')) + + def test_datetime_y2038(self): + # Test parsing on either side of the Y2038 boundary + a = np.datetime64('2038-01-19T03:14:07') + assert_equal(a.view(np.int64), 2**31 - 1) + a = np.datetime64('2038-01-19T03:14:08') + assert_equal(a.view(np.int64), 2**31) + + # Test parsing on either side of the Y2038 boundary with + # a manually specified timezone offset + with assert_warns(DeprecationWarning): + a = np.datetime64('2038-01-19T04:14:07+0100') + assert_equal(a.view(np.int64), 2**31 - 1) + with assert_warns(DeprecationWarning): + a = np.datetime64('2038-01-19T04:14:08+0100') + assert_equal(a.view(np.int64), 2**31) + + # Test parsing a date 
after Y2038 + a = np.datetime64('2038-01-20T13:21:14') + assert_equal(str(a), '2038-01-20T13:21:14') + + def test_isnat(self): + assert_(np.isnat(np.datetime64('NaT', 'ms'))) + assert_(np.isnat(np.datetime64('NaT', 'ns'))) + assert_(not np.isnat(np.datetime64('2038-01-19T03:14:07'))) + + assert_(np.isnat(np.timedelta64('NaT', "ms"))) + assert_(not np.isnat(np.timedelta64(34, "ms"))) + + res = np.array([False, False, True]) + for unit in ['Y', 'M', 'W', 'D', + 'h', 'm', 's', 'ms', 'us', + 'ns', 'ps', 'fs', 'as']: + arr = np.array([123, -321, "NaT"], dtype='<datetime64[%s]' % unit) + assert_equal(np.isnat(arr), res) + arr = np.array([123, -321, "NaT"], dtype='>datetime64[%s]' % unit) + assert_equal(np.isnat(arr), res) + arr = np.array([123, -321, "NaT"], dtype='<timedelta64[%s]' % unit) + assert_equal(np.isnat(arr), res) + arr = np.array([123, -321, "NaT"], dtype='>timedelta64[%s]' % unit) + assert_equal(np.isnat(arr), res) + + def test_isnat_error(self): + # Test that only datetime dtype arrays are accepted + for t in np.typecodes["All"]: + if t in np.typecodes["Datetime"]: + continue + assert_raises(TypeError, np.isnat, np.zeros(10, t)) + + def test_corecursive_input(self): + # construct a co-recursive list + a, b = [], [] + a.append(b) + b.append(a) + obj_arr = np.array([None]) + obj_arr[0] = a + + # gh-11154: This shouldn't cause a C stack overflow + assert_raises(RecursionError, obj_arr.astype, 'M8') + assert_raises(RecursionError, obj_arr.astype, 'm8') + + +class TestDateTimeData(object): + + def test_basic(self): + a = np.array(['1980-03-23'], dtype=np.datetime64) + assert_equal(np.datetime_data(a.dtype), ('D', 1)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_defchararray.py b/contrib/python/numpy/py2/numpy/core/tests/test_defchararray.py new file mode 100644 index 0000000000..7b0e6f8a4b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_defchararray.py @@ -0,0 +1,692 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import numpy as np +from numpy.core.multiarray import _vec_string +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, + assert_raises_regex, suppress_warnings, + ) + +kw_unicode_true = {'unicode': True} # make 2to3 work properly +kw_unicode_false = {'unicode': False} + +class TestBasic(object): + def test_from_object_array(self): + A = np.array([['abc', 2], + ['long ', '0123456789']], dtype='O') + B = np.char.array(A) + assert_equal(B.dtype.itemsize, 10) + assert_array_equal(B, [[b'abc', b'2'], + [b'long', b'0123456789']]) + + def test_from_object_array_unicode(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']], dtype='O') + assert_raises(ValueError, np.char.array, (A,)) + B = np.char.array(A, **kw_unicode_true) + assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize) + assert_array_equal(B, [['abc', u'Sigma \u03a3'], + ['long', '0123456789']]) + + def test_from_string_array(self): + A = np.array([[b'abc', b'foo'], + [b'long ', b'0123456789']]) + assert_equal(A.dtype.type, np.string_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B[0, 0] = 'changed' + assert_(B[0, 0] != A[0, 0]) + C = np.char.asarray(A) + assert_array_equal(C, A) + assert_equal(C.dtype, A.dtype) + C[0, 0] = 'changed again' + assert_(C[0, 0] != B[0, 0]) + assert_(C[0, 0] == A[0, 0]) + + def test_from_unicode_array(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']]) + 
assert_equal(A.dtype.type, np.unicode_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B = np.char.array(A, **kw_unicode_true) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + + def fail(): + np.char.array(A, **kw_unicode_false) + + assert_raises(UnicodeEncodeError, fail) + + def test_unicode_upconvert(self): + A = np.char.array(['abc']) + B = np.char.array([u'\u03a3']) + assert_(issubclass((A + B).dtype.type, np.unicode_)) + + def test_from_string(self): + A = np.char.array(b'abc') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 3) + assert_(issubclass(A.dtype.type, np.string_)) + + def test_from_unicode(self): + A = np.char.array(u'\u03a3') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 1) + assert_equal(A.itemsize, 4) + assert_(issubclass(A.dtype.type, np.unicode_)) + +class TestVecString(object): + def test_non_existent_method(self): + + def fail(): + _vec_string('a', np.string_, 'bogus') + + assert_raises(AttributeError, fail) + + def test_non_string_array(self): + + def fail(): + _vec_string(1, np.string_, 'strip') + + assert_raises(TypeError, fail) + + def test_invalid_args_tuple(self): + + def fail(): + _vec_string(['a'], np.string_, 'strip', 1) + + assert_raises(TypeError, fail) + + def test_invalid_type_descr(self): + + def fail(): + _vec_string(['a'], 'BOGUS', 'strip') + + assert_raises(TypeError, fail) + + def test_invalid_function_args(self): + + def fail(): + _vec_string(['a'], np.string_, 'strip', (1,)) + + assert_raises(TypeError, fail) + + def test_invalid_result_type(self): + + def fail(): + _vec_string(['a'], np.integer, 'strip') + + assert_raises(TypeError, fail) + + def test_broadcast_error(self): + + def fail(): + _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],)) + + assert_raises(ValueError, fail) + + +class TestWhitespace(object): + def setup(self): + self.A = np.array([['abc ', '123 '], + ['789 ', 'xyz ']]).view(np.chararray) + self.B = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + + def test1(self): + assert_(np.all(self.A == self.B)) + assert_(np.all(self.A >= self.B)) + assert_(np.all(self.A <= self.B)) + assert_(not np.any(self.A > self.B)) + assert_(not np.any(self.A < self.B)) + assert_(not np.any(self.A != self.B)) + +class TestChar(object): + def setup(self): + self.A = np.array('abc1', dtype='c').view(np.chararray) + + def test_it(self): + assert_equal(self.A.shape, (4,)) + assert_equal(self.A.upper()[:2].tobytes(), b'AB') + +class TestComparisons(object): + def setup(self): + self.A = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + self.B = np.array([['efg', '123 '], + ['051', 'tuv']]).view(np.chararray) + + def test_not_equal(self): + assert_array_equal((self.A != self.B), [[True, False], [True, True]]) + + def test_equal(self): + assert_array_equal((self.A == self.B), [[False, True], [False, False]]) + + def test_greater_equal(self): + assert_array_equal((self.A >= self.B), [[False, True], [True, True]]) + + def test_less_equal(self): + assert_array_equal((self.A <= self.B), [[True, True], [False, False]]) + + def test_greater(self): + assert_array_equal((self.A > self.B), [[False, False], [True, True]]) + + def test_less(self): + assert_array_equal((self.A < self.B), [[True, False], [False, False]]) + +class TestComparisonsMixed1(TestComparisons): + """Ticket #1276""" + + def setup(self): + TestComparisons.setup(self) + self.B = np.array([['efg', '123 '], + ['051', 
'tuv']], np.unicode_).view(np.chararray) + +class TestComparisonsMixed2(TestComparisons): + """Ticket #1276""" + + def setup(self): + TestComparisons.setup(self) + self.A = np.array([['abc', '123'], + ['789', 'xyz']], np.unicode_).view(np.chararray) + +class TestInformation(object): + def setup(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_len(self): + assert_(issubclass(np.char.str_len(self.A).dtype.type, np.integer)) + assert_array_equal(np.char.str_len(self.A), [[5, 0], [5, 9], [12, 5]]) + assert_array_equal(np.char.str_len(self.B), [[3, 0], [5, 9], [12, 5]]) + + def test_count(self): + assert_(issubclass(self.A.count('').dtype.type, np.integer)) + assert_array_equal(self.A.count('a'), [[1, 0], [0, 1], [0, 0]]) + assert_array_equal(self.A.count('123'), [[0, 0], [1, 0], [1, 0]]) + # Python doesn't seem to like counting NULL characters + # assert_array_equal(self.A.count('\0'), [[0, 0], [0, 0], [1, 0]]) + assert_array_equal(self.A.count('a', 0, 2), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.B.count('a'), [[0, 0], [0, 1], [0, 0]]) + assert_array_equal(self.B.count('123'), [[0, 0], [1, 0], [1, 0]]) + # assert_array_equal(self.B.count('\0'), [[0, 0], [0, 0], [1, 0]]) + + def test_endswith(self): + assert_(issubclass(self.A.endswith('').dtype.type, np.bool_)) + assert_array_equal(self.A.endswith(' '), [[1, 0], [0, 0], [1, 0]]) + assert_array_equal(self.A.endswith('3', 0, 3), [[0, 0], [1, 0], [1, 0]]) + + def fail(): + self.A.endswith('3', 'fdjk') + + assert_raises(TypeError, fail) + + def test_find(self): + assert_(issubclass(self.A.find('a').dtype.type, np.integer)) + assert_array_equal(self.A.find('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.find('3'), [[-1, -1], [2, -1], [2, -1]]) + assert_array_equal(self.A.find('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.find(['1', 'P']), [[-1, -1], [0, -1], [0, 1]]) + + def test_index(self): + + def fail(): + self.A.index('a') + + assert_raises(ValueError, fail) + assert_(np.char.index('abcba', 'b') == 1) + assert_(issubclass(np.char.index('abcba', 'b').dtype.type, np.integer)) + + def test_isalnum(self): + assert_(issubclass(self.A.isalnum().dtype.type, np.bool_)) + assert_array_equal(self.A.isalnum(), [[False, False], [True, True], [False, True]]) + + def test_isalpha(self): + assert_(issubclass(self.A.isalpha().dtype.type, np.bool_)) + assert_array_equal(self.A.isalpha(), [[False, False], [False, True], [False, True]]) + + def test_isdigit(self): + assert_(issubclass(self.A.isdigit().dtype.type, np.bool_)) + assert_array_equal(self.A.isdigit(), [[False, False], [True, False], [False, False]]) + + def test_islower(self): + assert_(issubclass(self.A.islower().dtype.type, np.bool_)) + assert_array_equal(self.A.islower(), [[True, False], [False, False], [False, False]]) + + def test_isspace(self): + assert_(issubclass(self.A.isspace().dtype.type, np.bool_)) + assert_array_equal(self.A.isspace(), [[False, False], [False, False], [False, False]]) + + def test_istitle(self): + assert_(issubclass(self.A.istitle().dtype.type, np.bool_)) + assert_array_equal(self.A.istitle(), [[False, False], [False, False], [False, False]]) + + def test_isupper(self): + assert_(issubclass(self.A.isupper().dtype.type, np.bool_)) + assert_array_equal(self.A.isupper(), [[False, False], [False, False], [False, True]]) + 
+ def test_rfind(self): + assert_(issubclass(self.A.rfind('a').dtype.type, np.integer)) + assert_array_equal(self.A.rfind('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.rfind('3'), [[-1, -1], [2, -1], [6, -1]]) + assert_array_equal(self.A.rfind('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.rfind(['1', 'P']), [[-1, -1], [0, -1], [0, 2]]) + + def test_rindex(self): + + def fail(): + self.A.rindex('a') + + assert_raises(ValueError, fail) + assert_(np.char.rindex('abcba', 'b') == 3) + assert_(issubclass(np.char.rindex('abcba', 'b').dtype.type, np.integer)) + + def test_startswith(self): + assert_(issubclass(self.A.startswith('').dtype.type, np.bool_)) + assert_array_equal(self.A.startswith(' '), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.A.startswith('1', 0, 3), [[0, 0], [1, 0], [1, 0]]) + + def fail(): + self.A.startswith('3', 'fdjk') + + assert_raises(TypeError, fail) + + +class TestMethods(object): + def setup(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']], + dtype='S').view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_capitalize(self): + tgt = [[b' abc ', b''], + [b'12345', b'Mixedcase'], + [b'123 \t 345 \0 ', b'Upper']] + assert_(issubclass(self.A.capitalize().dtype.type, np.string_)) + assert_array_equal(self.A.capitalize(), tgt) + + tgt = [[u' \u03c3 ', ''], + ['12345', 'Mixedcase'], + ['123 \t 345 \0 ', 'Upper']] + assert_(issubclass(self.B.capitalize().dtype.type, np.unicode_)) + assert_array_equal(self.B.capitalize(), tgt) + + def test_center(self): + assert_(issubclass(self.A.center(10).dtype.type, np.string_)) + C = self.A.center([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.center(20, b'#') + assert_(np.all(C.startswith(b'#'))) + assert_(np.all(C.endswith(b'#'))) + + C = np.char.center(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b' FOO ', b' FOO '], + [b' FOO ', b' FOO ']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_decode(self): + if sys.version_info[0] >= 3: + A = np.char.array([b'\\u03a3']) + assert_(A.decode('unicode-escape')[0] == '\u03a3') + else: + with suppress_warnings() as sup: + if sys.py3kwarning: + sup.filter(DeprecationWarning, "'hex_codec'") + A = np.char.array(['736563726574206d657373616765']) + assert_(A.decode('hex_codec')[0] == 'secret message') + + def test_encode(self): + B = self.B.encode('unicode_escape') + assert_(B[0][0] == str(' \\u03a3 ').encode('latin1')) + + def test_expandtabs(self): + T = self.A.expandtabs() + assert_(T[2, 0] == b'123 345 \0') + + def test_join(self): + if sys.version_info[0] >= 3: + # NOTE: list(b'123') == [49, 50, 51] + # so that b','.join(b'123') results to an error on Py3 + A0 = self.A.decode('ascii') + else: + A0 = self.A + + A = np.char.join([',', '#'], A0) + if sys.version_info[0] >= 3: + assert_(issubclass(A.dtype.type, np.unicode_)) + else: + assert_(issubclass(A.dtype.type, np.string_)) + tgt = np.array([[' ,a,b,c, ', ''], + ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'], + ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']]) + assert_array_equal(np.char.join([',', '#'], A0), tgt) + + def test_ljust(self): + assert_(issubclass(self.A.ljust(10).dtype.type, np.string_)) + + C = self.A.ljust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.ljust(20, b'#') + assert_array_equal(C.startswith(b'#'), [ + 
[False, True], [False, False], [False, False]]) + assert_(np.all(C.endswith(b'#'))) + + C = np.char.ljust(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b'FOO ', b'FOO '], + [b'FOO ', b'FOO ']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_lower(self): + tgt = [[b' abc ', b''], + [b'12345', b'mixedcase'], + [b'123 \t 345 \0 ', b'upper']] + assert_(issubclass(self.A.lower().dtype.type, np.string_)) + assert_array_equal(self.A.lower(), tgt) + + tgt = [[u' \u03c3 ', u''], + [u'12345', u'mixedcase'], + [u'123 \t 345 \0 ', u'upper']] + assert_(issubclass(self.B.lower().dtype.type, np.unicode_)) + assert_array_equal(self.B.lower(), tgt) + + def test_lstrip(self): + tgt = [[b'abc ', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345 \0 ', b'UPPER']] + assert_(issubclass(self.A.lstrip().dtype.type, np.string_)) + assert_array_equal(self.A.lstrip(), tgt) + + tgt = [[b' abc', b''], + [b'2345', b'ixedCase'], + [b'23 \t 345 \x00', b'UPPER']] + assert_array_equal(self.A.lstrip([b'1', b'M']), tgt) + + tgt = [[u'\u03a3 ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']] + assert_(issubclass(self.B.lstrip().dtype.type, np.unicode_)) + assert_array_equal(self.B.lstrip(), tgt) + + def test_partition(self): + P = self.A.partition([b'3', b'M']) + tgt = [[(b' abc ', b'', b''), (b'', b'', b'')], + [(b'12', b'3', b'45'), (b'', b'M', b'ixedCase')], + [(b'12', b'3', b' \t 345 \0 '), (b'UPPER', b'', b'')]] + assert_(issubclass(P.dtype.type, np.string_)) + assert_array_equal(P, tgt) + + def test_replace(self): + R = self.A.replace([b'3', b'a'], + [b'##########', b'@']) + tgt = [[b' abc ', b''], + [b'12##########45', b'MixedC@se'], + [b'12########## \t ##########45 \x00', b'UPPER']] + assert_(issubclass(R.dtype.type, np.string_)) + assert_array_equal(R, tgt) + + if sys.version_info[0] < 3: + # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3 + R = self.A.replace(b'a', u'\u03a3') + tgt = [[u' \u03a3bc ', ''], + ['12345', u'MixedC\u03a3se'], + ['123 \t 345 \x00', 'UPPER']] + assert_(issubclass(R.dtype.type, np.unicode_)) + assert_array_equal(R, tgt) + + def test_rjust(self): + assert_(issubclass(self.A.rjust(10).dtype.type, np.string_)) + + C = self.A.rjust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + + C = self.A.rjust(20, b'#') + assert_(np.all(C.startswith(b'#'))) + assert_array_equal(C.endswith(b'#'), + [[False, True], [False, False], [False, False]]) + + C = np.char.rjust(b'FOO', [[10, 20], [15, 8]]) + tgt = [[b' FOO', b' FOO'], + [b' FOO', b' FOO']] + assert_(issubclass(C.dtype.type, np.string_)) + assert_array_equal(C, tgt) + + def test_rpartition(self): + P = self.A.rpartition([b'3', b'M']) + tgt = [[(b'', b'', b' abc '), (b'', b'', b'')], + [(b'12', b'3', b'45'), (b'', b'M', b'ixedCase')], + [(b'123 \t ', b'3', b'45 \0 '), (b'', b'', b'UPPER')]] + assert_(issubclass(P.dtype.type, np.string_)) + assert_array_equal(P, tgt) + + def test_rsplit(self): + A = self.A.rsplit(b'3') + tgt = [[[b' abc '], [b'']], + [[b'12', b'45'], [b'MixedCase']], + [[b'12', b' \t ', b'45 \x00 '], [b'UPPER']]] + assert_(issubclass(A.dtype.type, np.object_)) + assert_equal(A.tolist(), tgt) + + def test_rstrip(self): + assert_(issubclass(self.A.rstrip().dtype.type, np.string_)) + + tgt = [[b' abc', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345', b'UPPER']] + assert_array_equal(self.A.rstrip(), tgt) + + tgt = [[b' abc ', b''], + [b'1234', b'MixedCase'], + [b'123 \t 345 \x00', b'UPP'] + ] + assert_array_equal(self.A.rstrip([b'5', b'ER']), tgt) + 
+ tgt = [[u' \u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']] + assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_)) + assert_array_equal(self.B.rstrip(), tgt) + + def test_strip(self): + tgt = [[b'abc', b''], + [b'12345', b'MixedCase'], + [b'123 \t 345', b'UPPER']] + assert_(issubclass(self.A.strip().dtype.type, np.string_)) + assert_array_equal(self.A.strip(), tgt) + + tgt = [[b' abc ', b''], + [b'234', b'ixedCas'], + [b'23 \t 345 \x00', b'UPP']] + assert_array_equal(self.A.strip([b'15', b'EReM']), tgt) + + tgt = [[u'\u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']] + assert_(issubclass(self.B.strip().dtype.type, np.unicode_)) + assert_array_equal(self.B.strip(), tgt) + + def test_split(self): + A = self.A.split(b'3') + tgt = [ + [[b' abc '], [b'']], + [[b'12', b'45'], [b'MixedCase']], + [[b'12', b' \t ', b'45 \x00 '], [b'UPPER']]] + assert_(issubclass(A.dtype.type, np.object_)) + assert_equal(A.tolist(), tgt) + + def test_splitlines(self): + A = np.char.array(['abc\nfds\nwer']).splitlines() + assert_(issubclass(A.dtype.type, np.object_)) + assert_(A.shape == (1,)) + assert_(len(A[0]) == 3) + + def test_swapcase(self): + tgt = [[b' ABC ', b''], + [b'12345', b'mIXEDcASE'], + [b'123 \t 345 \0 ', b'upper']] + assert_(issubclass(self.A.swapcase().dtype.type, np.string_)) + assert_array_equal(self.A.swapcase(), tgt) + + tgt = [[u' \u03c3 ', u''], + [u'12345', u'mIXEDcASE'], + [u'123 \t 345 \0 ', u'upper']] + assert_(issubclass(self.B.swapcase().dtype.type, np.unicode_)) + assert_array_equal(self.B.swapcase(), tgt) + + def test_title(self): + tgt = [[b' Abc ', b''], + [b'12345', b'Mixedcase'], + [b'123 \t 345 \0 ', b'Upper']] + assert_(issubclass(self.A.title().dtype.type, np.string_)) + assert_array_equal(self.A.title(), tgt) + + tgt = [[u' \u03a3 ', u''], + [u'12345', u'Mixedcase'], + [u'123 \t 345 \0 ', u'Upper']] + assert_(issubclass(self.B.title().dtype.type, np.unicode_)) + assert_array_equal(self.B.title(), tgt) + + def test_upper(self): + tgt = [[b' ABC ', b''], + [b'12345', b'MIXEDCASE'], + [b'123 \t 345 \0 ', b'UPPER']] + assert_(issubclass(self.A.upper().dtype.type, np.string_)) + assert_array_equal(self.A.upper(), tgt) + + tgt = [[u' \u03a3 ', u''], + [u'12345', u'MIXEDCASE'], + [u'123 \t 345 \0 ', u'UPPER']] + assert_(issubclass(self.B.upper().dtype.type, np.unicode_)) + assert_array_equal(self.B.upper(), tgt) + + def test_isnumeric(self): + + def fail(): + self.A.isnumeric() + + assert_raises(TypeError, fail) + assert_(issubclass(self.B.isnumeric().dtype.type, np.bool_)) + assert_array_equal(self.B.isnumeric(), [ + [False, False], [True, False], [False, False]]) + + def test_isdecimal(self): + + def fail(): + self.A.isdecimal() + + assert_raises(TypeError, fail) + assert_(issubclass(self.B.isdecimal().dtype.type, np.bool_)) + assert_array_equal(self.B.isdecimal(), [ + [False, False], [True, False], [False, False]]) + + +class TestOperations(object): + def setup(self): + self.A = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + self.B = np.array([['efg', '456'], + ['051', 'tuv']]).view(np.chararray) + + def test_add(self): + AB = np.array([['abcefg', '123456'], + ['789051', 'xyztuv']]).view(np.chararray) + assert_array_equal(AB, (self.A + self.B)) + assert_(len((self.A + self.B)[0][0]) == 6) + + def test_radd(self): + QA = np.array([['qabc', 'q123'], + ['q789', 'qxyz']]).view(np.chararray) + assert_array_equal(QA, ('q' + self.A)) + + def test_mul(self): + A = self.A + for r in (2, 3, 5, 7, 197): + Ar = np.array([[A[0, 
0]*r, A[0, 1]*r], + [A[1, 0]*r, A[1, 1]*r]]).view(np.chararray) + + assert_array_equal(Ar, (self.A * r)) + + for ob in [object(), 'qrs']: + with assert_raises_regex(ValueError, + 'Can only multiply by integers'): + A*ob + + def test_rmul(self): + A = self.A + for r in (2, 3, 5, 7, 197): + Ar = np.array([[A[0, 0]*r, A[0, 1]*r], + [A[1, 0]*r, A[1, 1]*r]]).view(np.chararray) + assert_array_equal(Ar, (r * self.A)) + + for ob in [object(), 'qrs']: + with assert_raises_regex(ValueError, + 'Can only multiply by integers'): + ob * A + + def test_mod(self): + """Ticket #856""" + F = np.array([['%d', '%f'], ['%s', '%r']]).view(np.chararray) + C = np.array([[3, 7], [19, 1]]) + FC = np.array([['3', '7.000000'], + ['19', '1']]).view(np.chararray) + assert_array_equal(FC, F % C) + + A = np.array([['%.3f', '%d'], ['%s', '%r']]).view(np.chararray) + A1 = np.array([['1.000', '1'], ['1', '1']]).view(np.chararray) + assert_array_equal(A1, (A % 1)) + + A2 = np.array([['1.000', '2'], ['3', '4']]).view(np.chararray) + assert_array_equal(A2, (A % [[1, 2], [3, 4]])) + + def test_rmod(self): + assert_(("%s" % self.A) == str(self.A)) + assert_(("%r" % self.A) == repr(self.A)) + + for ob in [42, object()]: + with assert_raises_regex( + TypeError, "unsupported operand type.* and 'chararray'"): + ob % self.A + + def test_slice(self): + """Regression test for https://github.com/numpy/numpy/issues/5982""" + + arr = np.array([['abc ', 'def '], ['geh ', 'ijk ']], + dtype='S4').view(np.chararray) + sl1 = arr[:] + assert_array_equal(sl1, arr) + assert_(sl1.base is arr) + assert_(sl1.base.base is arr.base) + + sl2 = arr[:, :] + assert_array_equal(sl2, arr) + assert_(sl2.base is arr) + assert_(sl2.base.base is arr.base) + + assert_(arr[0, 0] == b'abc') + + +def test_empty_indexing(): + """Regression test for ticket 1948.""" + # Check that indexing a chararray with an empty list/array returns an + # empty chararray instead of a chararray with a single empty string in it. + s = np.chararray((4,)) + assert_(s[[]].size == 0) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_deprecations.py b/contrib/python/numpy/py2/numpy/core/tests/test_deprecations.py new file mode 100644 index 0000000000..edb5d5e460 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_deprecations.py @@ -0,0 +1,535 @@ +""" +Tests related to deprecation warnings. Also a convenient place +to document how deprecations should eventually be turned into errors. + +""" +from __future__ import division, absolute_import, print_function + +import datetime +import sys +import operator +import warnings +import pytest + +import numpy as np +from numpy.testing import ( + assert_raises, assert_warns, assert_ + ) + +try: + import pytz + _has_pytz = True +except ImportError: + _has_pytz = False + + +class _DeprecationTestCase(object): + # Just as warning: warnings uses re.match, so the start of this message + # must match. + message = '' + warning_cls = DeprecationWarning + + def setup(self): + self.warn_ctx = warnings.catch_warnings(record=True) + self.log = self.warn_ctx.__enter__() + + # Do *not* ignore other DeprecationWarnings. Ignoring warnings + # can give very confusing results because of + # https://bugs.python.org/issue4180 and it is probably simplest to + # try to keep the tests cleanly giving only the right warning type. 
+ # (While checking them set to "error" those are ignored anyway) + # We still have them show up, because otherwise they would be raised + warnings.filterwarnings("always", category=self.warning_cls) + warnings.filterwarnings("always", message=self.message, + category=self.warning_cls) + + def teardown(self): + self.warn_ctx.__exit__() + + def assert_deprecated(self, function, num=1, ignore_others=False, + function_fails=False, + exceptions=np._NoValue, + args=(), kwargs={}): + """Test if DeprecationWarnings are given and raised. + + This first checks if the function when called gives `num` + DeprecationWarnings, after that it tries to raise these + DeprecationWarnings and compares them with `exceptions`. + The exceptions can be different for cases where this code path + is simply not anticipated and the exception is replaced. + + Parameters + ---------- + function : callable + The function to test + num : int + Number of DeprecationWarnings to expect. This should normally be 1. + ignore_others : bool + Whether warnings of the wrong type should be ignored (note that + the message is not checked) + function_fails : bool + If the function would normally fail, setting this will check for + warnings inside a try/except block. + exceptions : Exception or tuple of Exceptions + Exception to expect when turning the warnings into an error. + The default checks for DeprecationWarnings. If exceptions is + empty the function is expected to run successfully. + args : tuple + Arguments for `function` + kwargs : dict + Keyword arguments for `function` + """ + # reset the log + self.log[:] = [] + + if exceptions is np._NoValue: + exceptions = (self.warning_cls,) + + try: + function(*args, **kwargs) + except (Exception if function_fails else tuple()): + pass + + # just in case, clear the registry + num_found = 0 + for warning in self.log: + if warning.category is self.warning_cls: + num_found += 1 + elif not ignore_others: + raise AssertionError( + "expected %s but got: %s" % + (self.warning_cls.__name__, warning.category)) + if num is not None and num_found != num: + msg = "%i warnings found but %i expected." % (len(self.log), num) + lst = [str(w.category) for w in self.log] + raise AssertionError("\n".join([msg] + lst)) + + with warnings.catch_warnings(): + warnings.filterwarnings("error", message=self.message, + category=self.warning_cls) + try: + function(*args, **kwargs) + if exceptions != tuple(): + raise AssertionError( + "No error raised during function call") + except exceptions: + if exceptions == tuple(): + raise AssertionError( + "Error raised during function call") + + def assert_not_deprecated(self, function, args=(), kwargs={}): + """Test that warnings are not raised. 
+ + This is just a shorthand for: + + self.assert_deprecated(function, num=0, ignore_others=True, + exceptions=tuple(), args=args, kwargs=kwargs) + """ + self.assert_deprecated(function, num=0, ignore_others=True, + exceptions=tuple(), args=args, kwargs=kwargs) + + +class _VisibleDeprecationTestCase(_DeprecationTestCase): + warning_cls = np.VisibleDeprecationWarning + + +class TestNonTupleNDIndexDeprecation(object): + def test_basic(self): + a = np.zeros((5, 5)) + with warnings.catch_warnings(): + warnings.filterwarnings('always') + assert_warns(FutureWarning, a.__getitem__, [[0, 1], [0, 1]]) + assert_warns(FutureWarning, a.__getitem__, [slice(None)]) + + warnings.filterwarnings('error') + assert_raises(FutureWarning, a.__getitem__, [[0, 1], [0, 1]]) + assert_raises(FutureWarning, a.__getitem__, [slice(None)]) + + # a a[[0, 1]] always was advanced indexing, so no error/warning + a[[0, 1]] + + +class TestRankDeprecation(_DeprecationTestCase): + """Test that np.rank is deprecated. The function should simply be + removed. The VisibleDeprecationWarning may become unnecessary. + """ + + def test(self): + a = np.arange(10) + assert_warns(np.VisibleDeprecationWarning, np.rank, a) + + +class TestComparisonDeprecations(_DeprecationTestCase): + """This tests the deprecation, for non-element-wise comparison logic. + This used to mean that when an error occurred during element-wise comparison + (i.e. broadcasting) NotImplemented was returned, but also in the comparison + itself, False was given instead of the error. + + Also test FutureWarning for the None comparison. + """ + + message = "elementwise.* comparison failed; .*" + + def test_normal_types(self): + for op in (operator.eq, operator.ne): + # Broadcasting errors: + self.assert_deprecated(op, args=(np.zeros(3), [])) + a = np.zeros(3, dtype='i,i') + # (warning is issued a couple of times here) + self.assert_deprecated(op, args=(a, a[:-1]), num=None) + + # Element comparison error (numpy array can't be compared). + a = np.array([1, np.array([1,2,3])], dtype=object) + b = np.array([1, np.array([1,2,3])], dtype=object) + self.assert_deprecated(op, args=(a, b), num=None) + + def test_string(self): + # For two string arrays, strings always raised the broadcasting error: + a = np.array(['a', 'b']) + b = np.array(['a', 'b', 'c']) + assert_raises(ValueError, lambda x, y: x == y, a, b) + + # The empty list is not cast to string, and this used to pass due + # to dtype mismatch; now (2018-06-21) it correctly leads to a + # FutureWarning. + assert_warns(FutureWarning, lambda: a == []) + + def test_void_dtype_equality_failures(self): + class NotArray(object): + def __array__(self): + raise TypeError + + # Needed so Python 3 does not raise DeprecationWarning twice. + def __ne__(self, other): + return NotImplemented + + self.assert_deprecated(lambda: np.arange(2) == NotArray()) + self.assert_deprecated(lambda: np.arange(2) != NotArray()) + + struct1 = np.zeros(2, dtype="i4,i4") + struct2 = np.zeros(2, dtype="i4,i4,i4") + + assert_warns(FutureWarning, lambda: struct1 == 1) + assert_warns(FutureWarning, lambda: struct1 == struct2) + assert_warns(FutureWarning, lambda: struct1 != 1) + assert_warns(FutureWarning, lambda: struct1 != struct2) + + def test_array_richcompare_legacy_weirdness(self): + # It doesn't really work to use assert_deprecated here, b/c part of + # the point of assert_deprecated is to check that when warnings are + # set to "error" mode then the error is propagated -- which is good! 
+ # But here we are testing a bunch of code that is deprecated *because* + # it has the habit of swallowing up errors and converting them into + # different warnings. So assert_warns will have to be sufficient. + assert_warns(FutureWarning, lambda: np.arange(2) == "a") + assert_warns(FutureWarning, lambda: np.arange(2) != "a") + # No warning for scalar comparisons + with warnings.catch_warnings(): + warnings.filterwarnings("error") + assert_(not (np.array(0) == "a")) + assert_(np.array(0) != "a") + assert_(not (np.int16(0) == "a")) + assert_(np.int16(0) != "a") + + for arg1 in [np.asarray(0), np.int16(0)]: + struct = np.zeros(2, dtype="i4,i4") + for arg2 in [struct, "a"]: + for f in [operator.lt, operator.le, operator.gt, operator.ge]: + if sys.version_info[0] >= 3: + # py3 + with warnings.catch_warnings() as l: + warnings.filterwarnings("always") + assert_raises(TypeError, f, arg1, arg2) + assert_(not l) + else: + # py2 + assert_warns(DeprecationWarning, f, arg1, arg2) + + +class TestDatetime64Timezone(_DeprecationTestCase): + """Parsing of datetime64 with timezones deprecated in 1.11.0, because + datetime64 is now timezone naive rather than UTC only. + + It will be quite a while before we can remove this, because, at the very + least, a lot of existing code uses the 'Z' modifier to avoid conversion + from local time to UTC, even if otherwise it handles time in a timezone + naive fashion. + """ + def test_string(self): + self.assert_deprecated(np.datetime64, args=('2000-01-01T00+01',)) + self.assert_deprecated(np.datetime64, args=('2000-01-01T00Z',)) + + @pytest.mark.skipif(not _has_pytz, + reason="The pytz module is not available.") + def test_datetime(self): + tz = pytz.timezone('US/Eastern') + dt = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=tz) + self.assert_deprecated(np.datetime64, args=(dt,)) + + +class TestNonCContiguousViewDeprecation(_DeprecationTestCase): + """View of non-C-contiguous arrays deprecated in 1.11.0. + + The deprecation will not be raised for arrays that are both C and F + contiguous, as C contiguous is dominant. There are more such arrays + with relaxed stride checking than without so the deprecation is not + as visible with relaxed stride checking in force. + """ + + def test_fortran_contiguous(self): + self.assert_deprecated(np.ones((2,2)).T.view, args=(complex,)) + self.assert_deprecated(np.ones((2,2)).T.view, args=(np.int8,)) + + +class TestInvalidOrderParameterInputForFlattenArrayDeprecation(_DeprecationTestCase): + """Invalid arguments to the ORDER parameter in array.flatten() should not be + allowed and should raise an error. However, in the interests of not breaking + code that may inadvertently pass invalid arguments to this parameter, a + DeprecationWarning will be issued instead for the time being to give developers + time to refactor relevant code. + """ + + def test_flatten_array_non_string_arg(self): + x = np.zeros((3, 5)) + self.message = ("Non-string object detected for " + "the array ordering. Please pass " + "in 'C', 'F', 'A', or 'K' instead") + self.assert_deprecated(x.flatten, args=(np.pi,)) + + def test_flatten_array_invalid_string_arg(self): + # Tests that a DeprecationWarning is raised + # when a string of length greater than one + # starting with "C", "F", "A", or "K" (case- + # and unicode-insensitive) is passed in for + # the ORDER parameter. Otherwise, a TypeError + # will be raised! + + x = np.zeros((3, 5)) + self.message = ("Non length-one string passed " + "in for the array ordering. 
Please " + "pass in 'C', 'F', 'A', or 'K' instead") + self.assert_deprecated(x.flatten, args=("FACK",)) + + +class TestArrayDataAttributeAssignmentDeprecation(_DeprecationTestCase): + """Assigning the 'data' attribute of an ndarray is unsafe as pointed + out in gh-7093. Eventually, such assignment should NOT be allowed, but + in the interests of maintaining backwards compatibility, only a Deprecation- + Warning will be raised instead for the time being to give developers time to + refactor relevant code. + """ + + def test_data_attr_assignment(self): + a = np.arange(10) + b = np.linspace(0, 1, 10) + + self.message = ("Assigning the 'data' attribute is an " + "inherently unsafe operation and will " + "be removed in the future.") + self.assert_deprecated(a.__setattr__, args=('data', b.data)) + + +class TestLinspaceInvalidNumParameter(_DeprecationTestCase): + """Argument to the num parameter in linspace that cannot be + safely interpreted as an integer is deprecated in 1.12.0. + + Argument to the num parameter in linspace that cannot be + safely interpreted as an integer should not be allowed. + In the interest of not breaking code that passes + an argument that could still be interpreted as an integer, a + DeprecationWarning will be issued for the time being to give + developers time to refactor relevant code. + """ + def test_float_arg(self): + # 2016-02-25, PR#7328 + self.assert_deprecated(np.linspace, args=(0, 10, 2.5)) + + +class TestBinaryReprInsufficientWidthParameterForRepresentation(_DeprecationTestCase): + """ + If a 'width' parameter is passed into ``binary_repr`` that is insufficient to + represent the number in base 2 (positive) or 2's complement (negative) form, + the function used to silently ignore the parameter and return a representation + using the minimal number of bits needed for the form in question. Such behavior + is now considered unsafe from a user perspective and will raise an error in the future. + """ + + def test_insufficient_width_positive(self): + args = (10,) + kwargs = {'width': 2} + + self.message = ("Insufficient bit width provided. This behavior " + "will raise an error in the future.") + self.assert_deprecated(np.binary_repr, args=args, kwargs=kwargs) + + def test_insufficient_width_negative(self): + args = (-5,) + kwargs = {'width': 2} + + self.message = ("Insufficient bit width provided. This behavior " + "will raise an error in the future.") + self.assert_deprecated(np.binary_repr, args=args, kwargs=kwargs) + + +class TestNumericStyleTypecodes(_DeprecationTestCase): + """ + Deprecate the old numeric-style dtypes, which are especially + confusing for complex types, e.g. Complex32 -> complex64. When the + deprecation cycle is complete, the check for the strings should be + removed from PyArray_DescrConverter in descriptor.c, and the + deprecated keys should not be added as capitalized aliases in + _add_aliases in numerictypes.py. 
+ """ + def test_all_dtypes(self): + deprecated_types = [ + 'Bool', 'Complex32', 'Complex64', 'Float16', 'Float32', 'Float64', + 'Int8', 'Int16', 'Int32', 'Int64', 'Object0', 'Timedelta64', + 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'Void0' + ] + if sys.version_info[0] < 3: + deprecated_types.extend(['Unicode0', 'String0']) + + for dt in deprecated_types: + self.assert_deprecated(np.dtype, exceptions=(TypeError,), + args=(dt,)) + + +class TestTestDeprecated(object): + def test_assert_deprecated(self): + test_case_instance = _DeprecationTestCase() + test_case_instance.setup() + assert_raises(AssertionError, + test_case_instance.assert_deprecated, + lambda: None) + + def foo(): + warnings.warn("foo", category=DeprecationWarning, stacklevel=2) + + test_case_instance.assert_deprecated(foo) + test_case_instance.teardown() + + +class TestClassicIntDivision(_DeprecationTestCase): + """ + See #7949. Deprecate the numeric-style dtypes with -3 flag in python 2 + if used for division + List of data types: https://docs.scipy.org/doc/numpy/user/basics.types.html + """ + def test_int_dtypes(self): + #scramble types and do some mix and match testing + deprecated_types = [ + 'bool_', 'int_', 'intc', 'uint8', 'int8', 'uint64', 'int32', 'uint16', + 'intp', 'int64', 'uint32', 'int16' + ] + if sys.version_info[0] < 3 and sys.py3kwarning: + import operator as op + dt2 = 'bool_' + for dt1 in deprecated_types: + a = np.array([1,2,3], dtype=dt1) + b = np.array([1,2,3], dtype=dt2) + self.assert_deprecated(op.div, args=(a,b)) + dt2 = dt1 + + +class TestNonNumericConjugate(_DeprecationTestCase): + """ + Deprecate no-op behavior of ndarray.conjugate on non-numeric dtypes, + which conflicts with the error behavior of np.conjugate. + """ + def test_conjugate(self): + for a in np.array(5), np.array(5j): + self.assert_not_deprecated(a.conjugate) + for a in (np.array('s'), np.array('2016', 'M'), + np.array((1, 2), [('a', int), ('b', int)])): + self.assert_deprecated(a.conjugate) + + +class TestNPY_CHAR(_DeprecationTestCase): + # 2017-05-03, 1.13.0 + def test_npy_char_deprecation(self): + from numpy.core._multiarray_tests import npy_char_deprecation + self.assert_deprecated(npy_char_deprecation) + assert_(npy_char_deprecation() == 'S1') + + +class Test_UPDATEIFCOPY(_DeprecationTestCase): + """ + v1.14 deprecates creating an array with the UPDATEIFCOPY flag, use + WRITEBACKIFCOPY instead + """ + def test_npy_updateifcopy_deprecation(self): + from numpy.core._multiarray_tests import npy_updateifcopy_deprecation + arr = np.arange(9).reshape(3, 3) + v = arr.T + self.assert_deprecated(npy_updateifcopy_deprecation, args=(v,)) + + +class TestDatetimeEvent(_DeprecationTestCase): + # 2017-08-11, 1.14.0 + def test_3_tuple(self): + for cls in (np.datetime64, np.timedelta64): + # two valid uses - (unit, num) and (unit, num, den, None) + self.assert_not_deprecated(cls, args=(1, ('ms', 2))) + self.assert_not_deprecated(cls, args=(1, ('ms', 2, 1, None))) + + # trying to use the event argument, removed in 1.7.0, is deprecated + # it used to be a uint8 + self.assert_deprecated(cls, args=(1, ('ms', 2, 'event'))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 63))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 1, 'event'))) + self.assert_deprecated(cls, args=(1, ('ms', 2, 1, 63))) + + +class TestTruthTestingEmptyArrays(_DeprecationTestCase): + # 2017-09-25, 1.14.0 + message = '.*truth value of an empty array is ambiguous.*' + + def test_1d(self): + self.assert_deprecated(bool, args=(np.array([]),)) + + def test_2d(self): + 
self.assert_deprecated(bool, args=(np.zeros((1, 0)),)) + self.assert_deprecated(bool, args=(np.zeros((0, 1)),)) + self.assert_deprecated(bool, args=(np.zeros((0, 0)),)) + + +class TestBincount(_DeprecationTestCase): + # 2017-06-01, 1.14.0 + def test_bincount_minlength(self): + self.assert_deprecated(lambda: np.bincount([1, 2, 3], minlength=None)) + + +class TestGeneratorSum(_DeprecationTestCase): + # 2018-02-25, 1.15.0 + def test_generator_sum(self): + self.assert_deprecated(np.sum, args=((i for i in range(5)),)) + + +class TestSctypeNA(_VisibleDeprecationTestCase): + # 2018-06-24, 1.16 + def test_sctypeNA(self): + self.assert_deprecated(lambda: np.sctypeNA['?']) + self.assert_deprecated(lambda: np.typeNA['?']) + self.assert_deprecated(lambda: np.typeNA.get('?')) + + +class TestPositiveOnNonNumerical(_DeprecationTestCase): + # 2018-06-28, 1.16.0 + def test_positive_on_non_number(self): + self.assert_deprecated(operator.pos, args=(np.array('foo'),)) + +class TestFromstring(_DeprecationTestCase): + # 2017-10-19, 1.14 + def test_fromstring(self): + self.assert_deprecated(np.fromstring, args=('\x00'*80,)) + +class Test_GetSet_NumericOps(_DeprecationTestCase): + # 2018-09-20, 1.16.0 + def test_get_numeric_ops(self): + from numpy.core._multiarray_tests import getset_numericops + self.assert_deprecated(getset_numericops, num=2) + + # empty kwargs prevents any state actually changing which would break + # other tests. + self.assert_deprecated(np.set_numeric_ops, kwargs={}) + assert_raises(ValueError, np.set_numeric_ops, add='abc') diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_dtype.py b/contrib/python/numpy/py2/numpy/core/tests/test_dtype.py new file mode 100644 index 0000000000..ff0fb9eff5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_dtype.py @@ -0,0 +1,1138 @@ +from __future__ import division, absolute_import, print_function + +import sys +import operator +import pytest +import ctypes +import gc + +import numpy as np +from numpy.core._rational_tests import rational +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT) +from numpy.core.numeric import pickle + +def assert_dtype_equal(a, b): + assert_equal(a, b) + assert_equal(hash(a), hash(b), + "two equivalent types do not hash to the same value !") + +def assert_dtype_not_equal(a, b): + assert_(a != b) + assert_(hash(a) != hash(b), + "two different types hash to the same value !") + +class TestBuiltin(object): + @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, + np.unicode]) + def test_run(self, t): + """Only test hash runs at all.""" + dt = np.dtype(t) + hash(dt) + + @pytest.mark.parametrize('t', [int, float]) + def test_dtype(self, t): + # Make sure equivalent byte order char hash the same (e.g. 
< and = on + # little endian) + dt = np.dtype(t) + dt2 = dt.newbyteorder("<") + dt3 = dt.newbyteorder(">") + if dt == dt2: + assert_(dt.byteorder != dt2.byteorder, "bogus test") + assert_dtype_equal(dt, dt2) + else: + assert_(dt.byteorder != dt3.byteorder, "bogus test") + assert_dtype_equal(dt, dt3) + + def test_equivalent_dtype_hashing(self): + # Make sure equivalent dtypes with different type num hash equal + uintp = np.dtype(np.uintp) + if uintp.itemsize == 4: + left = uintp + right = np.dtype(np.uint32) + else: + left = uintp + right = np.dtype(np.ulonglong) + assert_(left == right) + assert_(hash(left) == hash(right)) + + def test_invalid_types(self): + # Make sure invalid type strings raise an error + + assert_raises(TypeError, np.dtype, 'O3') + assert_raises(TypeError, np.dtype, 'O5') + assert_raises(TypeError, np.dtype, 'O7') + assert_raises(TypeError, np.dtype, 'b3') + assert_raises(TypeError, np.dtype, 'h4') + assert_raises(TypeError, np.dtype, 'I5') + assert_raises(TypeError, np.dtype, 'e3') + assert_raises(TypeError, np.dtype, 'f5') + + if np.dtype('g').itemsize == 8 or np.dtype('g').itemsize == 16: + assert_raises(TypeError, np.dtype, 'g12') + elif np.dtype('g').itemsize == 12: + assert_raises(TypeError, np.dtype, 'g16') + + if np.dtype('l').itemsize == 8: + assert_raises(TypeError, np.dtype, 'l4') + assert_raises(TypeError, np.dtype, 'L4') + else: + assert_raises(TypeError, np.dtype, 'l8') + assert_raises(TypeError, np.dtype, 'L8') + + if np.dtype('q').itemsize == 8: + assert_raises(TypeError, np.dtype, 'q4') + assert_raises(TypeError, np.dtype, 'Q4') + else: + assert_raises(TypeError, np.dtype, 'q8') + assert_raises(TypeError, np.dtype, 'Q8') + + def test_bad_param(self): + # Can't give a size that's too small + assert_raises(ValueError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['i4', 'i1'], + 'offsets':[0, 4], + 'itemsize':4}) + # If alignment is enabled, the alignment (4) must divide the itemsize + assert_raises(ValueError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['i4', 'i1'], + 'offsets':[0, 4], + 'itemsize':9}, align=True) + # If alignment is enabled, the individual fields must be aligned + assert_raises(ValueError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['i1', 'f4'], + 'offsets':[0, 2]}, align=True) + + def test_field_order_equality(self): + x = np.dtype({'names': ['A', 'B'], + 'formats': ['i4', 'f4'], + 'offsets': [0, 4]}) + y = np.dtype({'names': ['B', 'A'], + 'formats': ['f4', 'i4'], + 'offsets': [4, 0]}) + assert_equal(x == y, False) + +class TestRecord(object): + def test_equivalent_record(self): + """Test whether equivalent record dtypes hash the same.""" + a = np.dtype([('yo', int)]) + b = np.dtype([('yo', int)]) + assert_dtype_equal(a, b) + + def test_different_names(self): + # In theory, they may hash the same (collision) ? + a = np.dtype([('yo', int)]) + b = np.dtype([('ye', int)]) + assert_dtype_not_equal(a, b) + + def test_different_titles(self): + # In theory, they may hash the same (collision) ? 
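# --- Editorial aside (not part of the vendored patch): a minimal sketch of what
# --- dtype "titles" are, since the test below compares dtypes that differ only
# --- in their titles. A title acts as an alternate key for its field, so the
# --- same element can be read through either name. Values here are illustrative.
import numpy as np
dt_titled = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'],
                      'titles': ['Red pixel', 'Blue pixel']})
px = np.zeros(1, dtype=dt_titled)
px['r'] = 255
assert px['Red pixel'][0] == 255  # the title is an alias for the 'r' field
# --- End of aside; the vendored test continues below.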
+ a = np.dtype({'names': ['r', 'b'], + 'formats': ['u1', 'u1'], + 'titles': ['Red pixel', 'Blue pixel']}) + b = np.dtype({'names': ['r', 'b'], + 'formats': ['u1', 'u1'], + 'titles': ['RRed pixel', 'Blue pixel']}) + assert_dtype_not_equal(a, b) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_refcount_dictionary_setting(self): + names = ["name1"] + formats = ["f8"] + titles = ["t1"] + offsets = [0] + d = dict(names=names, formats=formats, titles=titles, offsets=offsets) + refcounts = {k: sys.getrefcount(i) for k, i in d.items()} + np.dtype(d) + refcounts_new = {k: sys.getrefcount(i) for k, i in d.items()} + assert refcounts == refcounts_new + + def test_mutate(self): + # Mutating a dtype should reset the cached hash value + a = np.dtype([('yo', int)]) + b = np.dtype([('yo', int)]) + c = np.dtype([('ye', int)]) + assert_dtype_equal(a, b) + assert_dtype_not_equal(a, c) + a.names = ['ye'] + assert_dtype_equal(a, c) + assert_dtype_not_equal(a, b) + state = b.__reduce__()[2] + a.__setstate__(state) + assert_dtype_equal(a, b) + assert_dtype_not_equal(a, c) + + def test_not_lists(self): + """Test if an appropriate exception is raised when passing bad values to + the dtype constructor. + """ + assert_raises(TypeError, np.dtype, + dict(names={'A', 'B'}, formats=['f8', 'i4'])) + assert_raises(TypeError, np.dtype, + dict(names=['A', 'B'], formats={'f8', 'i4'})) + + def test_aligned_size(self): + # Check that structured dtypes get padded to an aligned size + dt = np.dtype('i4, i1', align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype([('f0', 'i4'), ('f1', 'i1')], align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype({'names':['f0', 'f1'], + 'formats':['i4', 'u1'], + 'offsets':[0, 4]}, align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype({'f0': ('i4', 0), 'f1':('u1', 4)}, align=True) + assert_equal(dt.itemsize, 8) + # Nesting should preserve that alignment + dt1 = np.dtype([('f0', 'i4'), + ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]), + ('f2', 'i1')], align=True) + assert_equal(dt1.itemsize, 20) + dt2 = np.dtype({'names':['f0', 'f1', 'f2'], + 'formats':['i4', + [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], + 'i1'], + 'offsets':[0, 4, 16]}, align=True) + assert_equal(dt2.itemsize, 20) + dt3 = np.dtype({'f0': ('i4', 0), + 'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4), + 'f2': ('i1', 16)}, align=True) + assert_equal(dt3.itemsize, 20) + assert_equal(dt1, dt2) + assert_equal(dt2, dt3) + # Nesting should preserve packing + dt1 = np.dtype([('f0', 'i4'), + ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]), + ('f2', 'i1')], align=False) + assert_equal(dt1.itemsize, 11) + dt2 = np.dtype({'names':['f0', 'f1', 'f2'], + 'formats':['i4', + [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], + 'i1'], + 'offsets':[0, 4, 10]}, align=False) + assert_equal(dt2.itemsize, 11) + dt3 = np.dtype({'f0': ('i4', 0), + 'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4), + 'f2': ('i1', 10)}, align=False) + assert_equal(dt3.itemsize, 11) + assert_equal(dt1, dt2) + assert_equal(dt2, dt3) + # Array of subtype should preserve alignment + dt1 = np.dtype([('a', '|i1'), + ('b', [('f0', '<i2'), + ('f1', '<f4')], 2)], align=True) + assert_equal(dt1.descr, [('a', '|i1'), ('', '|V3'), + ('b', [('f0', '<i2'), ('', '|V2'), + ('f1', '<f4')], (2,))]) + + def test_union_struct(self): + # Should be able to create union dtypes + dt = np.dtype({'names':['f0', 'f1', 'f2'], 'formats':['<u4', '<u2', '<u2'], + 'offsets':[0, 0, 2]}, align=True) + assert_equal(dt.itemsize, 4) + a = np.array([3], 
dtype='<u4').view(dt) + a['f1'] = 10 + a['f2'] = 36 + assert_equal(a['f0'], 10 + 36*256*256) + # Should be able to specify fields out of order + dt = np.dtype({'names':['f0', 'f1', 'f2'], 'formats':['<u4', '<u2', '<u2'], + 'offsets':[4, 0, 2]}, align=True) + assert_equal(dt.itemsize, 8) + # field name should not matter: assignment is by position + dt2 = np.dtype({'names':['f2', 'f0', 'f1'], + 'formats':['<u4', '<u2', '<u2'], + 'offsets':[4, 0, 2]}, align=True) + vals = [(0, 1, 2), (3, -1, 4)] + vals2 = [(0, 1, 2), (3, -1, 4)] + a = np.array(vals, dt) + b = np.array(vals2, dt2) + assert_equal(a.astype(dt2), b) + assert_equal(b.astype(dt), a) + assert_equal(a.view(dt2), b) + assert_equal(b.view(dt), a) + # Should not be able to overlap objects with other types + assert_raises(TypeError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['O', 'i1'], + 'offsets':[0, 2]}) + assert_raises(TypeError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['i4', 'O'], + 'offsets':[0, 3]}) + assert_raises(TypeError, np.dtype, + {'names':['f0', 'f1'], + 'formats':[[('a', 'O')], 'i1'], + 'offsets':[0, 2]}) + assert_raises(TypeError, np.dtype, + {'names':['f0', 'f1'], + 'formats':['i4', [('a', 'O')]], + 'offsets':[0, 3]}) + # Out of order should still be ok, however + dt = np.dtype({'names':['f0', 'f1'], + 'formats':['i1', 'O'], + 'offsets':[np.dtype('intp').itemsize, 0]}) + + def test_comma_datetime(self): + dt = np.dtype('M8[D],datetime64[Y],i8') + assert_equal(dt, np.dtype([('f0', 'M8[D]'), + ('f1', 'datetime64[Y]'), + ('f2', 'i8')])) + + def test_from_dictproxy(self): + # Tests for PR #5920 + dt = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'f4']}) + assert_dtype_equal(dt, np.dtype(dt.fields)) + dt2 = np.dtype((np.void, dt.fields)) + assert_equal(dt2.fields, dt.fields) + + def test_from_dict_with_zero_width_field(self): + # Regression test for #6430 / #2196 + dt = np.dtype([('val1', np.float32, (0,)), ('val2', int)]) + dt2 = np.dtype({'names': ['val1', 'val2'], + 'formats': [(np.float32, (0,)), int]}) + + assert_dtype_equal(dt, dt2) + assert_equal(dt.fields['val1'][0].itemsize, 0) + assert_equal(dt.itemsize, dt.fields['val2'][0].itemsize) + + def test_bool_commastring(self): + d = np.dtype('?,?,?') # raises? 
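# --- Editorial aside (not part of the vendored patch): comma-separated dtype
# --- strings such as '?,?,?' build a structured dtype whose fields are
# --- auto-named f0, f1, ..., which is what the assertions below rely on.
import numpy as np
dt_csv = np.dtype('i4,f8')
assert dt_csv.names == ('f0', 'f1')
assert dt_csv['f1'] == np.dtype('f8')
# --- End of aside; the vendored test continues below.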
+ assert_equal(len(d.names), 3) + for n in d.names: + assert_equal(d.fields[n][0], np.dtype('?')) + + def test_nonint_offsets(self): + # gh-8059 + def make_dtype(off): + return np.dtype({'names': ['A'], 'formats': ['i4'], + 'offsets': [off]}) + + assert_raises(TypeError, make_dtype, 'ASD') + assert_raises(OverflowError, make_dtype, 2**70) + assert_raises(TypeError, make_dtype, 2.3) + assert_raises(ValueError, make_dtype, -10) + + # no errors here: + dt = make_dtype(np.uint32(0)) + np.zeros(1, dtype=dt)[0].item() + + def test_fields_by_index(self): + dt = np.dtype([('a', np.int8), ('b', np.float32, 3)]) + assert_dtype_equal(dt[0], np.dtype(np.int8)) + assert_dtype_equal(dt[1], np.dtype((np.float32, 3))) + assert_dtype_equal(dt[-1], dt[1]) + assert_dtype_equal(dt[-2], dt[0]) + assert_raises(IndexError, lambda: dt[-3]) + + assert_raises(TypeError, operator.getitem, dt, 3.0) + assert_raises(TypeError, operator.getitem, dt, []) + + assert_equal(dt[1], dt[np.int8(1)]) + + def test_partial_dict(self): + # 'names' is missing + assert_raises(ValueError, np.dtype, + {'formats': ['i4', 'i4'], 'f0': ('i4', 0), 'f1':('i4', 4)}) + + +class TestSubarray(object): + def test_single_subarray(self): + a = np.dtype((int, (2))) + b = np.dtype((int, (2,))) + assert_dtype_equal(a, b) + + assert_equal(type(a.subdtype[1]), tuple) + assert_equal(type(b.subdtype[1]), tuple) + + def test_equivalent_record(self): + """Test whether equivalent subarray dtypes hash the same.""" + a = np.dtype((int, (2, 3))) + b = np.dtype((int, (2, 3))) + assert_dtype_equal(a, b) + + def test_nonequivalent_record(self): + """Test whether different subarray dtypes hash differently.""" + a = np.dtype((int, (2, 3))) + b = np.dtype((int, (3, 2))) + assert_dtype_not_equal(a, b) + + a = np.dtype((int, (2, 3))) + b = np.dtype((int, (2, 2))) + assert_dtype_not_equal(a, b) + + a = np.dtype((int, (1, 2, 3))) + b = np.dtype((int, (1, 2))) + assert_dtype_not_equal(a, b) + + def test_shape_equal(self): + """Test some data types that are equal""" + assert_dtype_equal(np.dtype('f8'), np.dtype(('f8', tuple()))) + assert_dtype_equal(np.dtype('f8'), np.dtype(('f8', 1))) + assert_dtype_equal(np.dtype((int, 2)), np.dtype((int, (2,)))) + assert_dtype_equal(np.dtype(('<f4', (3, 2))), np.dtype(('<f4', (3, 2)))) + d = ([('a', 'f4', (1, 2)), ('b', 'f8', (3, 1))], (3, 2)) + assert_dtype_equal(np.dtype(d), np.dtype(d)) + + def test_shape_simple(self): + """Test some simple cases that shouldn't be equal""" + assert_dtype_not_equal(np.dtype('f8'), np.dtype(('f8', (1,)))) + assert_dtype_not_equal(np.dtype(('f8', (1,))), np.dtype(('f8', (1, 1)))) + assert_dtype_not_equal(np.dtype(('f4', (3, 2))), np.dtype(('f4', (2, 3)))) + + def test_shape_monster(self): + """Test some more complicated cases that shouldn't be equal""" + assert_dtype_not_equal( + np.dtype(([('a', 'f4', (2, 1)), ('b', 'f8', (1, 3))], (2, 2))), + np.dtype(([('a', 'f4', (1, 2)), ('b', 'f8', (1, 3))], (2, 2)))) + assert_dtype_not_equal( + np.dtype(([('a', 'f4', (2, 1)), ('b', 'f8', (1, 3))], (2, 2))), + np.dtype(([('a', 'f4', (2, 1)), ('b', 'i8', (1, 3))], (2, 2)))) + assert_dtype_not_equal( + np.dtype(([('a', 'f4', (2, 1)), ('b', 'f8', (1, 3))], (2, 2))), + np.dtype(([('e', 'f8', (1, 3)), ('d', 'f4', (2, 1))], (2, 2)))) + assert_dtype_not_equal( + np.dtype(([('a', [('a', 'i4', 6)], (2, 1)), ('b', 'f8', (1, 3))], (2, 2))), + np.dtype(([('a', [('a', 'u4', 6)], (2, 1)), ('b', 'f8', (1, 3))], (2, 2)))) + + def test_shape_sequence(self): + # Any sequence of integers should work as shape, but the result + # 
should be a tuple (immutable) of base type integers. + a = np.array([1, 2, 3], dtype=np.int16) + l = [1, 2, 3] + # Array gets converted + dt = np.dtype([('a', 'f4', a)]) + assert_(isinstance(dt['a'].shape, tuple)) + assert_(isinstance(dt['a'].shape[0], int)) + # List gets converted + dt = np.dtype([('a', 'f4', l)]) + assert_(isinstance(dt['a'].shape, tuple)) + # + + class IntLike(object): + def __index__(self): + return 3 + + def __int__(self): + # (a PyNumber_Check fails without __int__) + return 3 + + dt = np.dtype([('a', 'f4', IntLike())]) + assert_(isinstance(dt['a'].shape, tuple)) + assert_(isinstance(dt['a'].shape[0], int)) + dt = np.dtype([('a', 'f4', (IntLike(),))]) + assert_(isinstance(dt['a'].shape, tuple)) + assert_(isinstance(dt['a'].shape[0], int)) + + def test_shape_matches_ndim(self): + dt = np.dtype([('a', 'f4', ())]) + assert_equal(dt['a'].shape, ()) + assert_equal(dt['a'].ndim, 0) + + dt = np.dtype([('a', 'f4')]) + assert_equal(dt['a'].shape, ()) + assert_equal(dt['a'].ndim, 0) + + dt = np.dtype([('a', 'f4', 4)]) + assert_equal(dt['a'].shape, (4,)) + assert_equal(dt['a'].ndim, 1) + + dt = np.dtype([('a', 'f4', (1, 2, 3))]) + assert_equal(dt['a'].shape, (1, 2, 3)) + assert_equal(dt['a'].ndim, 3) + + def test_shape_invalid(self): + # Check that the shape is valid. + max_int = np.iinfo(np.intc).max + max_intp = np.iinfo(np.intp).max + # Too large values (the datatype is part of this) + assert_raises(ValueError, np.dtype, [('a', 'f4', max_int // 4 + 1)]) + assert_raises(ValueError, np.dtype, [('a', 'f4', max_int + 1)]) + assert_raises(ValueError, np.dtype, [('a', 'f4', (max_int, 2))]) + # Takes a different code path (fails earlier: + assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)]) + # Negative values + assert_raises(ValueError, np.dtype, [('a', 'f4', -1)]) + assert_raises(ValueError, np.dtype, [('a', 'f4', (-1, -1))]) + + def test_alignment(self): + #Check that subarrays are aligned + t1 = np.dtype('1i4', align=True) + t2 = np.dtype('2i4', align=True) + assert_equal(t1.alignment, t2.alignment) + + +def iter_struct_object_dtypes(): + """ + Iterates over a few complex dtypes and object pattern which + fill the array with a given object (defaults to a singleton). + + Yields + ------ + dtype : dtype + pattern : tuple + Structured tuple for use with `np.array`. + count : int + Number of objects stored in the dtype. + singleton : object + A singleton object. The returned pattern is constructed so that + all objects inside the datatype are set to the singleton. + """ + obj = object() + + dt = np.dtype([('b', 'O', (2, 3))]) + p = ([[obj] * 3] * 2,) + yield pytest.param(dt, p, 6, obj, id="<subarray>") + + dt = np.dtype([('a', 'i4'), ('b', 'O', (2, 3))]) + p = (0, [[obj] * 3] * 2) + yield pytest.param(dt, p, 6, obj, id="<subarray in field>") + + dt = np.dtype([('a', 'i4'), + ('b', [('ba', 'O'), ('bb', 'i1')], (2, 3))]) + p = (0, [[(obj, 0)] * 3] * 2) + yield pytest.param(dt, p, 6, obj, id="<structured subarray 1>") + + dt = np.dtype([('a', 'i4'), + ('b', [('ba', 'O'), ('bb', 'O')], (2, 3))]) + p = (0, [[(obj, obj)] * 3] * 2) + yield pytest.param(dt, p, 12, obj, id="<structured subarray 2>") + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +class TestStructuredObjectRefcounting: + """These tests cover various uses of complicated structured types which + include objects and thus require reference counting. 
+ """ + @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'], + iter_struct_object_dtypes()) + @pytest.mark.parametrize(["creation_func", "creation_obj"], [ + pytest.param(np.empty, None, + # None is probably used for too many things + marks=pytest.mark.skip("unreliable due to python's behaviour")), + (np.ones, 1), + (np.zeros, 0)]) + def test_structured_object_create_delete(self, dt, pat, count, singleton, + creation_func, creation_obj): + """Structured object reference counting in creation and deletion""" + # The test assumes that 0, 1, and None are singletons. + gc.collect() + before = sys.getrefcount(creation_obj) + arr = creation_func(3, dt) + + now = sys.getrefcount(creation_obj) + assert now - before == count * 3 + del arr + now = sys.getrefcount(creation_obj) + assert now == before + + @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'], + iter_struct_object_dtypes()) + def test_structured_object_item_setting(self, dt, pat, count, singleton): + """Structured object reference counting for simple item setting""" + one = 1 + + gc.collect() + before = sys.getrefcount(singleton) + arr = np.array([pat] * 3, dt) + assert sys.getrefcount(singleton) - before == count * 3 + # Fill with `1` and check that it was replaced correctly: + before2 = sys.getrefcount(one) + arr[...] = one + after2 = sys.getrefcount(one) + assert after2 - before2 == count * 3 + del arr + gc.collect() + assert sys.getrefcount(one) == before2 + assert sys.getrefcount(singleton) == before + + @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'], + iter_struct_object_dtypes()) + @pytest.mark.parametrize( + ['shape', 'index', 'items_changed'], + [((3,), ([0, 2],), 2), + ((3, 2), ([0, 2], slice(None)), 4), + ((3, 2), ([0, 2], [1]), 2), + ((3,), ([True, False, True]), 2)]) + def test_structured_object_indexing(self, shape, index, items_changed, + dt, pat, count, singleton): + """Structured object reference counting for advanced indexing.""" + zero = 0 + one = 1 + + arr = np.zeros(shape, dt) + + gc.collect() + before_zero = sys.getrefcount(zero) + before_one = sys.getrefcount(one) + # Test item getting: + part = arr[index] + after_zero = sys.getrefcount(zero) + assert after_zero - before_zero == count * items_changed + del part + # Test item setting: + arr[index] = one + gc.collect() + after_zero = sys.getrefcount(zero) + after_one = sys.getrefcount(one) + assert before_zero - after_zero == count * items_changed + assert after_one - before_one == count * items_changed + + @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'], + iter_struct_object_dtypes()) + def test_structured_object_take_and_repeat(self, dt, pat, count, singleton): + """Structured object reference counting for specialized functions. + The older functions such as take and repeat use different code paths + then item setting (when writing this). + """ + indices = [0, 1] + + arr = np.array([pat] * 3, dt) + gc.collect() + before = sys.getrefcount(singleton) + res = arr.take(indices) + after = sys.getrefcount(singleton) + assert after - before == count * 2 + new = res.repeat(10) + gc.collect() + after_repeat = sys.getrefcount(singleton) + assert after_repeat - after == count * 2 * 10 + + +class TestStructuredDtypeSparseFields(object): + """Tests subarray fields which contain sparse dtypes so that + not all memory is used by the dtype work. Such dtype's should + leave the underlying memory unchanged. 
+ """ + dtype = np.dtype([('a', {'names':['aa', 'ab'], 'formats':['f', 'f'], + 'offsets':[0, 4]}, (2, 3))]) + sparse_dtype = np.dtype([('a', {'names':['ab'], 'formats':['f'], + 'offsets':[4]}, (2, 3))]) + + @pytest.mark.xfail(reason="inaccessible data is changed see gh-12686.") + @pytest.mark.valgrind_error(reason="reads from unitialized buffers.") + def test_sparse_field_assignment(self): + arr = np.zeros(3, self.dtype) + sparse_arr = arr.view(self.sparse_dtype) + + sparse_arr[...] = np.finfo(np.float32).max + # dtype is reduced when accessing the field, so shape is (3, 2, 3): + assert_array_equal(arr["a"]["aa"], np.zeros((3, 2, 3))) + + def test_sparse_field_assignment_fancy(self): + # Fancy assignment goes to the copyswap function for comlex types: + arr = np.zeros(3, self.dtype) + sparse_arr = arr.view(self.sparse_dtype) + + sparse_arr[[0, 1, 2]] = np.finfo(np.float32).max + # dtype is reduced when accessing the field, so shape is (3, 2, 3): + assert_array_equal(arr["a"]["aa"], np.zeros((3, 2, 3))) + + +class TestMonsterType(object): + """Test deeply nested subtypes.""" + + def test1(self): + simple1 = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'], + 'titles': ['Red pixel', 'Blue pixel']}) + a = np.dtype([('yo', int), ('ye', simple1), + ('yi', np.dtype((int, (3, 2))))]) + b = np.dtype([('yo', int), ('ye', simple1), + ('yi', np.dtype((int, (3, 2))))]) + assert_dtype_equal(a, b) + + c = np.dtype([('yo', int), ('ye', simple1), + ('yi', np.dtype((a, (3, 2))))]) + d = np.dtype([('yo', int), ('ye', simple1), + ('yi', np.dtype((a, (3, 2))))]) + assert_dtype_equal(c, d) + +class TestMetadata(object): + def test_no_metadata(self): + d = np.dtype(int) + assert_(d.metadata is None) + + def test_metadata_takes_dict(self): + d = np.dtype(int, metadata={'datum': 1}) + assert_(d.metadata == {'datum': 1}) + + def test_metadata_rejects_nondict(self): + assert_raises(TypeError, np.dtype, int, metadata='datum') + assert_raises(TypeError, np.dtype, int, metadata=1) + assert_raises(TypeError, np.dtype, int, metadata=None) + + def test_nested_metadata(self): + d = np.dtype([('a', np.dtype(int, metadata={'datum': 1}))]) + assert_(d['a'].metadata == {'datum': 1}) + + def test_base_metadata_copied(self): + d = np.dtype((np.void, np.dtype('i4,i4', metadata={'datum': 1}))) + assert_(d.metadata == {'datum': 1}) + +class TestString(object): + def test_complex_dtype_str(self): + dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(str(dt), + "[('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])]") + + # If the sticky aligned flag is set to True, it makes the + # str() function use a dict representation with an 'aligned' flag + dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], + (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])], + align=True) + assert_equal(str(dt), + "{'names':['top','bottom'], " + "'formats':[([('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,))," + "[('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))]], " + "'offsets':[0,76800], " + "'itemsize':80000, " + "'aligned':True}") + assert_equal(np.dtype(eval(str(dt))), dt) + + dt = np.dtype({'names': ['r', 'g', 'b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 
2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}) + assert_equal(str(dt), + "[(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')]") + + dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'], + 'formats': ['<u4', 'u1', 'u1', 'u1'], + 'offsets': [0, 0, 1, 2], + 'titles': ['Color', 'Red pixel', + 'Green pixel', 'Blue pixel']}) + assert_equal(str(dt), + "{'names':['rgba','r','g','b']," + " 'formats':['<u4','u1','u1','u1']," + " 'offsets':[0,0,1,2]," + " 'titles':['Color','Red pixel'," + "'Green pixel','Blue pixel']," + " 'itemsize':4}") + + dt = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'], + 'offsets': [0, 2], + 'titles': ['Red pixel', 'Blue pixel']}) + assert_equal(str(dt), + "{'names':['r','b']," + " 'formats':['u1','u1']," + " 'offsets':[0,2]," + " 'titles':['Red pixel','Blue pixel']," + " 'itemsize':3}") + + dt = np.dtype([('a', '<m8[D]'), ('b', '<M8[us]')]) + assert_equal(str(dt), + "[('a', '<m8[D]'), ('b', '<M8[us]')]") + + def test_repr_structured(self): + dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(repr(dt), + "dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])])") + + dt = np.dtype({'names': ['r', 'g', 'b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}, + align=True) + assert_equal(repr(dt), + "dtype([(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')], align=True)") + + def test_repr_structured_not_packed(self): + dt = np.dtype({'names': ['rgba', 'r', 'g', 'b'], + 'formats': ['<u4', 'u1', 'u1', 'u1'], + 'offsets': [0, 0, 1, 2], + 'titles': ['Color', 'Red pixel', + 'Green pixel', 'Blue pixel']}, align=True) + assert_equal(repr(dt), + "dtype({'names':['rgba','r','g','b']," + " 'formats':['<u4','u1','u1','u1']," + " 'offsets':[0,0,1,2]," + " 'titles':['Color','Red pixel'," + "'Green pixel','Blue pixel']," + " 'itemsize':4}, align=True)") + + dt = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'], + 'offsets': [0, 2], + 'titles': ['Red pixel', 'Blue pixel'], + 'itemsize': 4}) + assert_equal(repr(dt), + "dtype({'names':['r','b'], " + "'formats':['u1','u1'], " + "'offsets':[0,2], " + "'titles':['Red pixel','Blue pixel'], " + "'itemsize':4})") + + def test_repr_structured_datetime(self): + dt = np.dtype([('a', '<M8[D]'), ('b', '<m8[us]')]) + assert_equal(repr(dt), + "dtype([('a', '<M8[D]'), ('b', '<m8[us]')])") + + def test_repr_str_subarray(self): + dt = np.dtype(('<i2', (1,))) + assert_equal(repr(dt), "dtype(('<i2', (1,)))") + assert_equal(str(dt), "('<i2', (1,))") + + @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Python 2 only") + def test_dtype_str_with_long_in_shape(self): + # Pull request #376, should not error + np.dtype('(1L,)i4') + + def test_base_dtype_with_object_type(self): + # Issue gh-2798, should not error. 
+ np.array(['a'], dtype="O").astype(("O", [("name", "O")])) + + def test_empty_string_to_object(self): + # Pull request #4722 + np.array(["", ""]).astype(object) + + def test_void_subclass_unsized(self): + dt = np.dtype(np.record) + assert_equal(repr(dt), "dtype('V')") + assert_equal(str(dt), '|V0') + assert_equal(dt.name, 'record') + + def test_void_subclass_sized(self): + dt = np.dtype((np.record, 2)) + assert_equal(repr(dt), "dtype('V2')") + assert_equal(str(dt), '|V2') + assert_equal(dt.name, 'record16') + + def test_void_subclass_fields(self): + dt = np.dtype((np.record, [('a', '<u2')])) + assert_equal(repr(dt), "dtype((numpy.record, [('a', '<u2')]))") + assert_equal(str(dt), "(numpy.record, [('a', '<u2')])") + assert_equal(dt.name, 'record16') + + +class TestDtypeAttributeDeletion(object): + + def test_dtype_non_writable_attributes_deletion(self): + dt = np.dtype(np.double) + attr = ["subdtype", "descr", "str", "name", "base", "shape", + "isbuiltin", "isnative", "isalignedstruct", "fields", + "metadata", "hasobject"] + + for s in attr: + assert_raises(AttributeError, delattr, dt, s) + + def test_dtype_writable_attributes_deletion(self): + dt = np.dtype(np.double) + attr = ["names"] + for s in attr: + assert_raises(AttributeError, delattr, dt, s) + + +class TestDtypeAttributes(object): + def test_descr_has_trailing_void(self): + # see gh-6359 + dtype = np.dtype({ + 'names': ['A', 'B'], + 'formats': ['f4', 'f4'], + 'offsets': [0, 8], + 'itemsize': 16}) + new_dtype = np.dtype(dtype.descr) + assert_equal(new_dtype.itemsize, 16) + + @pytest.mark.parametrize('t', np.typeDict.values()) + def test_name_builtin(self, t): + name = t.__name__ + if name.endswith('_'): + name = name[:-1] + assert_equal(np.dtype(t).name, name) + + def test_name_dtype_subclass(self): + # Ticket #4357 + class user_def_subcls(np.void): + pass + assert_equal(np.dtype(user_def_subcls).name, 'user_def_subcls') + + +class TestPickling(object): + + def check_pickling(self, dtype): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + pickled = pickle.loads(pickle.dumps(dtype, proto)) + assert_equal(pickled, dtype) + assert_equal(pickled.descr, dtype.descr) + if dtype.metadata is not None: + assert_equal(pickled.metadata, dtype.metadata) + # Check the reconstructed dtype is functional + x = np.zeros(3, dtype=dtype) + y = np.zeros(3, dtype=pickled) + assert_equal(x, y) + assert_equal(x[0], y[0]) + + @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, + np.unicode, bool]) + def test_builtin(self, t): + self.check_pickling(np.dtype(t)) + + def test_structured(self): + dt = np.dtype(([('a', '>f4', (2, 1)), ('b', '<f8', (1, 3))], (2, 2))) + self.check_pickling(dt) + + def test_structured_aligned(self): + dt = np.dtype('i4, i1', align=True) + self.check_pickling(dt) + + def test_structured_unaligned(self): + dt = np.dtype('i4, i1', align=False) + self.check_pickling(dt) + + def test_structured_padded(self): + dt = np.dtype({ + 'names': ['A', 'B'], + 'formats': ['f4', 'f4'], + 'offsets': [0, 8], + 'itemsize': 16}) + self.check_pickling(dt) + + def test_structured_titles(self): + dt = np.dtype({'names': ['r', 'b'], + 'formats': ['u1', 'u1'], + 'titles': ['Red pixel', 'Blue pixel']}) + self.check_pickling(dt) + + @pytest.mark.parametrize('base', ['m8', 'M8']) + @pytest.mark.parametrize('unit', ['', 'Y', 'M', 'W', 'D', 'h', 'm', 's', + 'ms', 'us', 'ns', 'ps', 'fs', 'as']) + def test_datetime(self, base, unit): + dt = np.dtype('%s[%s]' % (base, unit) if unit else base) + self.check_pickling(dt) + if unit: + dt = 
np.dtype('%s[7%s]' % (base, unit)) + self.check_pickling(dt) + + def test_metadata(self): + dt = np.dtype(int, metadata={'datum': 1}) + self.check_pickling(dt) + + +def test_rational_dtype(): + # test for bug gh-5719 + a = np.array([1111], dtype=rational).astype + assert_raises(OverflowError, a, 'int8') + + # test that dtype detection finds user-defined types + x = rational(1) + assert_equal(np.array([x,x]).dtype, np.dtype(rational)) + + +def test_dtypes_are_true(): + # test for gh-6294 + assert bool(np.dtype('f8')) + assert bool(np.dtype('i8')) + assert bool(np.dtype([('a', 'i8'), ('b', 'f4')])) + + +def test_invalid_dtype_string(): + # test for gh-10440 + assert_raises(TypeError, np.dtype, 'f8,i8,[f8,i8]') + assert_raises(TypeError, np.dtype, u'Fl\xfcgel') + + +class TestFromCTypes(object): + + @staticmethod + def check(ctype, dtype): + dtype = np.dtype(dtype) + assert_equal(np.dtype(ctype), dtype) + assert_equal(np.dtype(ctype()), dtype) + + def test_array(self): + c8 = ctypes.c_uint8 + self.check( 3 * c8, (np.uint8, (3,))) + self.check( 1 * c8, (np.uint8, (1,))) + self.check( 0 * c8, (np.uint8, (0,))) + self.check(1 * (3 * c8), ((np.uint8, (3,)), (1,))) + self.check(3 * (1 * c8), ((np.uint8, (1,)), (3,))) + + def test_padded_structure(self): + class PaddedStruct(ctypes.Structure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', np.uint8), + ('b', np.uint16) + ], align=True) + self.check(PaddedStruct, expected) + + def test_bit_fields(self): + class BitfieldStruct(ctypes.Structure): + _fields_ = [ + ('a', ctypes.c_uint8, 7), + ('b', ctypes.c_uint8, 1) + ] + assert_raises(TypeError, np.dtype, BitfieldStruct) + assert_raises(TypeError, np.dtype, BitfieldStruct()) + + def test_pointer(self): + p_uint8 = ctypes.POINTER(ctypes.c_uint8) + assert_raises(TypeError, np.dtype, p_uint8) + + def test_void_pointer(self): + self.check(ctypes.c_void_p, np.uintp) + + def test_union(self): + class Union(ctypes.Union): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16), + ] + expected = np.dtype(dict( + names=['a', 'b'], + formats=[np.uint8, np.uint16], + offsets=[0, 0], + itemsize=2 + )) + self.check(Union, expected) + + def test_union_with_struct_packed(self): + class Struct(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + + class Union(ctypes.Union): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16), + ('c', ctypes.c_uint32), + ('d', Struct), + ] + expected = np.dtype(dict( + names=['a', 'b', 'c', 'd'], + formats=['u1', np.uint16, np.uint32, [('one', 'u1'), ('two', np.uint32)]], + offsets=[0, 0, 0, 0], + itemsize=ctypes.sizeof(Union) + )) + self.check(Union, expected) + + def test_union_packed(self): + class Struct(ctypes.Structure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + class Union(ctypes.Union): + _pack_ = 1 + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16), + ('c', ctypes.c_uint32), + ('d', Struct), + ] + expected = np.dtype(dict( + names=['a', 'b', 'c', 'd'], + formats=['u1', np.uint16, np.uint32, [('one', 'u1'), ('two', np.uint32)]], + offsets=[0, 0, 0, 0], + itemsize=ctypes.sizeof(Union) + )) + self.check(Union, expected) + + def test_packed_structure(self): + class PackedStructure(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', np.uint8), + ('b', np.uint16) + ]) + self.check(PackedStructure, expected) + + def 
test_large_packed_structure(self): + class PackedStructure(ctypes.Structure): + _pack_ = 2 + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16), + ('c', ctypes.c_uint8), + ('d', ctypes.c_uint16), + ('e', ctypes.c_uint32), + ('f', ctypes.c_uint32), + ('g', ctypes.c_uint8) + ] + expected = np.dtype(dict( + formats=[np.uint8, np.uint16, np.uint8, np.uint16, np.uint32, np.uint32, np.uint8 ], + offsets=[0, 2, 4, 6, 8, 12, 16], + names=['a', 'b', 'c', 'd', 'e', 'f', 'g'], + itemsize=18)) + self.check(PackedStructure, expected) + + def test_big_endian_structure_packed(self): + class BigEndStruct(ctypes.BigEndianStructure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + expected = np.dtype([('one', 'u1'), ('two', '>u4')]) + self.check(BigEndStruct, expected) + + def test_little_endian_structure_packed(self): + class LittleEndStruct(ctypes.LittleEndianStructure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + expected = np.dtype([('one', 'u1'), ('two', '<u4')]) + self.check(LittleEndStruct, expected) + + def test_little_endian_structure(self): + class PaddedStruct(ctypes.LittleEndianStructure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', '<B'), + ('b', '<H') + ], align=True) + self.check(PaddedStruct, expected) + + def test_big_endian_structure(self): + class PaddedStruct(ctypes.BigEndianStructure): + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16) + ] + expected = np.dtype([ + ('a', '>B'), + ('b', '>H') + ], align=True) + self.check(PaddedStruct, expected) + + def test_simple_endian_types(self): + self.check(ctypes.c_uint16.__ctype_le__, np.dtype('<u2')) + self.check(ctypes.c_uint16.__ctype_be__, np.dtype('>u2')) + self.check(ctypes.c_uint8.__ctype_le__, np.dtype('u1')) + self.check(ctypes.c_uint8.__ctype_be__, np.dtype('u1')) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_einsum.py b/contrib/python/numpy/py2/numpy/core/tests/test_einsum.py new file mode 100644 index 0000000000..1b5b4cb262 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_einsum.py @@ -0,0 +1,1014 @@ +from __future__ import division, absolute_import, print_function + +import itertools + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_raises, suppress_warnings, assert_raises_regex, assert_allclose + ) + +# Setup for optimize einsum +chars = 'abcdefghij' +sizes = np.array([2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3]) +global_size_dict = dict(zip(chars, sizes)) + + +class TestEinsum(object): + def test_einsum_errors(self): + for do_opt in [True, False]: + # Need enough arguments + assert_raises(ValueError, np.einsum, optimize=do_opt) + assert_raises(ValueError, np.einsum, "", optimize=do_opt) + + # subscripts must be a string + assert_raises(TypeError, np.einsum, 0, 0, optimize=do_opt) + + # out parameter must be an array + assert_raises(TypeError, np.einsum, "", 0, out='test', + optimize=do_opt) + + # order parameter must be a valid order + assert_raises(TypeError, np.einsum, "", 0, order='W', + optimize=do_opt) + + # casting parameter must be a valid casting + assert_raises(ValueError, np.einsum, "", 0, casting='blah', + optimize=do_opt) + + # dtype parameter must be a valid dtype + assert_raises(TypeError, np.einsum, "", 0, dtype='bad_data_type', + optimize=do_opt) + + # other keyword arguments are rejected + assert_raises(TypeError, np.einsum, "", 0, bad_arg=0, + 
optimize=do_opt) + + # issue 4528 revealed a segfault with this call + assert_raises(TypeError, np.einsum, *(None,)*63, optimize=do_opt) + + # number of operands must match count in subscripts string + assert_raises(ValueError, np.einsum, "", 0, 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, ",", 0, [0], [0], + optimize=do_opt) + assert_raises(ValueError, np.einsum, ",", [0], optimize=do_opt) + + # can't have more subscripts than dimensions in the operand + assert_raises(ValueError, np.einsum, "i", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "ij", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "...i", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "i...j", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "i...", 0, optimize=do_opt) + assert_raises(ValueError, np.einsum, "ij...", [0, 0], optimize=do_opt) + + # invalid ellipsis + assert_raises(ValueError, np.einsum, "i..", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, ".i...", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "j->..j", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "j->.j...", [0, 0], optimize=do_opt) + + # invalid subscript character + assert_raises(ValueError, np.einsum, "i%...", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "...j$", [0, 0], optimize=do_opt) + assert_raises(ValueError, np.einsum, "i->&", [0, 0], optimize=do_opt) + + # output subscripts must appear in input + assert_raises(ValueError, np.einsum, "i->ij", [0, 0], optimize=do_opt) + + # output subscripts may only be specified once + assert_raises(ValueError, np.einsum, "ij->jij", [[0, 0], [0, 0]], + optimize=do_opt) + + # dimensions much match when being collapsed + assert_raises(ValueError, np.einsum, "ii", + np.arange(6).reshape(2, 3), optimize=do_opt) + assert_raises(ValueError, np.einsum, "ii->i", + np.arange(6).reshape(2, 3), optimize=do_opt) + + # broadcasting to new dimensions must be enabled explicitly + assert_raises(ValueError, np.einsum, "i", np.arange(6).reshape(2, 3), + optimize=do_opt) + assert_raises(ValueError, np.einsum, "i->i", [[0, 1], [0, 1]], + out=np.arange(4).reshape(2, 2), optimize=do_opt) + with assert_raises_regex(ValueError, "'b'"): + # gh-11221 - 'c' erroneously appeared in the error message + a = np.ones((3, 3, 4, 5, 6)) + b = np.ones((3, 4, 5)) + np.einsum('aabcb,abc', a, b) + + def test_einsum_views(self): + # pass-through + for do_opt in [True, False]: + a = np.arange(6) + a.shape = (2, 3) + + b = np.einsum("...", a, optimize=do_opt) + assert_(b.base is a) + + b = np.einsum(a, [Ellipsis], optimize=do_opt) + assert_(b.base is a) + + b = np.einsum("ij", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a) + + b = np.einsum(a, [0, 1], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a) + + # output is writeable whenever input is writeable + b = np.einsum("...", a, optimize=do_opt) + assert_(b.flags['WRITEABLE']) + a.flags['WRITEABLE'] = False + b = np.einsum("...", a, optimize=do_opt) + assert_(not b.flags['WRITEABLE']) + + # transpose + a = np.arange(6) + a.shape = (2, 3) + + b = np.einsum("ji", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.T) + + b = np.einsum(a, [1, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.T) + + # diagonal + a = np.arange(9) + a.shape = (3, 3) + + b = np.einsum("ii->i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0], [0], optimize=do_opt) + 
assert_(b.base is a) + assert_equal(b, [a[i, i] for i in range(3)]) + + # diagonal with various ways of broadcasting an additional dimension + a = np.arange(27) + a.shape = (3, 3, 3) + + b = np.einsum("...ii->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] for x in a]) + + b = np.einsum(a, [Ellipsis, 0, 0], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] for x in a]) + + b = np.einsum("ii...->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(2, 0, 1)]) + + b = np.einsum(a, [0, 0, Ellipsis], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(2, 0, 1)]) + + b = np.einsum("...ii->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum(a, [Ellipsis, 0, 0], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum("jii->ij", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum(a, [1, 0, 0], [0, 1], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[:, i, i] for i in range(3)]) + + b = np.einsum("ii...->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(2, 0, 1)[:, i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0, Ellipsis], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(2, 0, 1)[:, i, i] for i in range(3)]) + + b = np.einsum("i...i->i...", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(1, 0, 2)[:, i, i] for i in range(3)]) + + b = np.einsum(a, [0, Ellipsis, 0], [0, Ellipsis], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a.transpose(1, 0, 2)[:, i, i] for i in range(3)]) + + b = np.einsum("i...i->...i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(1, 0, 2)]) + + b = np.einsum(a, [0, Ellipsis, 0], [Ellipsis, 0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [[x[i, i] for i in range(3)] + for x in a.transpose(1, 0, 2)]) + + # triple diagonal + a = np.arange(27) + a.shape = (3, 3, 3) + + b = np.einsum("iii->i", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i, i] for i in range(3)]) + + b = np.einsum(a, [0, 0, 0], [0], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, [a[i, i, i] for i in range(3)]) + + # swap axes + a = np.arange(24) + a.shape = (2, 3, 4) + + b = np.einsum("ijk->jik", a, optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.swapaxes(0, 1)) + + b = np.einsum(a, [0, 1, 2], [1, 0, 2], optimize=do_opt) + assert_(b.base is a) + assert_equal(b, a.swapaxes(0, 1)) + + def check_einsum_sums(self, dtype, do_opt=False): + # Check various sums. Does many sizes to exercise unrolled loops. 
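+ # e.g. np.einsum("i->", a) must agree with np.sum(a, axis=-1), and the
+ # list-of-axes spelling np.einsum(a, [0], []) is the equivalent call.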
+ + # sum(a, axis=-1) + for n in range(1, 17): + a = np.arange(n, dtype=dtype) + assert_equal(np.einsum("i->", a, optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + assert_equal(np.einsum(a, [0], [], optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + + for n in range(1, 17): + a = np.arange(2*3*n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("...i->...", a, optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + assert_equal(np.einsum(a, [Ellipsis, 0], [Ellipsis], optimize=do_opt), + np.sum(a, axis=-1).astype(dtype)) + + # sum(a, axis=0) + for n in range(1, 17): + a = np.arange(2*n, dtype=dtype).reshape(2, n) + assert_equal(np.einsum("i...->...", a, optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + assert_equal(np.einsum(a, [0, Ellipsis], [Ellipsis], optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + + for n in range(1, 17): + a = np.arange(2*3*n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("i...->...", a, optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + assert_equal(np.einsum(a, [0, Ellipsis], [Ellipsis], optimize=do_opt), + np.sum(a, axis=0).astype(dtype)) + + # trace(a) + for n in range(1, 17): + a = np.arange(n*n, dtype=dtype).reshape(n, n) + assert_equal(np.einsum("ii", a, optimize=do_opt), + np.trace(a).astype(dtype)) + assert_equal(np.einsum(a, [0, 0], optimize=do_opt), + np.trace(a).astype(dtype)) + + # multiply(a, b) + assert_equal(np.einsum("..., ...", 3, 4), 12) # scalar case + for n in range(1, 17): + a = np.arange(3 * n, dtype=dtype).reshape(3, n) + b = np.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + assert_equal(np.einsum("..., ...", a, b, optimize=do_opt), + np.multiply(a, b)) + assert_equal(np.einsum(a, [Ellipsis], b, [Ellipsis], optimize=do_opt), + np.multiply(a, b)) + + # inner(a,b) + for n in range(1, 17): + a = np.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("...i, ...i", a, b, optimize=do_opt), np.inner(a, b)) + assert_equal(np.einsum(a, [Ellipsis, 0], b, [Ellipsis, 0], optimize=do_opt), + np.inner(a, b)) + + for n in range(1, 11): + a = np.arange(n * 3 * 2, dtype=dtype).reshape(n, 3, 2) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("i..., i...", a, b, optimize=do_opt), + np.inner(a.T, b.T).T) + assert_equal(np.einsum(a, [0, Ellipsis], b, [0, Ellipsis], optimize=do_opt), + np.inner(a.T, b.T).T) + + # outer(a,b) + for n in range(1, 17): + a = np.arange(3, dtype=dtype)+1 + b = np.arange(n, dtype=dtype)+1 + assert_equal(np.einsum("i,j", a, b, optimize=do_opt), + np.outer(a, b)) + assert_equal(np.einsum(a, [0], b, [1], optimize=do_opt), + np.outer(a, b)) + + # Suppress the complex warnings for the 'as f8' tests + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + + # matvec(a,b) / a.dot(b) where a is matrix, b is vector + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("ij, j", a, b, optimize=do_opt), + np.dot(a, b)) + assert_equal(np.einsum(a, [0, 1], b, [1], optimize=do_opt), + np.dot(a, b)) + + c = np.arange(4, dtype=dtype) + np.einsum("ij,j", a, b, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + c[...] 
= 0 + np.einsum(a, [0, 1], b, [1], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n, dtype=dtype) + assert_equal(np.einsum("ji,j", a.T, b.T, optimize=do_opt), + np.dot(b.T, a.T)) + assert_equal(np.einsum(a.T, [1, 0], b.T, [1], optimize=do_opt), + np.dot(b.T, a.T)) + + c = np.arange(4, dtype=dtype) + np.einsum("ji,j", a.T, b.T, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(b.T.astype('f8'), + a.T.astype('f8')).astype(dtype)) + c[...] = 0 + np.einsum(a.T, [1, 0], b.T, [1], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(b.T.astype('f8'), + a.T.astype('f8')).astype(dtype)) + + # matmat(a,b) / a.dot(b) where a is matrix, b is matrix + for n in range(1, 17): + if n < 8 or dtype != 'f2': + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n*6, dtype=dtype).reshape(n, 6) + assert_equal(np.einsum("ij,jk", a, b, optimize=do_opt), + np.dot(a, b)) + assert_equal(np.einsum(a, [0, 1], b, [1, 2], optimize=do_opt), + np.dot(a, b)) + + for n in range(1, 17): + a = np.arange(4*n, dtype=dtype).reshape(4, n) + b = np.arange(n*6, dtype=dtype).reshape(n, 6) + c = np.arange(24, dtype=dtype).reshape(4, 6) + np.einsum("ij,jk", a, b, out=c, dtype='f8', casting='unsafe', + optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + c[...] = 0 + np.einsum(a, [0, 1], b, [1, 2], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, + np.dot(a.astype('f8'), + b.astype('f8')).astype(dtype)) + + # matrix triple product (note this is not currently an efficient + # way to multiply 3 matrices) + a = np.arange(12, dtype=dtype).reshape(3, 4) + b = np.arange(20, dtype=dtype).reshape(4, 5) + c = np.arange(30, dtype=dtype).reshape(5, 6) + if dtype != 'f2': + assert_equal(np.einsum("ij,jk,kl", a, b, c, optimize=do_opt), + a.dot(b).dot(c)) + assert_equal(np.einsum(a, [0, 1], b, [1, 2], c, [2, 3], + optimize=do_opt), a.dot(b).dot(c)) + + d = np.arange(18, dtype=dtype).reshape(3, 6) + np.einsum("ij,jk,kl", a, b, c, out=d, + dtype='f8', casting='unsafe', optimize=do_opt) + tgt = a.astype('f8').dot(b.astype('f8')) + tgt = tgt.dot(c.astype('f8')).astype(dtype) + assert_equal(d, tgt) + + d[...] = 0 + np.einsum(a, [0, 1], b, [1, 2], c, [2, 3], out=d, + dtype='f8', casting='unsafe', optimize=do_opt) + tgt = a.astype('f8').dot(b.astype('f8')) + tgt = tgt.dot(c.astype('f8')).astype(dtype) + assert_equal(d, tgt) + + # tensordot(a, b) + if np.dtype(dtype) != np.dtype('f2'): + a = np.arange(60, dtype=dtype).reshape(3, 4, 5) + b = np.arange(24, dtype=dtype).reshape(4, 3, 2) + assert_equal(np.einsum("ijk, jil -> kl", a, b), + np.tensordot(a, b, axes=([1, 0], [0, 1]))) + assert_equal(np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3]), + np.tensordot(a, b, axes=([1, 0], [0, 1]))) + + c = np.arange(10, dtype=dtype).reshape(5, 2) + np.einsum("ijk,jil->kl", a, b, out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, np.tensordot(a.astype('f8'), b.astype('f8'), + axes=([1, 0], [0, 1])).astype(dtype)) + c[...] 
= 0 + np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3], out=c, + dtype='f8', casting='unsafe', optimize=do_opt) + assert_equal(c, np.tensordot(a.astype('f8'), b.astype('f8'), + axes=([1, 0], [0, 1])).astype(dtype)) + + # logical_and(logical_and(a!=0, b!=0), c!=0) + a = np.array([1, 3, -2, 0, 12, 13, 0, 1], dtype=dtype) + b = np.array([0, 3.5, 0., -2, 0, 1, 3, 12], dtype=dtype) + c = np.array([True, True, False, True, True, False, True, True]) + assert_equal(np.einsum("i,i,i->i", a, b, c, + dtype='?', casting='unsafe', optimize=do_opt), + np.logical_and(np.logical_and(a != 0, b != 0), c != 0)) + assert_equal(np.einsum(a, [0], b, [0], c, [0], [0], + dtype='?', casting='unsafe'), + np.logical_and(np.logical_and(a != 0, b != 0), c != 0)) + + a = np.arange(9, dtype=dtype) + assert_equal(np.einsum(",i->", 3, a), 3*np.sum(a)) + assert_equal(np.einsum(3, [], a, [0], []), 3*np.sum(a)) + assert_equal(np.einsum("i,->", a, 3), 3*np.sum(a)) + assert_equal(np.einsum(a, [0], 3, [], []), 3*np.sum(a)) + + # Various stride0, contiguous, and SSE aligned variants + for n in range(1, 25): + a = np.arange(n, dtype=dtype) + if np.dtype(dtype).itemsize > 1: + assert_equal(np.einsum("...,...", a, a, optimize=do_opt), + np.multiply(a, a)) + assert_equal(np.einsum("i,i", a, a, optimize=do_opt), np.dot(a, a)) + assert_equal(np.einsum("i,->i", a, 2, optimize=do_opt), 2*a) + assert_equal(np.einsum(",i->i", 2, a, optimize=do_opt), 2*a) + assert_equal(np.einsum("i,->", a, 2, optimize=do_opt), 2*np.sum(a)) + assert_equal(np.einsum(",i->", 2, a, optimize=do_opt), 2*np.sum(a)) + + assert_equal(np.einsum("...,...", a[1:], a[:-1], optimize=do_opt), + np.multiply(a[1:], a[:-1])) + assert_equal(np.einsum("i,i", a[1:], a[:-1], optimize=do_opt), + np.dot(a[1:], a[:-1])) + assert_equal(np.einsum("i,->i", a[1:], 2, optimize=do_opt), 2*a[1:]) + assert_equal(np.einsum(",i->i", 2, a[1:], optimize=do_opt), 2*a[1:]) + assert_equal(np.einsum("i,->", a[1:], 2, optimize=do_opt), + 2*np.sum(a[1:])) + assert_equal(np.einsum(",i->", 2, a[1:], optimize=do_opt), + 2*np.sum(a[1:])) + + # An object array, summed as the data type + a = np.arange(9, dtype=object) + + b = np.einsum("i->", a, dtype=dtype, casting='unsafe') + assert_equal(b, np.sum(a)) + assert_equal(b.dtype, np.dtype(dtype)) + + b = np.einsum(a, [0], [], dtype=dtype, casting='unsafe') + assert_equal(b, np.sum(a)) + assert_equal(b.dtype, np.dtype(dtype)) + + # A case which was failing (ticket #1885) + p = np.arange(2) + 1 + q = np.arange(4).reshape(2, 2) + 3 + r = np.arange(4).reshape(2, 2) + 7 + assert_equal(np.einsum('z,mz,zm->', p, q, r), 253) + + # singleton dimensions broadcast (gh-10343) + p = np.ones((10,2)) + q = np.ones((1,2)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + np.einsum('ij,ij->j', p, q, optimize=False)) + assert_array_equal(np.einsum('ij,ij->j', p, q, optimize=True), + [10.] * 2) + + # a blas-compatible contraction broadcasting case which was failing + # for optimize=True (ticket #10930) + x = np.array([2., 3.]) + y = np.array([4.]) + assert_array_equal(np.einsum("i, i", x, y, optimize=False), 20.) + assert_array_equal(np.einsum("i, i", x, y, optimize=True), 20.) 
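+ # (y has shape (1,) and broadcasts against x, so both paths reduce to
+ # 2*4 + 3*4 = 20.)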
+ + # all-ones array was bypassing bug (ticket #10930) + p = np.ones((1, 5)) / 2 + q = np.ones((5, 5)) / 2 + for optimize in (True, False): + assert_array_equal(np.einsum("...ij,...jk->...ik", p, p, + optimize=optimize), + np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize)) + assert_array_equal(np.einsum("...ij,...jk->...ik", p, q, + optimize=optimize), + np.full((1, 5), 1.25)) + + # Cases which were failing (gh-10899) + x = np.eye(2, dtype=dtype) + y = np.ones(2, dtype=dtype) + assert_array_equal(np.einsum("ji,i->", x, y, optimize=optimize), + [2.]) # contig_contig_outstride0_two + assert_array_equal(np.einsum("i,ij->", y, x, optimize=optimize), + [2.]) # stride0_contig_outstride0_two + assert_array_equal(np.einsum("ij,i->", x, y, optimize=optimize), + [2.]) # contig_stride0_outstride0_two + + def test_einsum_sums_int8(self): + self.check_einsum_sums('i1') + + def test_einsum_sums_uint8(self): + self.check_einsum_sums('u1') + + def test_einsum_sums_int16(self): + self.check_einsum_sums('i2') + + def test_einsum_sums_uint16(self): + self.check_einsum_sums('u2') + + def test_einsum_sums_int32(self): + self.check_einsum_sums('i4') + self.check_einsum_sums('i4', True) + + def test_einsum_sums_uint32(self): + self.check_einsum_sums('u4') + self.check_einsum_sums('u4', True) + + def test_einsum_sums_int64(self): + self.check_einsum_sums('i8') + + def test_einsum_sums_uint64(self): + self.check_einsum_sums('u8') + + def test_einsum_sums_float16(self): + self.check_einsum_sums('f2') + + def test_einsum_sums_float32(self): + self.check_einsum_sums('f4') + + def test_einsum_sums_float64(self): + self.check_einsum_sums('f8') + self.check_einsum_sums('f8', True) + + def test_einsum_sums_longdouble(self): + self.check_einsum_sums(np.longdouble) + + def test_einsum_sums_cfloat64(self): + self.check_einsum_sums('c8') + self.check_einsum_sums('c8', True) + + def test_einsum_sums_cfloat128(self): + self.check_einsum_sums('c16') + + def test_einsum_sums_clongdouble(self): + self.check_einsum_sums(np.clongdouble) + + def test_einsum_misc(self): + # This call used to crash because of a bug in + # PyArray_AssignZero + a = np.ones((1, 2)) + b = np.ones((2, 2, 1)) + assert_equal(np.einsum('ij...,j...->i...', a, b), [[[2], [2]]]) + assert_equal(np.einsum('ij...,j...->i...', a, b, optimize=True), [[[2], [2]]]) + + # Regression test for issue #10369 (test unicode inputs with Python 2) + assert_equal(np.einsum(u'ij...,j...->i...', a, b), [[[2], [2]]]) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum(u'...i,...i', [1, 2, 3], [2, 3, 4]), 20) + assert_equal(np.einsum('...i,...i', [1, 2, 3], [2, 3, 4], + optimize=u'greedy'), 20) + + # The iterator had an issue with buffering this reduction + a = np.ones((5, 12, 4, 2, 3), np.int64) + b = np.ones((5, 12, 11), np.int64) + assert_equal(np.einsum('ijklm,ijn,ijn->', a, b, b), + np.einsum('ijklm,ijn->', a, b)) + assert_equal(np.einsum('ijklm,ijn,ijn->', a, b, b, optimize=True), + np.einsum('ijklm,ijn->', a, b, optimize=True)) + + # Issue #2027, was a problem in the contiguous 3-argument + # inner loop implementation + a = np.arange(1, 3) + b = np.arange(1, 5).reshape(2, 2) + c = np.arange(1, 9).reshape(4, 2) + assert_equal(np.einsum('x,yx,zx->xzy', a, b, c), + [[[1, 3], [3, 9], [5, 15], [7, 21]], + [[8, 16], [16, 32], [24, 48], [32, 64]]]) + assert_equal(np.einsum('x,yx,zx->xzy', a, b, c, optimize=True), + [[[1, 3], [3, 9], [5, 15], [7, 21]], + [[8, 16], [16, 32], [24, 48], [32, 64]]]) + + def test_subscript_range(self): + # 
Issue #7741, make sure that all letters of Latin alphabet (both uppercase & lowercase) can be used + # when creating a subscript from arrays + a = np.ones((2, 3)) + b = np.ones((3, 4)) + np.einsum(a, [0, 20], b, [20, 2], [0, 2], optimize=False) + np.einsum(a, [0, 27], b, [27, 2], [0, 2], optimize=False) + np.einsum(a, [0, 51], b, [51, 2], [0, 2], optimize=False) + assert_raises(ValueError, lambda: np.einsum(a, [0, 52], b, [52, 2], [0, 2], optimize=False)) + assert_raises(ValueError, lambda: np.einsum(a, [-1, 5], b, [5, 2], [-1, 2], optimize=False)) + + def test_einsum_broadcast(self): + # Issue #2455 change in handling ellipsis + # remove the 'middle broadcast' error + # only use the 'RIGHT' iteration in prepare_op_axes + # adds auto broadcast on left where it belongs + # broadcast on right has to be explicit + # We need to test the optimized parsing as well + + A = np.arange(2 * 3 * 4).reshape(2, 3, 4) + B = np.arange(3) + ref = np.einsum('ijk,j->ijk', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ij...,j...->ij...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ij...,...j->ij...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ij...,j->ij...', A, B, optimize=opt), ref) # used to raise error + + A = np.arange(12).reshape((4, 3)) + B = np.arange(6).reshape((3, 2)) + ref = np.einsum('ik,kj->ij', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ik...,k...->i...', A, B, optimize=opt), ref) + assert_equal(np.einsum('ik...,...kj->i...j', A, B, optimize=opt), ref) + assert_equal(np.einsum('...k,kj', A, B, optimize=opt), ref) # used to raise error + assert_equal(np.einsum('ik,k...->i...', A, B, optimize=opt), ref) # used to raise error + + dims = [2, 3, 4, 5] + a = np.arange(np.prod(dims)).reshape(dims) + v = np.arange(dims[2]) + ref = np.einsum('ijkl,k->ijl', a, v, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('ijkl,k', a, v, optimize=opt), ref) + assert_equal(np.einsum('...kl,k', a, v, optimize=opt), ref) # used to raise error + assert_equal(np.einsum('...kl,k...', a, v, optimize=opt), ref) + + J, K, M = 160, 160, 120 + A = np.arange(J * K * M).reshape(1, 1, 1, J, K, M) + B = np.arange(J * K * M * 3).reshape(J, K, M, 3) + ref = np.einsum('...lmn,...lmno->...o', A, B, optimize=False) + for opt in [True, False]: + assert_equal(np.einsum('...lmn,lmno->...o', A, B, + optimize=opt), ref) # used to raise error + + def test_einsum_fixedstridebug(self): + # Issue #4485 obscure einsum bug + # This case revealed a bug in nditer where it reported a stride + # as 'fixed' (0) when it was in fact not fixed during processing + # (0 or 4). The reason for the bug was that the check for a fixed + # stride was using the information from the 2D inner loop reuse + # to restrict the iteration dimensions it had to validate to be + # the same, but that 2D inner loop reuse logic is only triggered + # during the buffer copying step, and hence it was invalid to + # rely on those values. The fix is to check all the dimensions + # of the stride in question, which in the test case reveals that + # the stride is not fixed. + # + # NOTE: This test is triggered by the fact that the default buffersize, + # used by einsum, is 8192, and 3*2731 = 8193, is larger than that + # and results in a mismatch between the buffering and the + # striding for operand A. 
+ A = np.arange(2 * 3).reshape(2, 3).astype(np.float32) + B = np.arange(2 * 3 * 2731).reshape(2, 3, 2731).astype(np.int16) + es = np.einsum('cl, cpx->lpx', A, B) + tp = np.tensordot(A, B, axes=(0, 0)) + assert_equal(es, tp) + # The following is the original test case from the bug report, + # made repeatable by changing random arrays to aranges. + A = np.arange(3 * 3).reshape(3, 3).astype(np.float64) + B = np.arange(3 * 3 * 64 * 64).reshape(3, 3, 64, 64).astype(np.float32) + es = np.einsum('cl, cpxy->lpxy', A, B) + tp = np.tensordot(A, B, axes=(0, 0)) + assert_equal(es, tp) + + def test_einsum_fixed_collapsingbug(self): + # Issue #5147. + # The bug only occurred when the output argument of einsum was used. + x = np.random.normal(0, 1, (5, 5, 5, 5)) + y1 = np.zeros((5, 5)) + np.einsum('aabb->ab', x, out=y1) + idx = np.arange(5) + y2 = x[idx[:, None], idx[:, None], idx, idx] + assert_equal(y1, y2) + + def test_einsum_failed_on_p9_and_s390x(self): + # Issues gh-14692 and gh-12689 + # Bug with signed vs unsigned char errored on power9 and s390x Linux + tensor = np.random.random_sample((10, 10, 10, 10)) + x = np.einsum('ijij->', tensor) + y = tensor.trace(axis1=0, axis2=2).trace() + assert_allclose(x, y) + + def test_einsum_all_contig_non_contig_output(self): + # Issue gh-5907, tests that the all contiguous special case + # actually checks the contiguity of the output + x = np.ones((5, 5)) + out = np.ones(10)[::2] + correct_base = np.ones(10) + correct_base[::2] = 5 + # Always worked (inner iteration is done with 0-stride): + np.einsum('mi,mi,mi->m', x, x, x, out=out) + assert_array_equal(out.base, correct_base) + # Example 1: + out = np.ones(10)[::2] + np.einsum('im,im,im->m', x, x, x, out=out) + assert_array_equal(out.base, correct_base) + # Example 2, buffering causes x to be contiguous but + # special cases do not catch the operation before: + out = np.ones((2, 2, 2))[..., 0] + correct_base = np.ones((2, 2, 2)) + correct_base[..., 0] = 2 + x = np.ones((2, 2), np.float32) + np.einsum('ij,jk->ik', x, x, out=out) + assert_array_equal(out.base, correct_base) + + def test_small_boolean_arrays(self): + # See gh-5946. + # Use array of True embedded in False. + a = np.zeros((16, 1, 1), dtype=np.bool_)[:2] + a[...]
= True + out = np.zeros((16, 1, 1), dtype=np.bool_)[:2] + tgt = np.ones((2, 1, 1), dtype=np.bool_) + res = np.einsum('...ij,...jk->...ik', a, a, out=out) + assert_equal(res, tgt) + + def test_out_is_res(self): + a = np.arange(9).reshape(3, 3) + res = np.einsum('...ij,...jk->...ik', a, a, out=a) + assert res is a + + def optimize_compare(self, subscripts, operands=None): + # Tests all paths of the optimization function against + # conventional einsum + if operands is None: + args = [subscripts] + terms = subscripts.split('->')[0].split(',') + for term in terms: + dims = [global_size_dict[x] for x in term] + args.append(np.random.rand(*dims)) + else: + args = [subscripts] + operands + + noopt = np.einsum(*args, optimize=False) + opt = np.einsum(*args, optimize='greedy') + assert_almost_equal(opt, noopt) + opt = np.einsum(*args, optimize='optimal') + assert_almost_equal(opt, noopt) + + def test_hadamard_like_products(self): + # Hadamard outer products + self.optimize_compare('a,ab,abc->abc') + self.optimize_compare('a,b,ab->ab') + + def test_index_transformations(self): + # Simple index transformation cases + self.optimize_compare('ea,fb,gc,hd,abcd->efgh') + self.optimize_compare('ea,fb,abcd,gc,hd->efgh') + self.optimize_compare('abcd,ea,fb,gc,hd->efgh') + + def test_complex(self): + # Long test cases + self.optimize_compare('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + self.optimize_compare('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + self.optimize_compare('cd,bdhe,aidb,hgca,gc,hgibcd,hgac') + self.optimize_compare('abhe,hidj,jgba,hiab,gab') + self.optimize_compare('bde,cdh,agdb,hica,ibd,hgicd,hiac') + self.optimize_compare('chd,bde,agbc,hiad,hgc,hgi,hiad') + self.optimize_compare('chd,bde,agbc,hiad,bdi,cgh,agdb') + self.optimize_compare('bdhe,acad,hiab,agac,hibd') + + def test_collapse(self): + # Inner products + self.optimize_compare('ab,ab,c->') + self.optimize_compare('ab,ab,c->c') + self.optimize_compare('ab,ab,cd,cd->') + self.optimize_compare('ab,ab,cd,cd->ac') + self.optimize_compare('ab,ab,cd,cd->cd') + self.optimize_compare('ab,ab,cd,cd,ef,ef->') + + def test_expand(self): + # Outer products + self.optimize_compare('ab,cd,ef->abcdef') + self.optimize_compare('ab,cd,ef->acdf') + self.optimize_compare('ab,cd,de->abcde') + self.optimize_compare('ab,cd,de->be') + self.optimize_compare('ab,bcd,cd->abcd') + self.optimize_compare('ab,bcd,cd->abd') + + def test_edge_cases(self): + # Difficult edge cases for optimization + self.optimize_compare('eb,cb,fb->cef') + self.optimize_compare('dd,fb,be,cdb->cef') + self.optimize_compare('bca,cdb,dbf,afc->') + self.optimize_compare('dcc,fce,ea,dbf->ab') + self.optimize_compare('fdf,cdd,ccd,afe->ae') + self.optimize_compare('abcd,ad') + self.optimize_compare('ed,fcd,ff,bcf->be') + self.optimize_compare('baa,dcf,af,cde->be') + self.optimize_compare('bd,db,eac->ace') + self.optimize_compare('fff,fae,bef,def->abd') + self.optimize_compare('efc,dbc,acf,fd->abe') + self.optimize_compare('ba,ac,da->bcd') + + def test_inner_product(self): + # Inner products + self.optimize_compare('ab,ab') + self.optimize_compare('ab,ba') + self.optimize_compare('abc,abc') + self.optimize_compare('abc,bac') + self.optimize_compare('abc,cba') + + def test_random_cases(self): + # Randomly built test cases + self.optimize_compare('aab,fa,df,ecc->bde') + self.optimize_compare('ecb,fef,bad,ed->ac') + self.optimize_compare('bcf,bbb,fbf,fc->') + self.optimize_compare('bb,ff,be->e') + self.optimize_compare('bcb,bb,fc,fff->') + self.optimize_compare('fbb,dfd,fc,fc->') + 
self.optimize_compare('afd,ba,cc,dc->bf') + self.optimize_compare('adb,bc,fa,cfc->d') + self.optimize_compare('bbd,bda,fc,db->acf') + self.optimize_compare('dba,ead,cad->bce') + self.optimize_compare('aef,fbc,dca->bde') + + def test_combined_views_mapping(self): + # gh-10792 + a = np.arange(9).reshape(1, 1, 3, 1, 3) + b = np.einsum('bbcdc->d', a) + assert_equal(b, [12]) + + def test_broadcasting_dot_cases(self): + # Ensures broadcasting cases are not mistaken for GEMM + + a = np.random.rand(1, 5, 4) + b = np.random.rand(4, 6) + c = np.random.rand(5, 6) + d = np.random.rand(10) + + self.optimize_compare('ijk,kl,jl', operands=[a, b, c]) + self.optimize_compare('ijk,kl,jl,i->i', operands=[a, b, c, d]) + + e = np.random.rand(1, 1, 5, 4) + f = np.random.rand(7, 7) + self.optimize_compare('abjk,kl,jl', operands=[e, b, c]) + self.optimize_compare('abjk,kl,jl,ab->ab', operands=[e, b, c, f]) + + # Edge case found in gh-11308 + g = np.arange(64).reshape(2, 4, 8) + self.optimize_compare('obk,ijk->ioj', operands=[g, g]) + + +class TestEinsumPath(object): + def build_operands(self, string, size_dict=global_size_dict): + + # Builds views based off initial operands + operands = [string] + terms = string.split('->')[0].split(',') + for term in terms: + dims = [size_dict[x] for x in term] + operands.append(np.random.rand(*dims)) + + return operands + + def assert_path_equal(self, comp, benchmark): + # Checks if list of tuples are equivalent + ret = (len(comp) == len(benchmark)) + assert_(ret) + for pos in range(len(comp) - 1): + ret &= isinstance(comp[pos + 1], tuple) + ret &= (comp[pos + 1] == benchmark[pos + 1]) + assert_(ret) + + def test_memory_contraints(self): + # Ensure memory constraints are satisfied + + outer_test = self.build_operands('a,b,c->abc') + + path, path_str = np.einsum_path(*outer_test, optimize=('greedy', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2)]) + + path, path_str = np.einsum_path(*outer_test, optimize=('optimal', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2)]) + + long_test = self.build_operands('acdf,jbje,gihb,hfac') + path, path_str = np.einsum_path(*long_test, optimize=('greedy', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + path, path_str = np.einsum_path(*long_test, optimize=('optimal', 0)) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + def test_long_paths(self): + # Long complex cases + + # Long test 1 + long_test1 = self.build_operands('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') + path, path_str = np.einsum_path(*long_test1, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) + + path, path_str = np.einsum_path(*long_test1, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) + + # Long test 2 + long_test2 = self.build_operands('chd,bde,agbc,hiad,bdi,cgh,agdb') + path, path_str = np.einsum_path(*long_test2, optimize='greedy') + print(path) + self.assert_path_equal(path, ['einsum_path', + (3, 4), (0, 3), (3, 4), (1, 3), (1, 2), (0, 1)]) + + path, path_str = np.einsum_path(*long_test2, optimize='optimal') + print(path) + self.assert_path_equal(path, ['einsum_path', + (0, 5), (1, 4), (3, 4), (1, 3), (1, 2), (0, 1)]) + + def test_edge_paths(self): + # Difficult edge cases + + # Edge test1 + edge_test1 = self.build_operands('eb,cb,fb->cef') + path, path_str = np.einsum_path(*edge_test1, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 2), (0, 1)]) + + path, 
path_str = np.einsum_path(*edge_test1, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 2), (0, 1)]) + + # Edge test2 + edge_test2 = self.build_operands('dd,fb,be,cdb->cef') + path, path_str = np.einsum_path(*edge_test2, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 1), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test2, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 1), (0, 1)]) + + # Edge test3 + edge_test3 = self.build_operands('bca,cdb,dbf,afc->') + path, path_str = np.einsum_path(*edge_test3, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test3, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + # Edge test4 + edge_test4 = self.build_operands('dcc,fce,ea,dbf->ab') + path, path_str = np.einsum_path(*edge_test4, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) + + path, path_str = np.einsum_path(*edge_test4, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) + + # Edge test5 + edge_test4 = self.build_operands('a,ac,ab,ad,cd,bd,bc->', + size_dict={"a": 20, "b": 20, "c": 20, "d": 20}) + path, path_str = np.einsum_path(*edge_test4, optimize='greedy') + self.assert_path_equal(path, ['einsum_path', (0, 1), (0, 1, 2, 3, 4, 5)]) + + path, path_str = np.einsum_path(*edge_test4, optimize='optimal') + self.assert_path_equal(path, ['einsum_path', (0, 1), (0, 1, 2, 3, 4, 5)]) + + def test_path_type_input(self): + # Test explicit path handling + path_test = self.build_operands('dcc,fce,ea,dbf->ab') + + path, path_str = np.einsum_path(*path_test, optimize=False) + self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) + + path, path_str = np.einsum_path(*path_test, optimize=True) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) + + exp_path = ['einsum_path', (0, 2), (0, 2), (0, 1)] + path, path_str = np.einsum_path(*path_test, optimize=exp_path) + self.assert_path_equal(path, exp_path) + + # Double check einsum works on the input path + noopt = np.einsum(*path_test, optimize=False) + opt = np.einsum(*path_test, optimize=exp_path) + assert_almost_equal(noopt, opt) + + def test_spaces(self): + #gh-10794 + arr = np.array([[1]]) + for sp in itertools.product(['', ' '], repeat=4): + # no error for any spacing + np.einsum('{}...a{}->{}...a{}'.format(*sp), arr) + +def test_overlap(): + a = np.arange(9, dtype=int).reshape(3, 3) + b = np.arange(9, dtype=int).reshape(3, 3) + d = np.dot(a, b) + # sanity check + c = np.einsum('ij,jk->ik', a, b) + assert_equal(c, d) + #gh-10080, out overlaps one of the operands + c = np.einsum('ij,jk->ik', a, b, out=b) + assert_equal(c, d) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_errstate.py b/contrib/python/numpy/py2/numpy/core/tests/test_errstate.py new file mode 100644 index 0000000000..670d485c1d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_errstate.py @@ -0,0 +1,41 @@ +from __future__ import division, absolute_import, print_function + +import platform +import pytest + +import numpy as np +from numpy.testing import assert_, assert_raises + + +class TestErrstate(object): + @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.") + def test_invalid(self): + with np.errstate(all='raise', under='ignore'): + a = -np.arange(3) + # This should work + with np.errstate(invalid='ignore'): +
np.sqrt(a) + # While this should fail! + with assert_raises(FloatingPointError): + np.sqrt(a) + + def test_divide(self): + with np.errstate(all='raise', under='ignore'): + a = -np.arange(3) + # This should work + with np.errstate(divide='ignore'): + a // 0 + # While this should fail! + with assert_raises(FloatingPointError): + a // 0 + + def test_errcall(self): + def foo(*args): + print(args) + + olderrcall = np.geterrcall() + with np.errstate(call=foo): + assert_(np.geterrcall() is foo, 'call is not foo') + with np.errstate(call=None): + assert_(np.geterrcall() is None, 'call is not None') + assert_(np.geterrcall() is olderrcall, 'call is not olderrcall') diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_extint128.py b/contrib/python/numpy/py2/numpy/core/tests/test_extint128.py new file mode 100644 index 0000000000..7c454a603b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_extint128.py @@ -0,0 +1,221 @@ +from __future__ import division, absolute_import, print_function + +import itertools +import contextlib +import operator +import pytest + +import numpy as np +import numpy.core._multiarray_tests as mt + +from numpy.testing import assert_raises, assert_equal + + +INT64_MAX = np.iinfo(np.int64).max +INT64_MIN = np.iinfo(np.int64).min +INT64_MID = 2**32 + +# int128 is not two's complement, the sign bit is separate +INT128_MAX = 2**128 - 1 +INT128_MIN = -INT128_MAX +INT128_MID = 2**64 + +INT64_VALUES = ( + [INT64_MIN + j for j in range(20)] + + [INT64_MAX - j for j in range(20)] + + [INT64_MID + j for j in range(-20, 20)] + + [2*INT64_MID + j for j in range(-20, 20)] + + [INT64_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) +) + +INT128_VALUES = ( + [INT128_MIN + j for j in range(20)] + + [INT128_MAX - j for j in range(20)] + + [INT128_MID + j for j in range(-20, 20)] + + [2*INT128_MID + j for j in range(-20, 20)] + + [INT128_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) + + [False] # negative zero +) + +INT64_POS_VALUES = [x for x in INT64_VALUES if x > 0] + + +@contextlib.contextmanager +def exc_iter(*args): + """ + Iterate over Cartesian product of *args, and if an exception is raised, + add information of the current iterate. 
+ """ + + value = [None] + + def iterate(): + for v in itertools.product(*args): + value[0] = v + yield v + + try: + yield iterate() + except Exception: + import traceback + msg = "At: %r\n%s" % (repr(value[0]), + traceback.format_exc()) + raise AssertionError(msg) + + +def test_safe_binop(): + # Test checked arithmetic routines + + ops = [ + (operator.add, 1), + (operator.sub, 2), + (operator.mul, 3) + ] + + with exc_iter(ops, INT64_VALUES, INT64_VALUES) as it: + for xop, a, b in it: + pyop, op = xop + c = pyop(a, b) + + if not (INT64_MIN <= c <= INT64_MAX): + assert_raises(OverflowError, mt.extint_safe_binop, a, b, op) + else: + d = mt.extint_safe_binop(a, b, op) + if c != d: + # assert_equal is slow + assert_equal(d, c) + + +def test_to_128(): + with exc_iter(INT64_VALUES) as it: + for a, in it: + b = mt.extint_to_128(a) + if a != b: + assert_equal(b, a) + + +def test_to_64(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if not (INT64_MIN <= a <= INT64_MAX): + assert_raises(OverflowError, mt.extint_to_64, a) + else: + b = mt.extint_to_64(a) + if a != b: + assert_equal(b, a) + + +def test_mul_64_64(): + with exc_iter(INT64_VALUES, INT64_VALUES) as it: + for a, b in it: + c = a * b + d = mt.extint_mul_64_64(a, b) + if c != d: + assert_equal(d, c) + + +def test_add_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a + b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_add_128, a, b) + else: + d = mt.extint_add_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_sub_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a - b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_sub_128, a, b) + else: + d = mt.extint_sub_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_neg_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + b = -a + c = mt.extint_neg_128(a) + if b != c: + assert_equal(c, b) + + +def test_shl_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -(((-a) << 1) & (2**128-1)) + else: + b = (a << 1) & (2**128-1) + c = mt.extint_shl_128(a) + if b != c: + assert_equal(c, b) + + +def test_shr_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -((-a) >> 1) + else: + b = a >> 1 + c = mt.extint_shr_128(a) + if b != c: + assert_equal(c, b) + + +def test_gt_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a > b + d = mt.extint_gt_128(a, b) + if c != d: + assert_equal(d, c) + + +@pytest.mark.slow +def test_divmod_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + if a >= 0: + c, cr = divmod(a, b) + else: + c, cr = divmod(-a, b) + c = -c + cr = -cr + + d, dr = mt.extint_divmod_128_64(a, b) + + if c != d or d != dr or b*d + dr != a: + assert_equal(d, c) + assert_equal(dr, cr) + assert_equal(b*d + dr, a) + + +def test_floordiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = a // b + d = mt.extint_floordiv_128_64(a, b) + + if c != d: + assert_equal(d, c) + + +def test_ceildiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = (a + b - 1) // b + d = mt.extint_ceildiv_128_64(a, b) + + if c != d: + assert_equal(d, c) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_function_base.py b/contrib/python/numpy/py2/numpy/core/tests/test_function_base.py new file mode 100644 index 0000000000..8b820bd75c --- /dev/null +++ 
b/contrib/python/numpy/py2/numpy/core/tests/test_function_base.py @@ -0,0 +1,370 @@ +from __future__ import division, absolute_import, print_function + +from numpy import ( + logspace, linspace, geomspace, dtype, array, sctypes, arange, isnan, + ndarray, sqrt, nextafter, stack + ) +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, assert_allclose, + suppress_warnings + ) + + +class PhysicalQuantity(float): + def __new__(cls, value): + return float.__new__(cls, value) + + def __add__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(x) + float(self)) + __radd__ = __add__ + + def __sub__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(self) - float(x)) + + def __rsub__(self, x): + assert_(isinstance(x, PhysicalQuantity)) + return PhysicalQuantity(float(x) - float(self)) + + def __mul__(self, x): + return PhysicalQuantity(float(x) * float(self)) + __rmul__ = __mul__ + + def __div__(self, x): + return PhysicalQuantity(float(self) / float(x)) + + def __rdiv__(self, x): + return PhysicalQuantity(float(x) / float(self)) + + +class PhysicalQuantity2(ndarray): + __array_priority__ = 10 + + +class TestLogspace(object): + + def test_basic(self): + y = logspace(0, 6) + assert_(len(y) == 50) + y = logspace(0, 6, num=100) + assert_(y[-1] == 10 ** 6) + y = logspace(0, 6, endpoint=0) + assert_(y[-1] < 10 ** 6) + y = logspace(0, 6, num=7) + assert_array_equal(y, [1, 10, 100, 1e3, 1e4, 1e5, 1e6]) + + def test_start_stop_array(self): + start = array([0., 1.]) + stop = array([6., 7.]) + t1 = logspace(start, stop, 6) + t2 = stack([logspace(_start, _stop, 6) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = logspace(start, stop[0], 6) + t4 = stack([logspace(_start, stop[0], 6) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = logspace(start, stop, 6, axis=-1) + assert_equal(t5, t2.T) + + def test_dtype(self): + y = logspace(0, 6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = logspace(0, 6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = logspace(0, 6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + def test_physical_quantities(self): + a = PhysicalQuantity(1.0) + b = PhysicalQuantity(5.0) + assert_equal(logspace(a, b), logspace(1.0, 5.0)) + + def test_subclass(self): + a = array(1).view(PhysicalQuantity2) + b = array(7).view(PhysicalQuantity2) + ls = logspace(a, b) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, logspace(1.0, 7.0)) + ls = logspace(a, b, 1) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, logspace(1.0, 7.0, 1)) + + +class TestGeomspace(object): + + def test_basic(self): + y = geomspace(1, 1e6) + assert_(len(y) == 50) + y = geomspace(1, 1e6, num=100) + assert_(y[-1] == 10 ** 6) + y = geomspace(1, 1e6, endpoint=False) + assert_(y[-1] < 10 ** 6) + y = geomspace(1, 1e6, num=7) + assert_array_equal(y, [1, 10, 100, 1e3, 1e4, 1e5, 1e6]) + + y = geomspace(8, 2, num=3) + assert_allclose(y, [8, 4, 2]) + assert_array_equal(y.imag, 0) + + y = geomspace(-1, -100, num=3) + assert_array_equal(y, [-1, -10, -100]) + assert_array_equal(y.imag, 0) + + y = geomspace(-100, -1, num=3) + assert_array_equal(y, [-100, -10, -1]) + assert_array_equal(y.imag, 0) + + def test_complex(self): + # Purely imaginary + y = geomspace(1j, 16j, num=5) + assert_allclose(y, [1j, 2j, 4j, 8j, 16j]) + assert_array_equal(y.real, 0) + + y = geomspace(-4j, -324j, num=5) + assert_allclose(y, [-4j, -12j, -36j, -108j, -324j]) + 
assert_array_equal(y.real, 0) + + y = geomspace(1+1j, 1000+1000j, num=4) + assert_allclose(y, [1+1j, 10+10j, 100+100j, 1000+1000j]) + + y = geomspace(-1+1j, -1000+1000j, num=4) + assert_allclose(y, [-1+1j, -10+10j, -100+100j, -1000+1000j]) + + # Logarithmic spirals + y = geomspace(-1, 1, num=3, dtype=complex) + assert_allclose(y, [-1, 1j, +1]) + + y = geomspace(0+3j, -3+0j, 3) + assert_allclose(y, [0+3j, -3/sqrt(2)+3j/sqrt(2), -3+0j]) + y = geomspace(0+3j, 3+0j, 3) + assert_allclose(y, [0+3j, 3/sqrt(2)+3j/sqrt(2), 3+0j]) + y = geomspace(-3+0j, 0-3j, 3) + assert_allclose(y, [-3+0j, -3/sqrt(2)-3j/sqrt(2), 0-3j]) + y = geomspace(0+3j, -3+0j, 3) + assert_allclose(y, [0+3j, -3/sqrt(2)+3j/sqrt(2), -3+0j]) + y = geomspace(-2-3j, 5+7j, 7) + assert_allclose(y, [-2-3j, -0.29058977-4.15771027j, + 2.08885354-4.34146838j, 4.58345529-3.16355218j, + 6.41401745-0.55233457j, 6.75707386+3.11795092j, + 5+7j]) + + # Type promotion should prevent the -5 from becoming a NaN + y = geomspace(3j, -5, 2) + assert_allclose(y, [3j, -5]) + y = geomspace(-5, 3j, 2) + assert_allclose(y, [-5, 3j]) + + def test_dtype(self): + y = geomspace(1, 1e6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = geomspace(1, 1e6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = geomspace(1, 1e6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + # Native types + y = geomspace(1, 1e6, dtype=float) + assert_equal(y.dtype, dtype('float_')) + y = geomspace(1, 1e6, dtype=complex) + assert_equal(y.dtype, dtype('complex')) + + def test_start_stop_array_scalar(self): + lim1 = array([120, 100], dtype="int8") + lim2 = array([-120, -100], dtype="int8") + lim3 = array([1200, 1000], dtype="uint16") + t1 = geomspace(lim1[0], lim1[1], 5) + t2 = geomspace(lim2[0], lim2[1], 5) + t3 = geomspace(lim3[0], lim3[1], 5) + t4 = geomspace(120.0, 100.0, 5) + t5 = geomspace(-120.0, -100.0, 5) + t6 = geomspace(1200.0, 1000.0, 5) + + # t3 uses float32, t6 uses float64 + assert_allclose(t1, t4, rtol=1e-2) + assert_allclose(t2, t5, rtol=1e-2) + assert_allclose(t3, t6, rtol=1e-5) + + def test_start_stop_array(self): + # Try to use all special cases. 
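+ # The columns below cover: increasing real, decreasing real, purely
+ # imaginary (positive and negative), general complex, and a -1 -> 1 sign
+ # change (which takes the complex path here, since the arrays are complex).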
+ start = array([1.e0, 32., 1j, -4j, 1+1j, -1]) + stop = array([1.e4, 2., 16j, -324j, 10000+10000j, 1]) + t1 = geomspace(start, stop, 5) + t2 = stack([geomspace(_start, _stop, 5) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = geomspace(start, stop[0], 5) + t4 = stack([geomspace(_start, stop[0], 5) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = geomspace(start, stop, 5, axis=-1) + assert_equal(t5, t2.T) + + def test_physical_quantities(self): + a = PhysicalQuantity(1.0) + b = PhysicalQuantity(5.0) + assert_equal(geomspace(a, b), geomspace(1.0, 5.0)) + + def test_subclass(self): + a = array(1).view(PhysicalQuantity2) + b = array(7).view(PhysicalQuantity2) + gs = geomspace(a, b) + assert type(gs) is PhysicalQuantity2 + assert_equal(gs, geomspace(1.0, 7.0)) + gs = geomspace(a, b, 1) + assert type(gs) is PhysicalQuantity2 + assert_equal(gs, geomspace(1.0, 7.0, 1)) + + def test_bounds(self): + assert_raises(ValueError, geomspace, 0, 10) + assert_raises(ValueError, geomspace, 10, 0) + assert_raises(ValueError, geomspace, 0, 0) + + +class TestLinspace(object): + + def test_basic(self): + y = linspace(0, 10) + assert_(len(y) == 50) + y = linspace(2, 10, num=100) + assert_(y[-1] == 10) + y = linspace(2, 10, endpoint=0) + assert_(y[-1] < 10) + assert_raises(ValueError, linspace, 0, 10, num=-1) + + def test_corner(self): + y = list(linspace(0, 1, 1)) + assert_(y == [0.0], y) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, ".*safely interpreted as an integer") + y = list(linspace(0, 1, 2.5)) + assert_(y == [0.0, 1.0]) + + def test_type(self): + t1 = linspace(0, 1, 0).dtype + t2 = linspace(0, 1, 1).dtype + t3 = linspace(0, 1, 2).dtype + assert_equal(t1, t2) + assert_equal(t2, t3) + + def test_dtype(self): + y = linspace(0, 6, dtype='float32') + assert_equal(y.dtype, dtype('float32')) + y = linspace(0, 6, dtype='float64') + assert_equal(y.dtype, dtype('float64')) + y = linspace(0, 6, dtype='int32') + assert_equal(y.dtype, dtype('int32')) + + def test_start_stop_array_scalar(self): + lim1 = array([-120, 100], dtype="int8") + lim2 = array([120, -100], dtype="int8") + lim3 = array([1200, 1000], dtype="uint16") + t1 = linspace(lim1[0], lim1[1], 5) + t2 = linspace(lim2[0], lim2[1], 5) + t3 = linspace(lim3[0], lim3[1], 5) + t4 = linspace(-120.0, 100.0, 5) + t5 = linspace(120.0, -100.0, 5) + t6 = linspace(1200.0, 1000.0, 5) + assert_equal(t1, t4) + assert_equal(t2, t5) + assert_equal(t3, t6) + + def test_start_stop_array(self): + start = array([-120, 120], dtype="int8") + stop = array([100, -100], dtype="int8") + t1 = linspace(start, stop, 5) + t2 = stack([linspace(_start, _stop, 5) + for _start, _stop in zip(start, stop)], axis=1) + assert_equal(t1, t2) + t3 = linspace(start, stop[0], 5) + t4 = stack([linspace(_start, stop[0], 5) + for _start in start], axis=1) + assert_equal(t3, t4) + t5 = linspace(start, stop, 5, axis=-1) + assert_equal(t5, t2.T) + + def test_complex(self): + lim1 = linspace(1 + 2j, 3 + 4j, 5) + t1 = array([1.0+2.j, 1.5+2.5j, 2.0+3j, 2.5+3.5j, 3.0+4j]) + lim2 = linspace(1j, 10, 5) + t2 = array([0.0+1.j, 2.5+0.75j, 5.0+0.5j, 7.5+0.25j, 10.0+0j]) + assert_equal(lim1, t1) + assert_equal(lim2, t2) + + def test_physical_quantities(self): + a = PhysicalQuantity(0.0) + b = PhysicalQuantity(1.0) + assert_equal(linspace(a, b), linspace(0.0, 1.0)) + + def test_subclass(self): + a = array(0).view(PhysicalQuantity2) + b = array(1).view(PhysicalQuantity2) + ls = linspace(a, b) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, 
linspace(0.0, 1.0)) + ls = linspace(a, b, 1) + assert type(ls) is PhysicalQuantity2 + assert_equal(ls, linspace(0.0, 1.0, 1)) + + def test_array_interface(self): + # Regression test for https://github.com/numpy/numpy/pull/6659 + # Ensure that start/stop can be objects that implement + # __array_interface__ and are convertible to numeric scalars + + class Arrayish(object): + """ + A generic object that supports the __array_interface__ and hence + can in principle be converted to a numeric scalar, but is not + otherwise recognized as numeric, but also happens to support + multiplication by floats. + + Data should be an object that implements the buffer interface, + and contains at least 4 bytes. + """ + + def __init__(self, data): + self._data = data + + @property + def __array_interface__(self): + return {'shape': (), 'typestr': '<i4', 'data': self._data, + 'version': 3} + + def __mul__(self, other): + # For the purposes of this test any multiplication is an + # identity operation :) + return self + + one = Arrayish(array(1, dtype='<i4')) + five = Arrayish(array(5, dtype='<i4')) + + assert_equal(linspace(one, five), linspace(1, 5)) + + def test_denormal_numbers(self): + # Regression test for gh-5437. Will probably fail when compiled + # with ICC, which flushes denormals to zero + for ftype in sctypes['float']: + stop = nextafter(ftype(0), ftype(1)) * 5 # A denormal number + assert_(any(linspace(0, stop, 10, endpoint=False, dtype=ftype))) + + def test_equivalent_to_arange(self): + for j in range(1000): + assert_equal(linspace(0, j, j+1, dtype=int), + arange(j+1, dtype=int)) + + def test_retstep(self): + y = linspace(0, 1, 2, retstep=True) + assert_(isinstance(y, tuple) and len(y) == 2) + for num in (0, 1): + for ept in (False, True): + y = linspace(0, 1, num, endpoint=ept, retstep=True) + assert_(isinstance(y, tuple) and len(y) == 2 and + len(y[0]) == num and isnan(y[1]), + 'num={0}, endpoint={1}'.format(num, ept)) + + def test_object(self): + start = array(1, dtype='O') + stop = array(2, dtype='O') + y = linspace(start, stop, 3) + assert_array_equal(y, array([1., 1.5, 2.])) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_getlimits.py b/contrib/python/numpy/py2/numpy/core/tests/test_getlimits.py new file mode 100644 index 0000000000..2f66481836 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_getlimits.py @@ -0,0 +1,123 @@ +""" Test functions for limits module. 
+ +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.core import finfo, iinfo +from numpy import half, single, double, longdouble +from numpy.testing import assert_equal, assert_, assert_raises +from numpy.core.getlimits import _discovered_machar, _float_ma + +################################################## + +class TestPythonFloat(object): + def test_singleton(self): + ftype = finfo(float) + ftype2 = finfo(float) + assert_equal(id(ftype), id(ftype2)) + +class TestHalf(object): + def test_singleton(self): + ftype = finfo(half) + ftype2 = finfo(half) + assert_equal(id(ftype), id(ftype2)) + +class TestSingle(object): + def test_singleton(self): + ftype = finfo(single) + ftype2 = finfo(single) + assert_equal(id(ftype), id(ftype2)) + +class TestDouble(object): + def test_singleton(self): + ftype = finfo(double) + ftype2 = finfo(double) + assert_equal(id(ftype), id(ftype2)) + +class TestLongdouble(object): + def test_singleton(self): + ftype = finfo(longdouble) + ftype2 = finfo(longdouble) + assert_equal(id(ftype), id(ftype2)) + +class TestFinfo(object): + def test_basic(self): + dts = list(zip(['f2', 'f4', 'f8', 'c8', 'c16'], + [np.float16, np.float32, np.float64, np.complex64, + np.complex128])) + for dt1, dt2 in dts: + for attr in ('bits', 'eps', 'epsneg', 'iexp', 'machar', 'machep', + 'max', 'maxexp', 'min', 'minexp', 'negep', 'nexp', + 'nmant', 'precision', 'resolution', 'tiny'): + assert_equal(getattr(finfo(dt1), attr), + getattr(finfo(dt2), attr), attr) + assert_raises(ValueError, finfo, 'i4') + +class TestIinfo(object): + def test_basic(self): + dts = list(zip(['i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8'], + [np.int8, np.int16, np.int32, np.int64, + np.uint8, np.uint16, np.uint32, np.uint64])) + for dt1, dt2 in dts: + for attr in ('bits', 'min', 'max'): + assert_equal(getattr(iinfo(dt1), attr), + getattr(iinfo(dt2), attr), attr) + assert_raises(ValueError, iinfo, 'f4') + + def test_unsigned_max(self): + types = np.sctypes['uint'] + for T in types: + assert_equal(iinfo(T).max, T(-1)) + +class TestRepr(object): + def test_iinfo_repr(self): + expected = "iinfo(min=-32768, max=32767, dtype=int16)" + assert_equal(repr(np.iinfo(np.int16)), expected) + + def test_finfo_repr(self): + expected = "finfo(resolution=1e-06, min=-3.4028235e+38," + \ + " max=3.4028235e+38, dtype=float32)" + assert_equal(repr(np.finfo(np.float32)), expected) + + +def test_instances(): + iinfo(10) + finfo(3.0) + + +def assert_ma_equal(discovered, ma_like): + # Check MachAr-like objects same as calculated MachAr instances + for key, value in discovered.__dict__.items(): + assert_equal(value, getattr(ma_like, key)) + if hasattr(value, 'shape'): + assert_equal(value.shape, getattr(ma_like, key).shape) + assert_equal(value.dtype, getattr(ma_like, key).dtype) + + +def test_known_types(): + # Test we are correctly compiling parameters for known types + for ftype, ma_like in ((np.float16, _float_ma[16]), + (np.float32, _float_ma[32]), + (np.float64, _float_ma[64])): + assert_ma_equal(_discovered_machar(ftype), ma_like) + # Suppress warning for broken discovery of double double on PPC + with np.errstate(all='ignore'): + ld_ma = _discovered_machar(np.longdouble) + bytes = np.dtype(np.longdouble).itemsize + if (ld_ma.it, ld_ma.maxexp) == (63, 16384) and bytes in (12, 16): + # 80-bit extended precision + assert_ma_equal(ld_ma, _float_ma[80]) + elif (ld_ma.it, ld_ma.maxexp) == (112, 16384) and bytes == 16: + # IEE 754 128-bit + assert_ma_equal(ld_ma, _float_ma[128]) + + 
+def test_plausible_finfo(): + # Assert that finfo returns reasonable results for all types + for ftype in np.sctypes['float'] + np.sctypes['complex']: + info = np.finfo(ftype) + assert_(info.nmant > 1) + assert_(info.minexp < -1) + assert_(info.maxexp > 1) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_half.py b/contrib/python/numpy/py2/numpy/core/tests/test_half.py new file mode 100644 index 0000000000..7707125014 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_half.py @@ -0,0 +1,518 @@ +from __future__ import division, absolute_import, print_function + +import platform +import pytest + +import numpy as np +from numpy import uint16, float16, float32, float64 +from numpy.testing import assert_, assert_equal + + +def assert_raises_fpe(strmatch, callable, *args, **kwargs): + try: + callable(*args, **kwargs) + except FloatingPointError as exc: + assert_(str(exc).find(strmatch) >= 0, + "Did not raise floating point %s error" % strmatch) + else: + assert_(False, + "Did not raise floating point %s error" % strmatch) + +class TestHalf(object): + def setup(self): + # An array of all possible float16 values + self.all_f16 = np.arange(0x10000, dtype=uint16) + self.all_f16.dtype = float16 + self.all_f32 = np.array(self.all_f16, dtype=float32) + self.all_f64 = np.array(self.all_f16, dtype=float64) + + # An array of all non-NaN float16 values, in sorted order + self.nonan_f16 = np.concatenate( + (np.arange(0xfc00, 0x7fff, -1, dtype=uint16), + np.arange(0x0000, 0x7c01, 1, dtype=uint16))) + self.nonan_f16.dtype = float16 + self.nonan_f32 = np.array(self.nonan_f16, dtype=float32) + self.nonan_f64 = np.array(self.nonan_f16, dtype=float64) + + # An array of all finite float16 values, in sorted order + self.finite_f16 = self.nonan_f16[1:-1] + self.finite_f32 = self.nonan_f32[1:-1] + self.finite_f64 = self.nonan_f64[1:-1] + + def test_half_conversions(self): + """Checks that all 16-bit values survive conversion + to/from 32-bit and 64-bit float""" + # Because the underlying routines preserve the NaN bits, every + # value is preserved when converting to/from other floats. + + # Convert from float32 back to float16 + b = np.array(self.all_f32, dtype=float16) + assert_equal(self.all_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Convert from float64 back to float16 + b = np.array(self.all_f64, dtype=float16) + assert_equal(self.all_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Convert float16 to longdouble and back + # This doesn't necessarily preserve the extra NaN bits, + # so exclude NaNs. + a_ld = np.array(self.nonan_f16, dtype=np.longdouble) + b = np.array(a_ld, dtype=float16) + assert_equal(self.nonan_f16.view(dtype=uint16), + b.view(dtype=uint16)) + + # Check the range for which all integers can be represented + i_int = np.arange(-2048, 2049) + i_f16 = np.array(i_int, dtype=float16) + j = np.array(i_f16, dtype=int) + assert_equal(i_int, j) + + @pytest.mark.parametrize("offset", [None, "up", "down"]) + @pytest.mark.parametrize("shift", [None, "up", "down"]) + @pytest.mark.parametrize("float_t", [np.float32, np.float64]) + def test_half_conversion_rounding(self, float_t, shift, offset): + # Assumes that round to even is used during casting. 
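+ # For instance, above 2048 the float16 spacing is 2, so 2049.0 ties
+ # exactly between 2048 and 2050 and np.float16(2049.0) == 2048 (the even
+ # significand), while np.float16(2051.0) rounds up to 2052.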
+ max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16) + + # Test all (positive) finite numbers; denormals are most interesting + # however: + f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16) + f16s_float = f16s_patterns.view(np.float16).astype(float_t) + + # Shift the values by half a bit up or down (or do not shift). + if shift == "up": + f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:] + elif shift == "down": + f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1] + else: + f16s_float = f16s_float[1:-1] + + # Increase the float by a minimal value: + if offset == "up": + f16s_float = np.nextafter(f16s_float, float_t(1e50)) + elif offset == "down": + f16s_float = np.nextafter(f16s_float, float_t(-1e50)) + + # Convert back to float16 and its bit pattern: + res_patterns = f16s_float.astype(np.float16).view(np.uint16) + + # The above calculation tries the original values, or the exact + # mid points between the float16 values. It then further offsets them + # by as little as possible. If no offset occurs, "round to even" + # logic will be necessary; an arbitrarily small offset should cause + # normal up/down rounding always. + + # Calculate the expected pattern: + cmp_patterns = f16s_patterns[1:-1].copy() + + if shift == "down" and offset != "up": + shift_pattern = -1 + elif shift == "up" and offset != "down": + shift_pattern = 1 + else: + # There cannot be a shift: either shift is None, so all rounding + # will go back to original, or shift is reduced by offset too much. + shift_pattern = 0 + + # If rounding occurs, is it normal rounding or round to even? + if offset is None: + # Round to even occurs, modify only non-even, cast to allow + (-1) + cmp_patterns[0::2].view(np.int16)[...] += shift_pattern + else: + cmp_patterns.view(np.int16)[...] += shift_pattern + + assert_equal(res_patterns, cmp_patterns) + + @pytest.mark.parametrize(["float_t", "uint_t", "bits"], + [(np.float32, np.uint32, 23), + (np.float64, np.uint64, 52)]) + def test_half_conversion_denormal_round_even(self, float_t, uint_t, bits): + # Test specifically that all bits are considered when deciding + # whether round to even should occur (i.e. no bits are lost at the + # end). Compare also gh-12721.
The most bits can get lost for the + # smallest denormal: + smallest_value = np.uint16(1).view(np.float16).astype(float_t) + assert smallest_value == 2**-24 + + # Will be rounded to zero based on round to even rule: + rounded_to_zero = smallest_value / float_t(2) + assert rounded_to_zero.astype(np.float16) == 0 + + # The significand will be all 0 for the float_t, test that we do not + # lose the lower ones of these: + for i in range(bits): + # slightly increasing the value should make it round up: + larger_pattern = rounded_to_zero.view(uint_t) | uint_t(1 << i) + larger_value = larger_pattern.view(float_t) + assert larger_value.astype(np.float16) == smallest_value + + def test_nans_infs(self): + with np.errstate(all='ignore'): + # Check some of the ufuncs + assert_equal(np.isnan(self.all_f16), np.isnan(self.all_f32)) + assert_equal(np.isinf(self.all_f16), np.isinf(self.all_f32)) + assert_equal(np.isfinite(self.all_f16), np.isfinite(self.all_f32)) + assert_equal(np.signbit(self.all_f16), np.signbit(self.all_f32)) + assert_equal(np.spacing(float16(65504)), np.inf) + + # Check comparisons of all values with NaN + nan = float16(np.nan) + + assert_(not (self.all_f16 == nan).any()) + assert_(not (nan == self.all_f16).any()) + + assert_((self.all_f16 != nan).all()) + assert_((nan != self.all_f16).all()) + + assert_(not (self.all_f16 < nan).any()) + assert_(not (nan < self.all_f16).any()) + + assert_(not (self.all_f16 <= nan).any()) + assert_(not (nan <= self.all_f16).any()) + + assert_(not (self.all_f16 > nan).any()) + assert_(not (nan > self.all_f16).any()) + + assert_(not (self.all_f16 >= nan).any()) + assert_(not (nan >= self.all_f16).any()) + + def test_half_values(self): + """Confirms a small number of known half values""" + a = np.array([1.0, -1.0, + 2.0, -2.0, + 0.0999755859375, 0.333251953125, # 1/10, 1/3 + 65504, -65504, # Maximum magnitude + 2.0**(-14), -2.0**(-14), # Minimum normal + 2.0**(-24), -2.0**(-24), # Minimum subnormal + 0, -1/1e1000, # Signed zeros + np.inf, -np.inf]) + b = np.array([0x3c00, 0xbc00, + 0x4000, 0xc000, + 0x2e66, 0x3555, + 0x7bff, 0xfbff, + 0x0400, 0x8400, + 0x0001, 0x8001, + 0x0000, 0x8000, + 0x7c00, 0xfc00], dtype=uint16) + b.dtype = float16 + assert_equal(a, b) + + def test_half_rounding(self): + """Checks that rounding when converting to half is correct""" + a = np.array([2.0**-25 + 2.0**-35, # Rounds to minimum subnormal + 2.0**-25, # Underflows to zero (nearest even mode) + 2.0**-26, # Underflows to zero + 1.0+2.0**-11 + 2.0**-16, # rounds to 1.0+2**(-10) + 1.0+2.0**-11, # rounds to 1.0 (nearest even mode) + 1.0+2.0**-12, # rounds to 1.0 + 65519, # rounds to 65504 + 65520], # rounds to inf + dtype=float64) + rounded = [2.0**-24, + 0.0, + 0.0, + 1.0+2.0**(-10), + 1.0, + 1.0, + 65504, + np.inf] + + # Check float64->float16 rounding + b = np.array(a, dtype=float16) + assert_equal(b, rounded) + + # Check float32->float16 rounding + a = np.array(a, dtype=float32) + b = np.array(a, dtype=float16) + assert_equal(b, rounded) + + def test_half_correctness(self): + """Take every finite float16, and check the casting functions with + a manual conversion.""" + + # Create an array of all finite float16s + a_bits = self.finite_f16.view(dtype=uint16) + + # Convert to 64-bit float manually + a_sgn = (-1.0)**((a_bits & 0x8000) >> 15) + a_exp = np.array((a_bits & 0x7c00) >> 10, dtype=np.int32) - 15 + a_man = (a_bits & 0x03ff) * 2.0**(-10) + # Implicit bit of normalized floats + a_man[a_exp != -15] += 1 + # Denormalized exponent is -14 + a_exp[a_exp == -15] = -14 + + 
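+        # Combine the pieces per IEEE 754 binary16:
+        #   normal:    (-1)**s * (1 + m/2**10) * 2**(e - 15)
+        #   subnormal: (-1)**s * (m/2**10) * 2**(-14)
+        # (a_man and a_exp above were already adjusted for the two cases.)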
a_manual = a_sgn * a_man * 2.0**a_exp + + a32_fail = np.nonzero(self.finite_f32 != a_manual)[0] + if len(a32_fail) != 0: + bad_index = a32_fail[0] + assert_equal(self.finite_f32, a_manual, + "First non-equal is half value %x -> %g != %g" % + (self.finite_f16[bad_index], + self.finite_f32[bad_index], + a_manual[bad_index])) + + a64_fail = np.nonzero(self.finite_f64 != a_manual)[0] + if len(a64_fail) != 0: + bad_index = a64_fail[0] + assert_equal(self.finite_f64, a_manual, + "First non-equal is half value %x -> %g != %g" % + (self.finite_f16[bad_index], + self.finite_f64[bad_index], + a_manual[bad_index])) + + def test_half_ordering(self): + """Make sure comparisons are working right""" + + # All non-NaN float16 values in reverse order + a = self.nonan_f16[::-1].copy() + + # 32-bit float copy + b = np.array(a, dtype=float32) + + # Should sort the same + a.sort() + b.sort() + assert_equal(a, b) + + # Comparisons should work + assert_((a[:-1] <= a[1:]).all()) + assert_(not (a[:-1] > a[1:]).any()) + assert_((a[1:] >= a[:-1]).all()) + assert_(not (a[1:] < a[:-1]).any()) + # All != except for +/-0 + assert_equal(np.nonzero(a[:-1] < a[1:])[0].size, a.size-2) + assert_equal(np.nonzero(a[1:] > a[:-1])[0].size, a.size-2) + + def test_half_funcs(self): + """Test the various ArrFuncs""" + + # fill + assert_equal(np.arange(10, dtype=float16), + np.arange(10, dtype=float32)) + + # fillwithscalar + a = np.zeros((5,), dtype=float16) + a.fill(1) + assert_equal(a, np.ones((5,), dtype=float16)) + + # nonzero and copyswap + a = np.array([0, 0, -1, -1/1e20, 0, 2.0**-24, 7.629e-6], dtype=float16) + assert_equal(a.nonzero()[0], + [2, 5, 6]) + a = a.byteswap().newbyteorder() + assert_equal(a.nonzero()[0], + [2, 5, 6]) + + # dot + a = np.arange(0, 10, 0.5, dtype=float16) + b = np.ones((20,), dtype=float16) + assert_equal(np.dot(a, b), + 95) + + # argmax + a = np.array([0, -np.inf, -2, 0.5, 12.55, 7.3, 2.1, 12.4], dtype=float16) + assert_equal(a.argmax(), + 4) + a = np.array([0, -np.inf, -2, np.inf, 12.55, np.nan, 2.1, 12.4], dtype=float16) + assert_equal(a.argmax(), + 5) + + # getitem + a = np.arange(10, dtype=float16) + for i in range(10): + assert_equal(a.item(i), i) + + def test_spacing_nextafter(self): + """Test np.spacing and np.nextafter""" + # All non-negative finite #'s + a = np.arange(0x7c00, dtype=uint16) + hinf = np.array((np.inf,), dtype=float16) + a_f16 = a.view(dtype=float16) + + assert_equal(np.spacing(a_f16[:-1]), a_f16[1:]-a_f16[:-1]) + + assert_equal(np.nextafter(a_f16[:-1], hinf), a_f16[1:]) + assert_equal(np.nextafter(a_f16[0], -hinf), -a_f16[1]) + assert_equal(np.nextafter(a_f16[1:], -hinf), a_f16[:-1]) + + # switch to negatives + a |= 0x8000 + + assert_equal(np.spacing(a_f16[0]), np.spacing(a_f16[1])) + assert_equal(np.spacing(a_f16[1:]), a_f16[:-1]-a_f16[1:]) + + assert_equal(np.nextafter(a_f16[0], hinf), -a_f16[1]) + assert_equal(np.nextafter(a_f16[1:], hinf), a_f16[:-1]) + assert_equal(np.nextafter(a_f16[:-1], -hinf), a_f16[1:]) + + def test_half_ufuncs(self): + """Test the various ufuncs""" + + a = np.array([0, 1, 2, 4, 2], dtype=float16) + b = np.array([-2, 5, 1, 4, 3], dtype=float16) + c = np.array([0, -1, -np.inf, np.nan, 6], dtype=float16) + + assert_equal(np.add(a, b), [-2, 6, 3, 8, 5]) + assert_equal(np.subtract(a, b), [2, -4, 1, 0, -1]) + assert_equal(np.multiply(a, b), [0, 5, 2, 16, 6]) + assert_equal(np.divide(a, b), [0, 0.199951171875, 2, 1, 0.66650390625]) + + assert_equal(np.equal(a, b), [False, False, False, True, False]) + assert_equal(np.not_equal(a, b), [True, True, True, 
False, True]) + assert_equal(np.less(a, b), [False, True, False, False, True]) + assert_equal(np.less_equal(a, b), [False, True, False, True, True]) + assert_equal(np.greater(a, b), [True, False, True, False, False]) + assert_equal(np.greater_equal(a, b), [True, False, True, True, False]) + assert_equal(np.logical_and(a, b), [False, True, True, True, True]) + assert_equal(np.logical_or(a, b), [True, True, True, True, True]) + assert_equal(np.logical_xor(a, b), [True, False, False, False, False]) + assert_equal(np.logical_not(a), [True, False, False, False, False]) + + assert_equal(np.isnan(c), [False, False, False, True, False]) + assert_equal(np.isinf(c), [False, False, True, False, False]) + assert_equal(np.isfinite(c), [True, True, False, False, True]) + assert_equal(np.signbit(b), [True, False, False, False, False]) + + assert_equal(np.copysign(b, a), [2, 5, 1, 4, 3]) + + assert_equal(np.maximum(a, b), [0, 5, 2, 4, 3]) + + x = np.maximum(b, c) + assert_(np.isnan(x[3])) + x[3] = 0 + assert_equal(x, [0, 5, 1, 0, 6]) + + assert_equal(np.minimum(a, b), [-2, 1, 1, 4, 2]) + + x = np.minimum(b, c) + assert_(np.isnan(x[3])) + x[3] = 0 + assert_equal(x, [-2, -1, -np.inf, 0, 3]) + + assert_equal(np.fmax(a, b), [0, 5, 2, 4, 3]) + assert_equal(np.fmax(b, c), [0, 5, 1, 4, 6]) + assert_equal(np.fmin(a, b), [-2, 1, 1, 4, 2]) + assert_equal(np.fmin(b, c), [-2, -1, -np.inf, 4, 3]) + + assert_equal(np.floor_divide(a, b), [0, 0, 2, 1, 0]) + assert_equal(np.remainder(a, b), [0, 1, 0, 0, 2]) + assert_equal(np.divmod(a, b), ([0, 0, 2, 1, 0], [0, 1, 0, 0, 2])) + assert_equal(np.square(b), [4, 25, 1, 16, 9]) + assert_equal(np.reciprocal(b), [-0.5, 0.199951171875, 1, 0.25, 0.333251953125]) + assert_equal(np.ones_like(b), [1, 1, 1, 1, 1]) + assert_equal(np.conjugate(b), b) + assert_equal(np.absolute(b), [2, 5, 1, 4, 3]) + assert_equal(np.negative(b), [2, -5, -1, -4, -3]) + assert_equal(np.positive(b), b) + assert_equal(np.sign(b), [-1, 1, 1, 1, 1]) + assert_equal(np.modf(b), ([0, 0, 0, 0, 0], b)) + assert_equal(np.frexp(b), ([-0.5, 0.625, 0.5, 0.5, 0.75], [2, 3, 1, 3, 2])) + assert_equal(np.ldexp(b, [0, 1, 2, 4, 2]), [-2, 10, 4, 64, 12]) + + def test_half_coercion(self): + """Test that half gets coerced properly with the other types""" + a16 = np.array((1,), dtype=float16) + a32 = np.array((1,), dtype=float32) + b16 = float16(1) + b32 = float32(1) + + assert_equal(np.power(a16, 2).dtype, float16) + assert_equal(np.power(a16, 2.0).dtype, float16) + assert_equal(np.power(a16, b16).dtype, float16) + assert_equal(np.power(a16, b32).dtype, float16) + assert_equal(np.power(a16, a16).dtype, float16) + assert_equal(np.power(a16, a32).dtype, float32) + + assert_equal(np.power(b16, 2).dtype, float64) + assert_equal(np.power(b16, 2.0).dtype, float64) + assert_equal(np.power(b16, b16).dtype, float16) + assert_equal(np.power(b16, b32).dtype, float32) + assert_equal(np.power(b16, a16).dtype, float16) + assert_equal(np.power(b16, a32).dtype, float32) + + assert_equal(np.power(a32, a16).dtype, float32) + assert_equal(np.power(a32, b16).dtype, float32) + assert_equal(np.power(b32, a16).dtype, float16) + assert_equal(np.power(b32, b16).dtype, float32) + + @pytest.mark.skipif(platform.machine() == "armv5tel", + reason="See gh-413.") + def test_half_fpe(self): + with np.errstate(all='raise'): + sx16 = np.array((1e-4,), dtype=float16) + bx16 = np.array((1e4,), dtype=float16) + sy16 = float16(1e-4) + by16 = float16(1e4) + + # Underflow errors + assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sx16) + 
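+            # 1e-4 * 1e-4 and 1e-4 / 1e4 are both 1e-8, well below the smallest
+            # float16 subnormal (2**-24, about 6e-8), so each of these
+            # operations must signal underflow.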
assert_raises_fpe('underflow', lambda a, b:a*b, sx16, sy16) + assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sx16) + assert_raises_fpe('underflow', lambda a, b:a*b, sy16, sy16) + assert_raises_fpe('underflow', lambda a, b:a/b, sx16, bx16) + assert_raises_fpe('underflow', lambda a, b:a/b, sx16, by16) + assert_raises_fpe('underflow', lambda a, b:a/b, sy16, bx16) + assert_raises_fpe('underflow', lambda a, b:a/b, sy16, by16) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14), float16(2**11)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(-2.**-14), float16(2**11)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14+2**-24), float16(2)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(-2.**-14-2**-24), float16(2)) + assert_raises_fpe('underflow', lambda a, b:a/b, + float16(2.**-14+2**-23), float16(4)) + + # Overflow errors + assert_raises_fpe('overflow', lambda a, b:a*b, bx16, bx16) + assert_raises_fpe('overflow', lambda a, b:a*b, bx16, by16) + assert_raises_fpe('overflow', lambda a, b:a*b, by16, bx16) + assert_raises_fpe('overflow', lambda a, b:a*b, by16, by16) + assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sx16) + assert_raises_fpe('overflow', lambda a, b:a/b, bx16, sy16) + assert_raises_fpe('overflow', lambda a, b:a/b, by16, sx16) + assert_raises_fpe('overflow', lambda a, b:a/b, by16, sy16) + assert_raises_fpe('overflow', lambda a, b:a+b, + float16(65504), float16(17)) + assert_raises_fpe('overflow', lambda a, b:a-b, + float16(-65504), float16(17)) + assert_raises_fpe('overflow', np.nextafter, float16(65504), float16(np.inf)) + assert_raises_fpe('overflow', np.nextafter, float16(-65504), float16(-np.inf)) + assert_raises_fpe('overflow', np.spacing, float16(65504)) + + # Invalid value errors + assert_raises_fpe('invalid', np.divide, float16(np.inf), float16(np.inf)) + assert_raises_fpe('invalid', np.spacing, float16(np.inf)) + assert_raises_fpe('invalid', np.spacing, float16(np.nan)) + assert_raises_fpe('invalid', np.nextafter, float16(np.inf), float16(0)) + assert_raises_fpe('invalid', np.nextafter, float16(-np.inf), float16(0)) + assert_raises_fpe('invalid', np.nextafter, float16(0), float16(np.nan)) + + # These should not raise + float16(65472)+float16(32) + float16(2**-13)/float16(2) + float16(2**-14)/float16(2**10) + np.spacing(float16(-65504)) + np.nextafter(float16(65504), float16(-np.inf)) + np.nextafter(float16(-65504), float16(np.inf)) + float16(2**-14)/float16(2**10) + float16(-2**-14)/float16(2**10) + float16(2**-14+2**-23)/float16(2) + float16(-2**-14-2**-23)/float16(2) + + def test_half_array_interface(self): + """Test that half is compatible with __array_interface__""" + class Dummy: + pass + + a = np.ones((1,), dtype=float16) + b = Dummy() + b.__array_interface__ = a.__array_interface__ + c = np.array(b) + assert_(c.dtype == float16) + assert_equal(a, c) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_indexerrors.py b/contrib/python/numpy/py2/numpy/core/tests/test_indexerrors.py new file mode 100644 index 0000000000..63b43c473c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_indexerrors.py @@ -0,0 +1,123 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_raises + +class TestIndexErrors(object): + '''Tests to exercise indexerrors not covered by other tests.''' + + def test_arraytypes_fasttake(self): + 'take from a 0-length dimension' + x = np.empty((2, 3, 0, 4)) + assert_raises(IndexError, x.take, [0], 
axis=2) + assert_raises(IndexError, x.take, [1], axis=2) + assert_raises(IndexError, x.take, [0], axis=2, mode='wrap') + assert_raises(IndexError, x.take, [0], axis=2, mode='clip') + + def test_take_from_object(self): + # Check exception taking from object array + d = np.zeros(5, dtype=object) + assert_raises(IndexError, d.take, [6]) + + # Check exception taking from 0-d array + d = np.zeros((5, 0), dtype=object) + assert_raises(IndexError, d.take, [1], axis=1) + assert_raises(IndexError, d.take, [0], axis=1) + assert_raises(IndexError, d.take, [0]) + assert_raises(IndexError, d.take, [0], mode='wrap') + assert_raises(IndexError, d.take, [0], mode='clip') + + def test_multiindex_exceptions(self): + a = np.empty(5, dtype=object) + assert_raises(IndexError, a.item, 20) + a = np.empty((5, 0), dtype=object) + assert_raises(IndexError, a.item, (0, 0)) + + a = np.empty(5, dtype=object) + assert_raises(IndexError, a.itemset, 20, 0) + a = np.empty((5, 0), dtype=object) + assert_raises(IndexError, a.itemset, (0, 0), 0) + + def test_put_exceptions(self): + a = np.zeros((5, 5)) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5), dtype=object) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5, 0)) + assert_raises(IndexError, a.put, 100, 0) + a = np.zeros((5, 5, 0), dtype=object) + assert_raises(IndexError, a.put, 100, 0) + + def test_iterators_exceptions(self): + "cases in iterators.c" + def assign(obj, ind, val): + obj[ind] = val + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a[0, 5, None, 2]) + assert_raises(IndexError, lambda: a[0, 5, 0, 2]) + assert_raises(IndexError, lambda: assign(a, (0, 5, None, 2), 1)) + assert_raises(IndexError, lambda: assign(a, (0, 5, 0, 2), 1)) + + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a[0, 0, None, 2]) + assert_raises(IndexError, lambda: assign(a, (0, 0, None, 2), 1)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[10]) + assert_raises(IndexError, lambda: assign(a.flat, 10, 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[10]) + assert_raises(IndexError, lambda: assign(a.flat, 10, 5)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[np.array(10)]) + assert_raises(IndexError, lambda: assign(a.flat, np.array(10), 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[np.array(10)]) + assert_raises(IndexError, lambda: assign(a.flat, np.array(10), 5)) + + a = np.zeros([1, 2, 3]) + assert_raises(IndexError, lambda: a.flat[np.array([10])]) + assert_raises(IndexError, lambda: assign(a.flat, np.array([10]), 5)) + a = np.zeros([1, 0, 3]) + assert_raises(IndexError, lambda: a.flat[np.array([10])]) + assert_raises(IndexError, lambda: assign(a.flat, np.array([10]), 5)) + + def test_mapping(self): + "cases from mapping.c" + + def assign(obj, ind, val): + obj[ind] = val + + a = np.zeros((0, 10)) + assert_raises(IndexError, lambda: a[12]) + + a = np.zeros((3, 5)) + assert_raises(IndexError, lambda: a[(10, 20)]) + assert_raises(IndexError, lambda: assign(a, (10, 20), 1)) + a = np.zeros((3, 0)) + assert_raises(IndexError, lambda: a[(1, 0)]) + assert_raises(IndexError, lambda: assign(a, (1, 0), 1)) + + a = np.zeros((10,)) + assert_raises(IndexError, lambda: assign(a, 10, 1)) + a = np.zeros((0,)) + assert_raises(IndexError, lambda: assign(a, 10, 1)) + + a = np.zeros((3, 5)) + assert_raises(IndexError, lambda: a[(1, [1, 20])]) + assert_raises(IndexError, lambda: assign(a, (1, [1, 20]), 1)) + a = np.zeros((3, 0)) + assert_raises(IndexError, 
lambda: a[(1, [0, 1])]) + assert_raises(IndexError, lambda: assign(a, (1, [0, 1]), 1)) + + def test_methods(self): + "cases from methods.c" + + a = np.zeros((3, 3)) + assert_raises(IndexError, lambda: a.item(100)) + assert_raises(IndexError, lambda: a.itemset(100, 1)) + a = np.zeros((0, 3)) + assert_raises(IndexError, lambda: a.item(100)) + assert_raises(IndexError, lambda: a.itemset(100, 1)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_indexing.py b/contrib/python/numpy/py2/numpy/core/tests/test_indexing.py new file mode 100644 index 0000000000..f7485c3f7c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_indexing.py @@ -0,0 +1,1334 @@ +from __future__ import division, absolute_import, print_function + +import sys +import warnings +import functools +import operator +import pytest + +import numpy as np +from numpy.core._multiarray_tests import array_indexing +from itertools import product +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, assert_warns, + HAS_REFCOUNT, suppress_warnings, + ) + + +class TestIndexing(object): + def test_index_no_floats(self): + a = np.array([[[5]]]) + + assert_raises(IndexError, lambda: a[0.0]) + assert_raises(IndexError, lambda: a[0, 0.0]) + assert_raises(IndexError, lambda: a[0.0, 0]) + assert_raises(IndexError, lambda: a[0.0,:]) + assert_raises(IndexError, lambda: a[:, 0.0]) + assert_raises(IndexError, lambda: a[:, 0.0,:]) + assert_raises(IndexError, lambda: a[0.0,:,:]) + assert_raises(IndexError, lambda: a[0, 0, 0.0]) + assert_raises(IndexError, lambda: a[0.0, 0, 0]) + assert_raises(IndexError, lambda: a[0, 0.0, 0]) + assert_raises(IndexError, lambda: a[-1.4]) + assert_raises(IndexError, lambda: a[0, -1.4]) + assert_raises(IndexError, lambda: a[-1.4, 0]) + assert_raises(IndexError, lambda: a[-1.4,:]) + assert_raises(IndexError, lambda: a[:, -1.4]) + assert_raises(IndexError, lambda: a[:, -1.4,:]) + assert_raises(IndexError, lambda: a[-1.4,:,:]) + assert_raises(IndexError, lambda: a[0, 0, -1.4]) + assert_raises(IndexError, lambda: a[-1.4, 0, 0]) + assert_raises(IndexError, lambda: a[0, -1.4, 0]) + assert_raises(IndexError, lambda: a[0.0:, 0.0]) + assert_raises(IndexError, lambda: a[0.0:, 0.0,:]) + + def test_slicing_no_floats(self): + a = np.array([[5]]) + + # start as float. + assert_raises(TypeError, lambda: a[0.0:]) + assert_raises(TypeError, lambda: a[0:, 0.0:2]) + assert_raises(TypeError, lambda: a[0.0::2, :0]) + assert_raises(TypeError, lambda: a[0.0:1:2,:]) + assert_raises(TypeError, lambda: a[:, 0.0:]) + # stop as float. + assert_raises(TypeError, lambda: a[:0.0]) + assert_raises(TypeError, lambda: a[:0, 1:2.0]) + assert_raises(TypeError, lambda: a[:0.0:2, :0]) + assert_raises(TypeError, lambda: a[:0.0,:]) + assert_raises(TypeError, lambda: a[:, 0:4.0:2]) + # step as float. + assert_raises(TypeError, lambda: a[::1.0]) + assert_raises(TypeError, lambda: a[0:, :2:2.0]) + assert_raises(TypeError, lambda: a[1::4.0, :0]) + assert_raises(TypeError, lambda: a[::5.0,:]) + assert_raises(TypeError, lambda: a[:, 0:4:2.0]) + # mixed. + assert_raises(TypeError, lambda: a[1.0:2:2.0]) + assert_raises(TypeError, lambda: a[1.0::2.0]) + assert_raises(TypeError, lambda: a[0:, :2.0:2.0]) + assert_raises(TypeError, lambda: a[1.0:1:4.0, :0]) + assert_raises(TypeError, lambda: a[1.0:5.0:5.0,:]) + assert_raises(TypeError, lambda: a[:, 0.4:4.0:2.0]) + # should still get the DeprecationWarning if step = 0. 
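+        # (here the non-integer step is simply rejected, as the TypeError below shows)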
+ assert_raises(TypeError, lambda: a[::0.0]) + + def test_index_no_array_to_index(self): + # No non-scalar arrays. + a = np.array([[[1]]]) + + assert_raises(TypeError, lambda: a[a:a:a]) + + def test_none_index(self): + # `None` index adds newaxis + a = np.array([1, 2, 3]) + assert_equal(a[None], a[np.newaxis]) + assert_equal(a[None].ndim, a.ndim + 1) + + def test_empty_tuple_index(self): + # Empty tuple index creates a view + a = np.array([1, 2, 3]) + assert_equal(a[()], a) + assert_(a[()].base is a) + a = np.array(0) + assert_(isinstance(a[()], np.int_)) + + def test_void_scalar_empty_tuple(self): + s = np.zeros((), dtype='V4') + assert_equal(s[()].dtype, s.dtype) + assert_equal(s[()], s) + assert_equal(type(s[...]), np.ndarray) + + def test_same_kind_index_casting(self): + # Indexes should be cast with same-kind and not safe, even if that + # is somewhat unsafe. So test various different code paths. + index = np.arange(5) + u_index = index.astype(np.uintp) + arr = np.arange(10) + + assert_array_equal(arr[index], arr[u_index]) + arr[u_index] = np.arange(5) + assert_array_equal(arr, np.arange(10)) + + arr = np.arange(10).reshape(5, 2) + assert_array_equal(arr[index], arr[u_index]) + + arr[u_index] = np.arange(5)[:,None] + assert_array_equal(arr, np.arange(5)[:,None].repeat(2, axis=1)) + + arr = np.arange(25).reshape(5, 5) + assert_array_equal(arr[u_index, u_index], arr[index, index]) + + def test_empty_fancy_index(self): + # Empty list index creates an empty array + # with the same dtype (but with weird shape) + a = np.array([1, 2, 3]) + assert_equal(a[[]], []) + assert_equal(a[[]].dtype, a.dtype) + + b = np.array([], dtype=np.intp) + assert_equal(a[[]], []) + assert_equal(a[[]].dtype, a.dtype) + + b = np.array([]) + assert_raises(IndexError, a.__getitem__, b) + + def test_ellipsis_index(self): + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + assert_(a[...] is not a) + assert_equal(a[...], a) + # `a[...]` was `a` in numpy <1.9. 
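+        # Now it is a distinct view whose .base is the original array: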
+ assert_(a[...].base is a) + + # Slicing with ellipsis can skip an + # arbitrary number of dimensions + assert_equal(a[0, ...], a[0]) + assert_equal(a[0, ...], a[0,:]) + assert_equal(a[..., 0], a[:, 0]) + + # Slicing with ellipsis always results + # in an array, not a scalar + assert_equal(a[0, ..., 1], np.array(2)) + + # Assignment with `(Ellipsis,)` on 0-d arrays + b = np.array(1) + b[(Ellipsis,)] = 2 + assert_equal(b, 2) + + def test_single_int_index(self): + # Single integer index selects one row + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + + assert_equal(a[0], [1, 2, 3]) + assert_equal(a[-1], [7, 8, 9]) + + # Index out of bounds produces IndexError + assert_raises(IndexError, a.__getitem__, 1 << 30) + # Index overflow produces IndexError + assert_raises(IndexError, a.__getitem__, 1 << 64) + + def test_single_bool_index(self): + # Single boolean index + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + + assert_equal(a[np.array(True)], a[None]) + assert_equal(a[np.array(False)], a[None][0:0]) + + def test_boolean_shape_mismatch(self): + arr = np.ones((5, 4, 3)) + + index = np.array([True]) + assert_raises(IndexError, arr.__getitem__, index) + + index = np.array([False] * 6) + assert_raises(IndexError, arr.__getitem__, index) + + index = np.zeros((4, 4), dtype=bool) + assert_raises(IndexError, arr.__getitem__, index) + + assert_raises(IndexError, arr.__getitem__, (slice(None), index)) + + def test_boolean_indexing_onedim(self): + # Indexing a 2-dimensional array with + # boolean array of length one + a = np.array([[ 0., 0., 0.]]) + b = np.array([ True], dtype=bool) + assert_equal(a[b], a) + # boolean assignment + a[b] = 1. + assert_equal(a, [[1., 1., 1.]]) + + def test_boolean_assignment_value_mismatch(self): + # A boolean assignment should fail when the shape of the values + # cannot be broadcast to the subscription. (see also gh-3458) + a = np.arange(4) + + def f(a, v): + a[a > -1] = v + + assert_raises(ValueError, f, a, []) + assert_raises(ValueError, f, a, [1, 2, 3]) + assert_raises(ValueError, f, a[:1], [1, 2, 3]) + + def test_boolean_assignment_needs_api(self): + # See also gh-7666 + # This caused a segfault on Python 2 due to the GIL not being + # held when the iterator does not need it, but the transfer function + # does + arr = np.zeros(1000) + indx = np.zeros(1000, dtype=bool) + indx[:100] = True + arr[indx] = np.ones(100, dtype=object) + + expected = np.zeros(1000) + expected[:100] = 1 + assert_array_equal(arr, expected) + + def test_boolean_indexing_twodim(self): + # Indexing a 2-dimensional array with + # 2-dimensional boolean array + a = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + b = np.array([[ True, False, True], + [False, True, False], + [ True, False, True]]) + assert_equal(a[b], [1, 3, 5, 7, 9]) + assert_equal(a[b[1]], [[4, 5, 6]]) + assert_equal(a[b[0]], a[b[2]]) + + # boolean assignment + a[b] = 0 + assert_equal(a, [[0, 2, 0], + [4, 0, 6], + [0, 8, 0]]) + + def test_boolean_indexing_list(self): + # Regression test for #13715. It's a use-after-free bug which the + # test won't directly catch, but it will show up in valgrind. + a = np.array([1, 2, 3]) + b = [True, False, True] + # Two variants of the test because the first takes a fast path + assert_equal(a[b], [1, 3]) + assert_equal(a[None, b], [[1, 3]]) + + def test_reverse_strides_and_subspace_bufferinit(self): + # This tests that the strides are not reversed for simple and + # subspace fancy indexing. 
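+        # b is an all-zero index array with a negative stride and c is a
+        # reversed arange, so every element of c is written to a[0]; the final
+        # value of a[0] therefore reveals whether the reversed iteration order
+        # was honored.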
+ a = np.ones(5) + b = np.zeros(5, dtype=np.intp)[::-1] + c = np.arange(5)[::-1] + + a[b] = c + # If the strides are not reversed, the 0 in the arange comes last. + assert_equal(a[0], 0) + + # This also tests that the subspace buffer is initialized: + a = np.ones((5, 2)) + c = np.arange(10).reshape(5, 2)[::-1] + a[b, :] = c + assert_equal(a[0], [0, 1]) + + def test_reversed_strides_result_allocation(self): + # Test a bug when calculating the output strides for a result array + # when the subspace size was 1 (and test other cases as well) + a = np.arange(10)[:, None] + i = np.arange(10)[::-1] + assert_array_equal(a[i], a[i.copy('C')]) + + a = np.arange(20).reshape(-1, 2) + + def test_uncontiguous_subspace_assignment(self): + # During development there was a bug activating a skip logic + # based on ndim instead of size. + a = np.full((3, 4, 2), -1) + b = np.full((3, 4, 2), -1) + + a[[0, 1]] = np.arange(2 * 4 * 2).reshape(2, 4, 2).T + b[[0, 1]] = np.arange(2 * 4 * 2).reshape(2, 4, 2).T.copy() + + assert_equal(a, b) + + def test_too_many_fancy_indices_special_case(self): + # Just documents behaviour, this is a small limitation. + a = np.ones((1,) * 32) # 32 is NPY_MAXDIMS + assert_raises(IndexError, a.__getitem__, (np.array([0]),) * 32) + + def test_scalar_array_bool(self): + # NumPy bools can be used as boolean index (python ones as of yet not) + a = np.array(1) + assert_equal(a[np.bool_(True)], a[np.array(True)]) + assert_equal(a[np.bool_(False)], a[np.array(False)]) + + # After deprecating bools as integers: + #a = np.array([0,1,2]) + #assert_equal(a[True, :], a[None, :]) + #assert_equal(a[:, True], a[:, None]) + # + #assert_(not np.may_share_memory(a, a[True, :])) + + def test_everything_returns_views(self): + # Before `...` would return a itself. + a = np.arange(5) + + assert_(a is not a[()]) + assert_(a is not a[...]) + assert_(a is not a[:]) + + def test_broaderrors_indexing(self): + a = np.zeros((5, 5)) + assert_raises(IndexError, a.__getitem__, ([0, 1], [0, 1, 2])) + assert_raises(IndexError, a.__setitem__, ([0, 1], [0, 1, 2]), 0) + + def test_trivial_fancy_out_of_bounds(self): + a = np.zeros(5) + ind = np.ones(20, dtype=np.intp) + ind[-1] = 10 + assert_raises(IndexError, a.__getitem__, ind) + assert_raises(IndexError, a.__setitem__, ind, 0) + ind = np.ones(20, dtype=np.intp) + ind[0] = 11 + assert_raises(IndexError, a.__getitem__, ind) + assert_raises(IndexError, a.__setitem__, ind, 0) + + def test_trivial_fancy_not_possible(self): + # Test that the fast path for trivial assignment is not incorrectly + # used when the index is not contiguous or 1D, see also gh-11467. + a = np.arange(6) + idx = np.arange(6, dtype=np.intp).reshape(2, 1, 3)[:, :, 0] + assert_array_equal(a[idx], idx) + + # this case must not go into the fast path, note that idx is + # a non-contiuguous none 1D array here. + a[idx] = -1 + res = np.arange(6) + res[0] = -1 + res[3] = -1 + assert_array_equal(a, res) + + def test_nonbaseclass_values(self): + class SubClass(np.ndarray): + def __array_finalize__(self, old): + # Have array finalize do funny things + self.fill(99) + + a = np.zeros((5, 5)) + s = a.copy().view(type=SubClass) + s.fill(1) + + a[[0, 1, 2, 3, 4], :] = s + assert_((a == 1).all()) + + # Subspace is last, so transposing might want to finalize + a[:, [0, 1, 2, 3, 4]] = s + assert_((a == 1).all()) + + a.fill(0) + a[...] 
= s + assert_((a == 1).all()) + + def test_subclass_writeable(self): + d = np.rec.array([('NGC1001', 11), ('NGC1002', 1.), ('NGC1003', 1.)], + dtype=[('target', 'S20'), ('V_mag', '>f4')]) + ind = np.array([False, True, True], dtype=bool) + assert_(d[ind].flags.writeable) + ind = np.array([0, 1]) + assert_(d[ind].flags.writeable) + assert_(d[...].flags.writeable) + assert_(d[0].flags.writeable) + + def test_memory_order(self): + # This is not necessary to preserve. Memory layouts for + # more complex indices are not as simple. + a = np.arange(10) + b = np.arange(10).reshape(5,2).T + assert_(a[b].flags.f_contiguous) + + # Takes a different implementation branch: + a = a.reshape(-1, 1) + assert_(a[b, 0].flags.f_contiguous) + + def test_scalar_return_type(self): + # Full scalar indices should return scalars and object + # arrays should not call PyArray_Return on their items + class Zero(object): + # The most basic valid indexing + def __index__(self): + return 0 + + z = Zero() + + class ArrayLike(object): + # Simple array, should behave like the array + def __array__(self): + return np.array(0) + + a = np.zeros(()) + assert_(isinstance(a[()], np.float_)) + a = np.zeros(1) + assert_(isinstance(a[z], np.float_)) + a = np.zeros((1, 1)) + assert_(isinstance(a[z, np.array(0)], np.float_)) + assert_(isinstance(a[z, ArrayLike()], np.float_)) + + # And object arrays do not call it too often: + b = np.array(0) + a = np.array(0, dtype=object) + a[()] = b + assert_(isinstance(a[()], np.ndarray)) + a = np.array([b, None]) + assert_(isinstance(a[z], np.ndarray)) + a = np.array([[b, None]]) + assert_(isinstance(a[z, np.array(0)], np.ndarray)) + assert_(isinstance(a[z, ArrayLike()], np.ndarray)) + + def test_small_regressions(self): + # Reference count of intp for index checks + a = np.array([0]) + if HAS_REFCOUNT: + refcount = sys.getrefcount(np.dtype(np.intp)) + # item setting always checks indices in separate function: + a[np.array([0], dtype=np.intp)] = 1 + a[np.array([0], dtype=np.uint8)] = 1 + assert_raises(IndexError, a.__setitem__, + np.array([1], dtype=np.intp), 1) + assert_raises(IndexError, a.__setitem__, + np.array([1], dtype=np.uint8), 1) + + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(np.dtype(np.intp)), refcount) + + def test_unaligned(self): + v = (np.zeros(64, dtype=np.int8) + ord('a'))[1:-7] + d = v.view(np.dtype("S8")) + # unaligned source + x = (np.zeros(16, dtype=np.int8) + ord('a'))[1:-7] + x = x.view(np.dtype("S8")) + x[...] = np.array("b" * 8, dtype="S") + b = np.arange(d.size) + #trivial + assert_equal(d[b], d) + d[b] = x + # nontrivial + # unaligned index array + b = np.zeros(d.size + 1).view(np.int8)[1:-(np.intp(0).itemsize - 1)] + b = b.view(np.intp)[:d.size] + b[...] 
= np.arange(d.size) + assert_equal(d[b.astype(np.int16)], d) + d[b.astype(np.int16)] = x + # boolean + d[b % 2 == 0] + d[b % 2 == 0] = x[::2] + + def test_tuple_subclass(self): + arr = np.ones((5, 5)) + + # A tuple subclass should also be an nd-index + class TupleSubclass(tuple): + pass + index = ([1], [1]) + index = TupleSubclass(index) + assert_(arr[index].shape == (1,)) + # Unlike the non nd-index: + assert_(arr[index,].shape != (1,)) + + def test_broken_sequence_not_nd_index(self): + # See gh-5063: + # If we have an object which claims to be a sequence, but fails + # on item getting, this should not be converted to an nd-index (tuple) + # If this object happens to be a valid index otherwise, it should work + # This object here is very dubious and probably bad though: + class SequenceLike(object): + def __index__(self): + return 0 + + def __len__(self): + return 1 + + def __getitem__(self, item): + raise IndexError('Not possible') + + arr = np.arange(10) + assert_array_equal(arr[SequenceLike()], arr[SequenceLike(),]) + + # also test that field indexing does not segfault + # for a similar reason, by indexing a structured array + arr = np.zeros((1,), dtype=[('f1', 'i8'), ('f2', 'i8')]) + assert_array_equal(arr[SequenceLike()], arr[SequenceLike(),]) + + def test_indexing_array_weird_strides(self): + # See also gh-6221 + # the shapes used here come from the issue and create the correct + # size for the iterator buffering size. + x = np.ones(10) + x2 = np.ones((10, 2)) + ind = np.arange(10)[:, None, None, None] + ind = np.broadcast_to(ind, (10, 55, 4, 4)) + + # single advanced index case + assert_array_equal(x[ind], x[ind.copy()]) + # higher dimensional advanced index + zind = np.zeros(4, dtype=np.intp) + assert_array_equal(x2[ind, zind], x2[ind.copy(), zind]) + + def test_indexing_array_negative_strides(self): + # From gh-8264, + # core dumps if negative strides are used in iteration + arro = np.zeros((4, 4)) + arr = arro[::-1, ::-1] + + slices = (slice(None), [0, 1, 2, 3]) + arr[slices] = 10 + assert_array_equal(arr, 10.) + +class TestFieldIndexing(object): + def test_scalar_return_type(self): + # Field access on an array should return an array, even if it + # is 0-d. + a = np.zeros((), [('a','f8')]) + assert_(isinstance(a['a'], np.ndarray)) + assert_(isinstance(a[['a']], np.ndarray)) + + +class TestBroadcastedAssignments(object): + def assign(self, a, ind, val): + a[ind] = val + return a + + def test_prepending_ones(self): + a = np.zeros((3, 2)) + + a[...] = np.ones((1, 3, 2)) + # Fancy with subspace with and without transpose + a[[0, 1, 2], :] = np.ones((1, 3, 2)) + a[:, [0, 1]] = np.ones((1, 3, 2)) + # Fancy without subspace (with broadcasting) + a[[[0], [1], [2]], [0, 1]] = np.ones((1, 3, 2)) + + def test_prepend_not_one(self): + assign = self.assign + s_ = np.s_ + a = np.zeros(5) + + # Too large and not only ones. 
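+        # (a prepended dimension of length 1 can be broadcast away on
+        # assignment, but a prepended dimension of length 2 cannot, so these
+        # all raise ValueError)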
+ assert_raises(ValueError, assign, a, s_[...], np.ones((2, 1))) + assert_raises(ValueError, assign, a, s_[[1, 2, 3],], np.ones((2, 1))) + assert_raises(ValueError, assign, a, s_[[[1], [2]],], np.ones((2,2,1))) + + def test_simple_broadcasting_errors(self): + assign = self.assign + s_ = np.s_ + a = np.zeros((5, 1)) + + assert_raises(ValueError, assign, a, s_[...], np.zeros((5, 2))) + assert_raises(ValueError, assign, a, s_[...], np.zeros((5, 0))) + assert_raises(ValueError, assign, a, s_[:, [0]], np.zeros((5, 2))) + assert_raises(ValueError, assign, a, s_[:, [0]], np.zeros((5, 0))) + assert_raises(ValueError, assign, a, s_[[0], :], np.zeros((2, 1))) + + def test_index_is_larger(self): + # Simple case of fancy index broadcasting of the index. + a = np.zeros((5, 5)) + a[[[0], [1], [2]], [0, 1, 2]] = [2, 3, 4] + + assert_((a[:3, :3] == [2, 3, 4]).all()) + + def test_broadcast_subspace(self): + a = np.zeros((100, 100)) + v = np.arange(100)[:,None] + b = np.arange(100)[::-1] + a[b] = v + assert_((a[::-1] == v).all()) + + +class TestSubclasses(object): + def test_basic(self): + # Test that indexing in various ways produces SubClass instances, + # and that the base is set up correctly: the original subclass + # instance for views, and a new ndarray for advanced/boolean indexing + # where a copy was made (latter a regression test for gh-11983). + class SubClass(np.ndarray): + pass + + a = np.arange(5) + s = a.view(SubClass) + s_slice = s[:3] + assert_(type(s_slice) is SubClass) + assert_(s_slice.base is s) + assert_array_equal(s_slice, a[:3]) + + s_fancy = s[[0, 1, 2]] + assert_(type(s_fancy) is SubClass) + assert_(s_fancy.base is not s) + assert_(type(s_fancy.base) is np.ndarray) + assert_array_equal(s_fancy, a[[0, 1, 2]]) + assert_array_equal(s_fancy.base, a[[0, 1, 2]]) + + s_bool = s[s > 0] + assert_(type(s_bool) is SubClass) + assert_(s_bool.base is not s) + assert_(type(s_bool.base) is np.ndarray) + assert_array_equal(s_bool, a[a > 0]) + assert_array_equal(s_bool.base, a[a > 0]) + + def test_finalize_gets_full_info(self): + # Array finalize should be called on the filled array. + class SubClass(np.ndarray): + def __array_finalize__(self, old): + self.finalize_status = np.array(self) + self.old = old + + s = np.arange(10).view(SubClass) + new_s = s[:3] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + new_s = s[[0,1,2,3]] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + new_s = s[s > 0] + assert_array_equal(new_s.finalize_status, new_s) + assert_array_equal(new_s.old, s) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_slice_decref_getsetslice(self): + # See gh-10066, a temporary slice object should be discarted. + # This test is only really interesting on Python 2 since + # it goes through `__set/getslice__` here and can probably be + # removed. Use 0:7 to make sure it is never None:7. 
+ class KeepIndexObject(np.ndarray): + def __getitem__(self, indx): + self.indx = indx + if indx == slice(0, 7): + raise ValueError + + def __setitem__(self, indx, val): + self.indx = indx + if indx == slice(0, 4): + raise ValueError + + k = np.array([1]).view(KeepIndexObject) + k[0:5] + assert_equal(k.indx, slice(0, 5)) + assert_equal(sys.getrefcount(k.indx), 2) + try: + k[0:7] + raise AssertionError + except ValueError: + # The exception holds a reference to the slice so clear on Py2 + if hasattr(sys, 'exc_clear'): + with suppress_warnings() as sup: + sup.filter(DeprecationWarning) + sys.exc_clear() + assert_equal(k.indx, slice(0, 7)) + assert_equal(sys.getrefcount(k.indx), 2) + + k[0:3] = 6 + assert_equal(k.indx, slice(0, 3)) + assert_equal(sys.getrefcount(k.indx), 2) + try: + k[0:4] = 2 + raise AssertionError + except ValueError: + # The exception holds a reference to the slice so clear on Py2 + if hasattr(sys, 'exc_clear'): + with suppress_warnings() as sup: + sup.filter(DeprecationWarning) + sys.exc_clear() + assert_equal(k.indx, slice(0, 4)) + assert_equal(sys.getrefcount(k.indx), 2) + + +class TestFancyIndexingCast(object): + def test_boolean_index_cast_assign(self): + # Setup the boolean index and float arrays. + shape = (8, 63) + bool_index = np.zeros(shape).astype(bool) + bool_index[0, 1] = True + zero_array = np.zeros(shape) + + # Assigning float is fine. + zero_array[bool_index] = np.array([1]) + assert_equal(zero_array[0, 1], 1) + + # Fancy indexing works, although we get a cast warning. + assert_warns(np.ComplexWarning, + zero_array.__setitem__, ([0], [1]), np.array([2 + 1j])) + assert_equal(zero_array[0, 1], 2) # No complex part + + # Cast complex to float, throwing away the imaginary portion. + assert_warns(np.ComplexWarning, + zero_array.__setitem__, bool_index, np.array([1j])) + assert_equal(zero_array[0, 1], 0) + +class TestFancyIndexingEquivalence(object): + def test_object_assign(self): + # Check that the field and object special case using copyto is active. + # The right hand side cannot be converted to an array here. + a = np.arange(5, dtype=object) + b = a.copy() + a[:3] = [1, (1,2), 3] + b[[0, 1, 2]] = [1, (1,2), 3] + assert_array_equal(a, b) + + # test same for subspace fancy indexing + b = np.arange(5, dtype=object)[None, :] + b[[0], :3] = [[1, (1,2), 3]] + assert_array_equal(a, b[0]) + + # Check that swapping of axes works. + # There was a bug that made the later assignment throw a ValueError + # do to an incorrectly transposed temporary right hand side (gh-5714) + b = b.T + b[:3, [0]] = [[1], [(1,2)], [3]] + assert_array_equal(a, b[:, 0]) + + # Another test for the memory order of the subspace + arr = np.ones((3, 4, 5), dtype=object) + # Equivalent slicing assignment for comparison + cmp_arr = arr.copy() + cmp_arr[:1, ...] = [[[1], [2], [3], [4]]] + arr[[0], ...] = [[[1], [2], [3], [4]]] + assert_array_equal(arr, cmp_arr) + arr = arr.copy('F') + arr[[0], ...] = [[[1], [2], [3], [4]]] + assert_array_equal(arr, cmp_arr) + + def test_cast_equivalence(self): + # Yes, normal slicing uses unsafe casting. + a = np.arange(5) + b = a.copy() + + a[:3] = np.array(['2', '-3', '-1']) + b[[0, 2, 1]] = np.array(['2', '-1', '-3']) + assert_array_equal(a, b) + + # test the same for subspace fancy indexing + b = np.arange(5)[None, :] + b[[0], :3] = np.array([['2', '-3', '-1']]) + assert_array_equal(a, b[0]) + + +class TestMultiIndexingAutomated(object): + """ + These tests use code to mimic the C-Code indexing for selection. 
+ + NOTE: + + * This still lacks tests for complex item setting. + * If you change behavior of indexing, you might want to modify + these tests to try more combinations. + * Behavior was written to match numpy version 1.8. (though a + first version matched 1.7.) + * Only tuple indices are supported by the mimicking code. + (and tested as of writing this) + * Error types should match most of the time as long as there + is only one error. For multiple errors, what gets raised + will usually not be the same one. They are *not* tested. + + Update 2016-11-30: It is probably not worth maintaining this test + indefinitely and it can be dropped if maintenance becomes a burden. + + """ + + def setup(self): + self.a = np.arange(np.prod([3, 1, 5, 6])).reshape(3, 1, 5, 6) + self.b = np.empty((3, 0, 5, 6)) + self.complex_indices = ['skip', Ellipsis, + 0, + # Boolean indices, up to 3-d for some special cases of eating up + # dimensions, also need to test all False + np.array([True, False, False]), + np.array([[True, False], [False, True]]), + np.array([[[False, False], [False, False]]]), + # Some slices: + slice(-5, 5, 2), + slice(1, 1, 100), + slice(4, -1, -2), + slice(None, None, -3), + # Some Fancy indexes: + np.empty((0, 1, 1), dtype=np.intp), # empty and can be broadcast + np.array([0, 1, -2]), + np.array([[2], [0], [1]]), + np.array([[0, -1], [0, 1]], dtype=np.dtype('intp').newbyteorder()), + np.array([2, -1], dtype=np.int8), + np.zeros([1]*31, dtype=int), # trigger too large array. + np.array([0., 1.])] # invalid datatype + # Some simpler indices that still cover a bit more + self.simple_indices = [Ellipsis, None, -1, [1], np.array([True]), + 'skip'] + # Very simple ones to fill the rest: + self.fill_indices = [slice(None, None), 0] + + def _get_multi_index(self, arr, indices): + """Mimic multi dimensional indexing. + + Parameters + ---------- + arr : ndarray + Array to be indexed. + indices : tuple of index objects + + Returns + ------- + out : ndarray + An array equivalent to the indexing operation (but always a copy). + `arr[indices]` should be identical. + no_copy : bool + Whether the indexing operation requires a copy. If this is `True`, + `np.may_share_memory(arr, arr[indices])` should be `True` (with + some exceptions for scalars and possibly 0-d arrays). + + Notes + ----- + While the function may mostly match the errors of normal indexing this + is generally not the case. + """ + in_indices = list(indices) + indices = [] + # if False, this is a fancy or boolean index + no_copy = True + # number of fancy/scalar indexes that are not consecutive + num_fancy = 0 + # number of dimensions indexed by a "fancy" index + fancy_dim = 0 + # NOTE: This is a funny twist (and probably OK to change). + # The boolean array has illegal indexes, but this is + # allowed if the broadcast fancy-indices are 0-sized. + # This variable is to catch that case. + error_unless_broadcast_to_empty = False + + # We need to handle Ellipsis and make arrays from indices, also + # check if this is fancy indexing (set no_copy). + ndim = 0 + ellipsis_pos = None # define here mostly to replace all but first. 
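+        # First pass: classify each index (boolean array, Ellipsis, slice,
+        # integer array or scalar), count the dimensions it consumes and record
+        # whether fancy indexing forces a copy.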
+ for i, indx in enumerate(in_indices): + if indx is None: + continue + if isinstance(indx, np.ndarray) and indx.dtype == bool: + no_copy = False + if indx.ndim == 0: + raise IndexError + # boolean indices can have higher dimensions + ndim += indx.ndim + fancy_dim += indx.ndim + continue + if indx is Ellipsis: + if ellipsis_pos is None: + ellipsis_pos = i + continue # do not increment ndim counter + raise IndexError + if isinstance(indx, slice): + ndim += 1 + continue + if not isinstance(indx, np.ndarray): + # This could be open for changes in numpy. + # numpy should maybe raise an error if casting to intp + # is not safe. It rejects np.array([1., 2.]) but not + # [1., 2.] as index (same for ie. np.take). + # (Note the importance of empty lists if changing this here) + try: + indx = np.array(indx, dtype=np.intp) + except ValueError: + raise IndexError + in_indices[i] = indx + elif indx.dtype.kind != 'b' and indx.dtype.kind != 'i': + raise IndexError('arrays used as indices must be of ' + 'integer (or boolean) type') + if indx.ndim != 0: + no_copy = False + ndim += 1 + fancy_dim += 1 + + if arr.ndim - ndim < 0: + # we can't take more dimensions then we have, not even for 0-d + # arrays. since a[()] makes sense, but not a[(),]. We will + # raise an error later on, unless a broadcasting error occurs + # first. + raise IndexError + + if ndim == 0 and None not in in_indices: + # Well we have no indexes or one Ellipsis. This is legal. + return arr.copy(), no_copy + + if ellipsis_pos is not None: + in_indices[ellipsis_pos:ellipsis_pos+1] = ([slice(None, None)] * + (arr.ndim - ndim)) + + for ax, indx in enumerate(in_indices): + if isinstance(indx, slice): + # convert to an index array + indx = np.arange(*indx.indices(arr.shape[ax])) + indices.append(['s', indx]) + continue + elif indx is None: + # this is like taking a slice with one element from a new axis: + indices.append(['n', np.array([0], dtype=np.intp)]) + arr = arr.reshape((arr.shape[:ax] + (1,) + arr.shape[ax:])) + continue + if isinstance(indx, np.ndarray) and indx.dtype == bool: + if indx.shape != arr.shape[ax:ax+indx.ndim]: + raise IndexError + + try: + flat_indx = np.ravel_multi_index(np.nonzero(indx), + arr.shape[ax:ax+indx.ndim], mode='raise') + except Exception: + error_unless_broadcast_to_empty = True + # fill with 0s instead, and raise error later + flat_indx = np.array([0]*indx.sum(), dtype=np.intp) + # concatenate axis into a single one: + if indx.ndim != 0: + arr = arr.reshape((arr.shape[:ax] + + (np.prod(arr.shape[ax:ax+indx.ndim]),) + + arr.shape[ax+indx.ndim:])) + indx = flat_indx + else: + # This could be changed, a 0-d boolean index can + # make sense (even outside the 0-d indexed array case) + # Note that originally this is could be interpreted as + # integer in the full integer special case. + raise IndexError + else: + # If the index is a singleton, the bounds check is done + # before the broadcasting. This used to be different in <1.9 + if indx.ndim == 0: + if indx >= arr.shape[ax] or indx < -arr.shape[ax]: + raise IndexError + if indx.ndim == 0: + # The index is a scalar. This used to be two fold, but if + # fancy indexing was active, the check was done later, + # possibly after broadcasting it away (1.7. or earlier). + # Now it is always done. + if indx >= arr.shape[ax] or indx < - arr.shape[ax]: + raise IndexError + if (len(indices) > 0 and + indices[-1][0] == 'f' and + ax != ellipsis_pos): + # NOTE: There could still have been a 0-sized Ellipsis + # between them. Checked that with ellipsis_pos. 
+ indices[-1].append(indx) + else: + # We have a fancy index that is not after an existing one. + # NOTE: A 0-d array triggers this as well, while one may + # expect it to not trigger it, since a scalar would not be + # considered fancy indexing. + num_fancy += 1 + indices.append(['f', indx]) + + if num_fancy > 1 and not no_copy: + # We have to flush the fancy indexes left + new_indices = indices[:] + axes = list(range(arr.ndim)) + fancy_axes = [] + new_indices.insert(0, ['f']) + ni = 0 + ai = 0 + for indx in indices: + ni += 1 + if indx[0] == 'f': + new_indices[0].extend(indx[1:]) + del new_indices[ni] + ni -= 1 + for ax in range(ai, ai + len(indx[1:])): + fancy_axes.append(ax) + axes.remove(ax) + ai += len(indx) - 1 # axis we are at + indices = new_indices + # and now we need to transpose arr: + arr = arr.transpose(*(fancy_axes + axes)) + + # We only have one 'f' index now and arr is transposed accordingly. + # Now handle newaxis by reshaping... + ax = 0 + for indx in indices: + if indx[0] == 'f': + if len(indx) == 1: + continue + # First of all, reshape arr to combine fancy axes into one: + orig_shape = arr.shape + orig_slice = orig_shape[ax:ax + len(indx[1:])] + arr = arr.reshape((arr.shape[:ax] + + (np.prod(orig_slice).astype(int),) + + arr.shape[ax + len(indx[1:]):])) + + # Check if broadcasting works + res = np.broadcast(*indx[1:]) + # unfortunately the indices might be out of bounds. So check + # that first, and use mode='wrap' then. However only if + # there are any indices... + if res.size != 0: + if error_unless_broadcast_to_empty: + raise IndexError + for _indx, _size in zip(indx[1:], orig_slice): + if _indx.size == 0: + continue + if np.any(_indx >= _size) or np.any(_indx < -_size): + raise IndexError + if len(indx[1:]) == len(orig_slice): + if np.product(orig_slice) == 0: + # Work around for a crash or IndexError with 'wrap' + # in some 0-sized cases. + try: + mi = np.ravel_multi_index(indx[1:], orig_slice, + mode='raise') + except Exception: + # This happens with 0-sized orig_slice (sometimes?) + # here it is a ValueError, but indexing gives a: + raise IndexError('invalid index into 0-sized') + else: + mi = np.ravel_multi_index(indx[1:], orig_slice, + mode='wrap') + else: + # Maybe never happens... + raise ValueError + arr = arr.take(mi.ravel(), axis=ax) + try: + arr = arr.reshape((arr.shape[:ax] + + mi.shape + + arr.shape[ax+1:])) + except ValueError: + # too many dimensions, probably + raise IndexError + ax += mi.ndim + continue + + # If we are here, we have a 1D array for take: + arr = arr.take(indx[1], axis=ax) + ax += 1 + + return arr, no_copy + + def _check_multi_index(self, arr, index): + """Check a multi index item getting and simple setting. + + Parameters + ---------- + arr : ndarray + Array to be indexed, must be a reshaped arange. + index : tuple of indexing objects + Index being tested. + """ + # Test item getting + try: + mimic_get, no_copy = self._get_multi_index(arr, index) + except Exception as e: + if HAS_REFCOUNT: + prev_refcount = sys.getrefcount(arr) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) + if HAS_REFCOUNT: + assert_equal(prev_refcount, sys.getrefcount(arr)) + return + + self._compare_index_result(arr, index, mimic_get, no_copy) + + def _check_single_index(self, arr, index): + """Check a single index item getting and simple setting. + + Parameters + ---------- + arr : ndarray + Array to be indexed, must be an arange. + index : indexing object + Index being tested. 
Must be a single index and not a tuple + of indexing objects (see also `_check_multi_index`). + """ + try: + mimic_get, no_copy = self._get_multi_index(arr, (index,)) + except Exception as e: + if HAS_REFCOUNT: + prev_refcount = sys.getrefcount(arr) + assert_raises(type(e), arr.__getitem__, index) + assert_raises(type(e), arr.__setitem__, index, 0) + if HAS_REFCOUNT: + assert_equal(prev_refcount, sys.getrefcount(arr)) + return + + self._compare_index_result(arr, index, mimic_get, no_copy) + + def _compare_index_result(self, arr, index, mimic_get, no_copy): + """Compare mimicked result to indexing result. + """ + arr = arr.copy() + indexed_arr = arr[index] + assert_array_equal(indexed_arr, mimic_get) + # Check if we got a view, unless its a 0-sized or 0-d array. + # (then its not a view, and that does not matter) + if indexed_arr.size != 0 and indexed_arr.ndim != 0: + assert_(np.may_share_memory(indexed_arr, arr) == no_copy) + # Check reference count of the original array + if HAS_REFCOUNT: + if no_copy: + # refcount increases by one: + assert_equal(sys.getrefcount(arr), 3) + else: + assert_equal(sys.getrefcount(arr), 2) + + # Test non-broadcast setitem: + b = arr.copy() + b[index] = mimic_get + 1000 + if b.size == 0: + return # nothing to compare here... + if no_copy and indexed_arr.ndim != 0: + # change indexed_arr in-place to manipulate original: + indexed_arr += 1000 + assert_array_equal(arr, b) + return + # Use the fact that the array is originally an arange: + arr.flat[indexed_arr.ravel()] += 1000 + assert_array_equal(arr, b) + + def test_boolean(self): + a = np.array(5) + assert_equal(a[np.array(True)], 5) + a[np.array(True)] = 1 + assert_equal(a, 1) + # NOTE: This is different from normal broadcasting, as + # arr[boolean_array] works like in a multi index. Which means + # it is aligned to the left. This is probably correct for + # consistency with arr[boolean_array,] also no broadcasting + # is done at all + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool),)) + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool)[..., 0],)) + self._check_multi_index( + self.a, (np.zeros_like(self.a, dtype=bool)[None, ...],)) + + def test_multidim(self): + # Automatically test combinations with complex indexes on 2nd (or 1st) + # spot and the simple ones in one other spot. + with warnings.catch_warnings(): + # This is so that np.array(True) is not accepted in a full integer + # index, when running the file separately. 
+ warnings.filterwarnings('error', '', DeprecationWarning) + warnings.filterwarnings('error', '', np.VisibleDeprecationWarning) + + def isskip(idx): + return isinstance(idx, str) and idx == "skip" + + for simple_pos in [0, 2, 3]: + tocheck = [self.fill_indices, self.complex_indices, + self.fill_indices, self.fill_indices] + tocheck[simple_pos] = self.simple_indices + for index in product(*tocheck): + index = tuple(i for i in index if not isskip(i)) + self._check_multi_index(self.a, index) + self._check_multi_index(self.b, index) + + # Check very simple item getting: + self._check_multi_index(self.a, (0, 0, 0, 0)) + self._check_multi_index(self.b, (0, 0, 0, 0)) + # Also check (simple cases of) too many indices: + assert_raises(IndexError, self.a.__getitem__, (0, 0, 0, 0, 0)) + assert_raises(IndexError, self.a.__setitem__, (0, 0, 0, 0, 0), 0) + assert_raises(IndexError, self.a.__getitem__, (0, 0, [1], 0, 0)) + assert_raises(IndexError, self.a.__setitem__, (0, 0, [1], 0, 0), 0) + + def test_1d(self): + a = np.arange(10) + for index in self.complex_indices: + self._check_single_index(a, index) + +class TestFloatNonIntegerArgument(object): + """ + These test that ``TypeError`` is raised when you try to use + non-integers as arguments to for indexing and slicing e.g. ``a[0.0:5]`` + and ``a[0.5]``, or other functions like ``array.reshape(1., -1)``. + + """ + def test_valid_indexing(self): + # These should raise no errors. + a = np.array([[[5]]]) + + a[np.array([0])] + a[[0, 0]] + a[:, [0, 0]] + a[:, 0,:] + a[:,:,:] + + def test_valid_slicing(self): + # These should raise no errors. + a = np.array([[[5]]]) + + a[::] + a[0:] + a[:2] + a[0:2] + a[::2] + a[1::2] + a[:2:2] + a[1:2:2] + + def test_non_integer_argument_errors(self): + a = np.array([[5]]) + + assert_raises(TypeError, np.reshape, a, (1., 1., -1)) + assert_raises(TypeError, np.reshape, a, (np.array(1.), -1)) + assert_raises(TypeError, np.take, a, [0], 1.) + assert_raises(TypeError, np.take, a, [0], np.float64(1.)) + + def test_non_integer_sequence_multiplication(self): + # NumPy scalar sequence multiply should not work with non-integers + def mult(a, b): + return a * b + + assert_raises(TypeError, mult, [1], np.float_(3)) + # following should be OK + mult([1], np.int_(3)) + + def test_reduce_axis_float_index(self): + d = np.zeros((3,3,3)) + assert_raises(TypeError, np.min, d, 0.5) + assert_raises(TypeError, np.min, d, (0.5, 1)) + assert_raises(TypeError, np.min, d, (1, 2.2)) + assert_raises(TypeError, np.min, d, (.2, 1.2)) + + +class TestBooleanIndexing(object): + # Using a boolean as integer argument/indexing is an error. + def test_bool_as_int_argument_errors(self): + a = np.array([[[1]]]) + + assert_raises(TypeError, np.reshape, a, (True, -1)) + assert_raises(TypeError, np.reshape, a, (np.bool_(True), -1)) + # Note that operator.index(np.array(True)) does not work, a boolean + # array is thus also deprecated, but not with the same message: + assert_raises(TypeError, operator.index, np.array(True)) + assert_warns(DeprecationWarning, operator.index, np.True_) + assert_raises(TypeError, np.take, args=(a, [0], False)) + + def test_boolean_indexing_weirdness(self): + # Weird boolean indexing things + a = np.ones((2, 3, 4)) + a[False, True, ...].shape == (0, 2, 3, 4) + a[True, [0, 1], True, True, [1], [[2]]] == (1, 2) + assert_raises(IndexError, lambda: a[False, [0, 1], ...]) + + +class TestArrayToIndexDeprecation(object): + """Creating an an index from array not 0-D is an error. 
+ + """ + def test_array_to_index_error(self): + # so no exception is expected. The raising is effectively tested above. + a = np.array([[[1]]]) + + assert_raises(TypeError, operator.index, np.array([1])) + assert_raises(TypeError, np.reshape, a, (a, -1)) + assert_raises(TypeError, np.take, a, [0], a) + + +class TestNonIntegerArrayLike(object): + """Tests that array_likes only valid if can safely cast to integer. + + For instance, lists give IndexError when they cannot be safely cast to + an integer. + + """ + def test_basic(self): + a = np.arange(10) + + assert_raises(IndexError, a.__getitem__, [0.5, 1.5]) + assert_raises(IndexError, a.__getitem__, (['1', '2'],)) + + # The following is valid + a.__getitem__([]) + + +class TestMultipleEllipsisError(object): + """An index can only have a single ellipsis. + + """ + def test_basic(self): + a = np.arange(10) + assert_raises(IndexError, lambda: a[..., ...]) + assert_raises(IndexError, a.__getitem__, ((Ellipsis,) * 2,)) + assert_raises(IndexError, a.__getitem__, ((Ellipsis,) * 3,)) + + +class TestCApiAccess(object): + def test_getitem(self): + subscript = functools.partial(array_indexing, 0) + + # 0-d arrays don't work: + assert_raises(IndexError, subscript, np.ones(()), 0) + # Out of bound values: + assert_raises(IndexError, subscript, np.ones(10), 11) + assert_raises(IndexError, subscript, np.ones(10), -11) + assert_raises(IndexError, subscript, np.ones((10, 10)), 11) + assert_raises(IndexError, subscript, np.ones((10, 10)), -11) + + a = np.arange(10) + assert_array_equal(a[4], subscript(a, 4)) + a = a.reshape(5, 2) + assert_array_equal(a[-4], subscript(a, -4)) + + def test_setitem(self): + assign = functools.partial(array_indexing, 1) + + # Deletion is impossible: + assert_raises(ValueError, assign, np.ones(10), 0) + # 0-d arrays don't work: + assert_raises(IndexError, assign, np.ones(()), 0, 0) + # Out of bound values: + assert_raises(IndexError, assign, np.ones(10), 11, 0) + assert_raises(IndexError, assign, np.ones(10), -11, 0) + assert_raises(IndexError, assign, np.ones((10, 10)), 11, 0) + assert_raises(IndexError, assign, np.ones((10, 10)), -11, 0) + + a = np.arange(10) + assign(a, 4, 10) + assert_(a[4] == 10) + + a = a.reshape(5, 2) + assign(a, 4, 10) + assert_array_equal(a[-1], [10, 10]) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_item_selection.py b/contrib/python/numpy/py2/numpy/core/tests/test_item_selection.py new file mode 100644 index 0000000000..3bc24fc956 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_item_selection.py @@ -0,0 +1,87 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import numpy as np +from numpy.testing import ( + assert_, assert_raises, assert_array_equal, HAS_REFCOUNT + ) + + +class TestTake(object): + def test_simple(self): + a = [[1, 2], [3, 4]] + a_str = [[b'1', b'2'], [b'3', b'4']] + modes = ['raise', 'wrap', 'clip'] + indices = [-1, 4] + index_arrays = [np.empty(0, dtype=np.intp), + np.empty(tuple(), dtype=np.intp), + np.empty((1, 1), dtype=np.intp)] + real_indices = {'raise': {-1: 1, 4: IndexError}, + 'wrap': {-1: 1, 4: 0}, + 'clip': {-1: 0, 4: 1}} + # Currently all types but object, use the same function generation. + # So it should not be necessary to test all. However test also a non + # refcounted struct on top of object. 
+ types = int, object, np.dtype([('', 'i', 2)]) + for t in types: + # ta works, even if the array may be odd if buffer interface is used + ta = np.array(a if np.issubdtype(t, np.number) else a_str, dtype=t) + tresult = list(ta.T.copy()) + for index_array in index_arrays: + if index_array.size != 0: + tresult[0].shape = (2,) + index_array.shape + tresult[1].shape = (2,) + index_array.shape + for mode in modes: + for index in indices: + real_index = real_indices[mode][index] + if real_index is IndexError and index_array.size != 0: + index_array.put(0, index) + assert_raises(IndexError, ta.take, index_array, + mode=mode, axis=1) + elif index_array.size != 0: + index_array.put(0, index) + res = ta.take(index_array, mode=mode, axis=1) + assert_array_equal(res, tresult[real_index]) + else: + res = ta.take(index_array, mode=mode, axis=1) + assert_(res.shape == (2,) + index_array.shape) + + def test_refcounting(self): + objects = [object() for i in range(10)] + for mode in ('raise', 'clip', 'wrap'): + a = np.array(objects) + b = np.array([2, 2, 4, 5, 3, 5]) + a.take(b, out=a[:6], mode=mode) + del a + if HAS_REFCOUNT: + assert_(all(sys.getrefcount(o) == 3 for o in objects)) + # not contiguous, example: + a = np.array(objects * 2)[::2] + a.take(b, out=a[:6], mode=mode) + del a + if HAS_REFCOUNT: + assert_(all(sys.getrefcount(o) == 3 for o in objects)) + + def test_unicode_mode(self): + d = np.arange(10) + k = b'\xc3\xa4'.decode("UTF8") + assert_raises(ValueError, d.take, 5, mode=k) + + def test_empty_partition(self): + # In reference to github issue #6530 + a_original = np.array([0, 2, 4, 6, 8, 10]) + a = a_original.copy() + + # An empty partition should be a successful no-op + a.partition(np.array([], dtype=np.int16)) + + assert_array_equal(a, a_original) + + def test_empty_argpartition(self): + # In reference to github issue #6530 + a = np.array([0, 2, 4, 6, 8, 10]) + a = a.argpartition(np.array([], dtype=np.int16)) + + b = np.array([0, 1, 2, 3, 4, 5]) + assert_array_equal(a, b) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_longdouble.py b/contrib/python/numpy/py2/numpy/core/tests/test_longdouble.py new file mode 100644 index 0000000000..ee4197f8f7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_longdouble.py @@ -0,0 +1,233 @@ +from __future__ import division, absolute_import, print_function + +import warnings +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, temppath, + ) +from numpy.core.tests._locales import CommaDecimalPointLocale + +LD_INFO = np.finfo(np.longdouble) +longdouble_longer_than_double = (LD_INFO.eps < np.finfo(np.double).eps) + + +_o = 1 + LD_INFO.eps +string_to_longdouble_inaccurate = (_o != np.longdouble(repr(_o))) +del _o + + +def test_scalar_extraction(): + """Confirm that extracting a value doesn't convert to python float""" + o = 1 + LD_INFO.eps + a = np.array([o, o, o]) + assert_equal(a[1], o) + + +# Conversions string -> long double + +# 0.1 not exactly representable in base 2 floating point. +repr_precision = len(repr(np.longdouble(0.1))) +# +2 from macro block starting around line 842 in scalartypes.c.src. +@pytest.mark.skipif(LD_INFO.precision + 2 >= repr_precision, + reason="repr precision not enough to show eps") +def test_repr_roundtrip(): + # We will only see eps in repr if within printing precision. 
+ o = 1 + LD_INFO.eps + assert_equal(np.longdouble(repr(o)), o, "repr was %s" % repr(o)) + + +def test_unicode(): + np.longdouble(u"1.2") + + +def test_string(): + np.longdouble("1.2") + + +def test_bytes(): + np.longdouble(b"1.2") + + +@pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") +def test_repr_roundtrip_bytes(): + o = 1 + LD_INFO.eps + assert_equal(np.longdouble(repr(o).encode("ascii")), o) + + +def test_bogus_string(): + assert_raises(ValueError, np.longdouble, "spam") + assert_raises(ValueError, np.longdouble, "1.0 flub") + + +@pytest.mark.skipif(string_to_longdouble_inaccurate, reason="Need strtold_l") +def test_fromstring(): + o = 1 + LD_INFO.eps + s = (" " + repr(o))*5 + a = np.array([o]*5) + assert_equal(np.fromstring(s, sep=" ", dtype=np.longdouble), a, + err_msg="reading '%s'" % s) + + +def test_fromstring_bogus(): + assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "), + np.array([1., 2., 3.])) + + +def test_fromstring_empty(): + assert_equal(np.fromstring("xxxxx", sep="x"), + np.array([])) + + +def test_fromstring_missing(): + assert_equal(np.fromstring("1xx3x4x5x6", sep="x"), + np.array([1])) + + +class TestFileBased(object): + + ldbl = 1 + LD_INFO.eps + tgt = np.array([ldbl]*5) + out = ''.join([repr(t) + '\n' for t in tgt]) + + def test_fromfile_bogus(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write("1. 2. 3. flop 4.\n") + res = np.fromfile(path, dtype=float, sep=" ") + assert_equal(res, np.array([1., 2., 3.])) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_fromfile(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.fromfile(path, dtype=np.longdouble, sep="\n") + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_genfromtxt(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.genfromtxt(path, dtype=np.longdouble) + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_loadtxt(self): + with temppath() as path: + with open(path, 'wt') as f: + f.write(self.out) + res = np.loadtxt(path, dtype=np.longdouble) + assert_equal(res, self.tgt) + + @pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") + def test_tofile_roundtrip(self): + with temppath() as path: + self.tgt.tofile(path, sep=" ") + res = np.fromfile(path, dtype=np.longdouble, sep=" ") + assert_equal(res, self.tgt) + + +# Conversions long double -> string + + +def test_repr_exact(): + o = 1 + LD_INFO.eps + assert_(repr(o) != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, reason="BUG #2376") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_format(): + o = 1 + LD_INFO.eps + assert_("{0:.40g}".format(o) != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, reason="BUG #2376") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_percent(): + o = 1 + LD_INFO.eps + assert_("%.40g" % o != '1') + + +@pytest.mark.skipif(longdouble_longer_than_double, + reason="array repr problem") +@pytest.mark.skipif(string_to_longdouble_inaccurate, + reason="Need strtold_l") +def test_array_repr(): + o = 1 + LD_INFO.eps + a = np.array([o]) + b = np.array([1], dtype=np.longdouble) + if not np.all(a != b): + raise ValueError("precision loss creating arrays") + assert_(repr(a) != 
repr(b)) + +# +# Locale tests: scalar types formatting should be independent of the locale +# + +class TestCommaDecimalPointLocale(CommaDecimalPointLocale): + + def test_repr_roundtrip_foreign(self): + o = 1.5 + assert_equal(o, np.longdouble(repr(o))) + + def test_fromstring_foreign_repr(self): + f = 1.234 + a = np.fromstring(repr(f), dtype=float, sep=" ") + assert_equal(a[0], f) + + def test_fromstring_best_effort_float(self): + assert_equal(np.fromstring("1,234", dtype=float, sep=" "), + np.array([1.])) + + def test_fromstring_best_effort(self): + assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "), + np.array([1.])) + + def test_fromstring_foreign(self): + s = "1.234" + a = np.fromstring(s, dtype=np.longdouble, sep=" ") + assert_equal(a[0], np.longdouble(s)) + + def test_fromstring_foreign_sep(self): + a = np.array([1, 2, 3, 4]) + b = np.fromstring("1,2,3,4,", dtype=np.longdouble, sep=",") + assert_array_equal(a, b) + + def test_fromstring_foreign_value(self): + b = np.fromstring("1,234", dtype=np.longdouble, sep=" ") + assert_array_equal(b[0], 1) + +@pytest.mark.parametrize("int_val", [ + # cases discussed in gh-10723 + # and gh-9968 + 2 ** 1024, 0]) +def test_longdouble_from_int(int_val): + # for issue gh-9968 + str_val = str(int_val) + # we'll expect a RuntimeWarning on platforms + # with np.longdouble equivalent to np.double + # for large integer input + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + # can be inf==inf on some platforms + assert np.longdouble(int_val) == np.longdouble(str_val) + # we can't directly compare the int and + # max longdouble value on all platforms + if np.allclose(np.finfo(np.longdouble).max, + np.finfo(np.double).max) and w: + assert w[0].category is RuntimeWarning + +@pytest.mark.parametrize("bool_val", [ + True, False]) +def test_longdouble_from_bool(bool_val): + assert np.longdouble(bool_val) == np.longdouble(int(bool_val)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_machar.py b/contrib/python/numpy/py2/numpy/core/tests/test_machar.py new file mode 100644 index 0000000000..ab8800c09d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_machar.py @@ -0,0 +1,32 @@ +""" +Test machar. Given recent changes to hardcode type data, we might want to get +rid of both MachAr and this test at some point. + +""" +from __future__ import division, absolute_import, print_function + +from numpy.core.machar import MachAr +import numpy.core.numerictypes as ntypes +from numpy import errstate, array + + +class TestMachAr(object): + def _run_machar_highprec(self): + # Instantiate MachAr instance with high enough precision to cause + # underflow + try: + hiprec = ntypes.float96 + MachAr(lambda v:array([v], hiprec)) + except AttributeError: + # Fixme, this needs to raise a 'skip' exception. + "Skipping test: no ntypes.float96 available on this platform." + + def test_underlow(self): + # Regression test for #759: + # instantiating MachAr for dtype = np.float96 raises spurious warning. + with errstate(all='raise'): + try: + self._run_machar_highprec() + except FloatingPointError as e: + msg = "Caught %s exception, should not have been raised." 
% e + raise AssertionError(msg) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_mem_overlap.py b/contrib/python/numpy/py2/numpy/core/tests/test_mem_overlap.py new file mode 100644 index 0000000000..3c8e0e7220 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_mem_overlap.py @@ -0,0 +1,950 @@ +from __future__ import division, absolute_import, print_function + +import sys +import itertools +import pytest + +import numpy as np +from numpy.core._multiarray_tests import solve_diophantine, internal_overlap +from numpy.core import _umath_tests +from numpy.lib.stride_tricks import as_strided +from numpy.compat import long +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_array_equal + ) + +if sys.version_info[0] >= 3: + xrange = range + + +ndims = 2 +size = 10 +shape = tuple([size] * ndims) + +MAY_SHARE_BOUNDS = 0 +MAY_SHARE_EXACT = -1 + + +def _indices_for_nelems(nelems): + """Returns slices of length nelems, from start onwards, in direction sign.""" + + if nelems == 0: + return [size // 2] # int index + + res = [] + for step in (1, 2): + for sign in (-1, 1): + start = size // 2 - nelems * step * sign // 2 + stop = start + nelems * step * sign + res.append(slice(start, stop, step * sign)) + + return res + + +def _indices_for_axis(): + """Returns (src, dst) pairs of indices.""" + + res = [] + for nelems in (0, 2, 3): + ind = _indices_for_nelems(nelems) + + # no itertools.product available in Py2.4 + res.extend([(a, b) for a in ind for b in ind]) # all assignments of size "nelems" + + return res + + +def _indices(ndims): + """Returns ((axis0_src, axis0_dst), (axis1_src, axis1_dst), ... ) index pairs.""" + + ind = _indices_for_axis() + + # no itertools.product available in Py2.4 + + res = [[]] + for i in range(ndims): + newres = [] + for elem in ind: + for others in res: + newres.append([elem] + others) + res = newres + + return res + + +def _check_assignment(srcidx, dstidx): + """Check assignment arr[dstidx] = arr[srcidx] works.""" + + arr = np.arange(np.product(shape)).reshape(shape) + + cpy = arr.copy() + + cpy[dstidx] = arr[srcidx] + arr[dstidx] = arr[srcidx] + + assert_(np.all(arr == cpy), + 'assigning arr[%s] = arr[%s]' % (dstidx, srcidx)) + + +def test_overlapping_assignments(): + # Test automatically generated assignments which overlap in memory. 
+ + inds = _indices(ndims) + + for ind in inds: + srcidx = tuple([a[0] for a in ind]) + dstidx = tuple([a[1] for a in ind]) + + _check_assignment(srcidx, dstidx) + + +@pytest.mark.slow +def test_diophantine_fuzz(): + # Fuzz test the diophantine solver + rng = np.random.RandomState(1234) + + max_int = np.iinfo(np.intp).max + + for ndim in range(10): + feasible_count = 0 + infeasible_count = 0 + + min_count = 500//(ndim + 1) + + while min(feasible_count, infeasible_count) < min_count: + # Ensure big and small integer problems + A_max = 1 + rng.randint(0, 11, dtype=np.intp)**6 + U_max = rng.randint(0, 11, dtype=np.intp)**6 + + A_max = min(max_int, A_max) + U_max = min(max_int-1, U_max) + + A = tuple(int(rng.randint(1, A_max+1, dtype=np.intp)) + for j in range(ndim)) + U = tuple(int(rng.randint(0, U_max+2, dtype=np.intp)) + for j in range(ndim)) + + b_ub = min(max_int-2, sum(a*ub for a, ub in zip(A, U))) + b = rng.randint(-1, b_ub+2, dtype=np.intp) + + if ndim == 0 and feasible_count < min_count: + b = 0 + + X = solve_diophantine(A, U, b) + + if X is None: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert_(X_simplified is None, (A, U, b, X_simplified)) + + # Check no solution exists (provided the problem is + # small enough so that brute force checking doesn't + # take too long) + try: + ranges = tuple(xrange(0, a*ub+1, a) for a, ub in zip(A, U)) + except OverflowError: + # xrange on 32-bit Python 2 may overflow + continue + + size = 1 + for r in ranges: + size *= len(r) + if size < 100000: + assert_(not any(sum(w) == b for w in itertools.product(*ranges))) + infeasible_count += 1 + else: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert_(X_simplified is not None, (A, U, b, X_simplified)) + + # Check validity + assert_(sum(a*x for a, x in zip(A, X)) == b) + assert_(all(0 <= x <= ub for x, ub in zip(X, U))) + feasible_count += 1 + + +def test_diophantine_overflow(): + # Smoke test integer overflow detection + max_intp = np.iinfo(np.intp).max + max_int64 = np.iinfo(np.int64).max + + if max_int64 <= max_intp: + # Check that the algorithm works internally in 128-bit; + # solving this problem requires large intermediate numbers + A = (max_int64//2, max_int64//2 - 10) + U = (max_int64//2, max_int64//2 - 10) + b = 2*(max_int64//2) - 10 + + assert_equal(solve_diophantine(A, U, b), (1, 1)) + + +def check_may_share_memory_exact(a, b): + got = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + assert_equal(np.may_share_memory(a, b), + np.may_share_memory(a, b, max_work=MAY_SHARE_BOUNDS)) + + a.fill(0) + b.fill(0) + a.fill(1) + exact = b.any() + + err_msg = "" + if got != exact: + err_msg = " " + "\n ".join([ + "base_a - base_b = %r" % (a.__array_interface__['data'][0] - b.__array_interface__['data'][0],), + "shape_a = %r" % (a.shape,), + "shape_b = %r" % (b.shape,), + "strides_a = %r" % (a.strides,), + "strides_b = %r" % (b.strides,), + "size_a = %r" % (a.size,), + "size_b = %r" % (b.size,) + ]) + + assert_equal(got, exact, err_msg=err_msg) + + +def test_may_share_memory_manual(): + # Manual test cases for may_share_memory + + # Base arrays + xs0 = [ + np.zeros([13, 21, 23, 22], dtype=np.int8), + np.zeros([13, 21, 23*2, 22], dtype=np.int8)[:,:,::2,:] + ] + + # Generate all negative stride combinations + xs = [] + for x in xs0: + for ss in itertools.product(*(([slice(None), slice(None, None, -1)],)*4)): + xp = x[ss] + xs.append(xp) + + for x in xs: + # The default is a 
simple extent check + assert_(np.may_share_memory(x[:,0,:], x[:,1,:])) + assert_(np.may_share_memory(x[:,0,:], x[:,1,:], max_work=None)) + + # Exact checks + check_may_share_memory_exact(x[:,0,:], x[:,1,:]) + check_may_share_memory_exact(x[:,::7], x[:,3::3]) + + try: + xp = x.ravel() + if xp.flags.owndata: + continue + xp = xp.view(np.int16) + except ValueError: + continue + + # 0-size arrays cannot overlap + check_may_share_memory_exact(x.ravel()[6:6], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Test itemsize is dealt with + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)) + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)[:,3::3]) + check_may_share_memory_exact(x.ravel()[6:7], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Check unit size + x = np.zeros([1], dtype=np.int8) + check_may_share_memory_exact(x, x) + check_may_share_memory_exact(x, x.copy()) + + +def iter_random_view_pairs(x, same_steps=True, equal_size=False): + rng = np.random.RandomState(1234) + + if equal_size and same_steps: + raise ValueError() + + def random_slice(n, step): + start = rng.randint(0, n+1, dtype=np.intp) + stop = rng.randint(start, n+1, dtype=np.intp) + if rng.randint(0, 2, dtype=np.intp) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + def random_slice_fixed_size(n, step, size): + start = rng.randint(0, n+1 - size*step) + stop = start + (size-1)*step + 1 + if rng.randint(0, 2) == 0: + stop, start = start-1, stop-1 + if stop < 0: + stop = None + step *= -1 + return slice(start, stop, step) + + # First a few regular views + yield x, x + for j in range(1, 7, 3): + yield x[j:], x[:-j] + yield x[...,j:], x[...,:-j] + + # An array with zero stride internal overlap + strides = list(x.strides) + strides[0] = 0 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # An array with non-zero stride internal overlap + strides = list(x.strides) + if strides[0] > 1: + strides[0] = 1 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # Then discontiguous views + while True: + steps = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + + t1 = np.arange(x.ndim) + rng.shuffle(t1) + + if equal_size: + t2 = t1 + else: + t2 = np.arange(x.ndim) + rng.shuffle(t2) + + a = x[s1] + + if equal_size: + if a.size == 0: + continue + + steps2 = tuple(rng.randint(1, max(2, p//(1+pa))) + if rng.randint(0, 5) == 0 else 1 + for p, s, pa in zip(x.shape, s1, a.shape)) + s2 = tuple(random_slice_fixed_size(p, s, pa) + for p, s, pa in zip(x.shape, steps2, a.shape)) + elif same_steps: + steps2 = steps + else: + steps2 = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + + if not equal_size: + s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2)) + + a = a.transpose(t1) + b = x[s2].transpose(t2) + + yield a, b + + +def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): + # Check that overlap problems with common strides are solved with + # little work. 
+ x = np.zeros([17,34,71,97], dtype=np.int16) + + feasible = 0 + infeasible = 0 + + pair_iter = iter_random_view_pairs(x, same_steps) + + while min(feasible, infeasible) < min_count: + a, b = next(pair_iter) + + bounds_overlap = np.may_share_memory(a, b) + may_share_answer = np.may_share_memory(a, b) + easy_answer = np.may_share_memory(a, b, max_work=get_max_work(a, b)) + exact_answer = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + if easy_answer != exact_answer: + # assert_equal is slow... + assert_equal(easy_answer, exact_answer) + + if may_share_answer != bounds_overlap: + assert_equal(may_share_answer, bounds_overlap) + + if bounds_overlap: + if exact_answer: + feasible += 1 + else: + infeasible += 1 + + +@pytest.mark.slow +def test_may_share_memory_easy_fuzz(): + # Check that overlap problems with common strides are always + # solved with little work. + + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: 1, + same_steps=True, + min_count=2000) + + +@pytest.mark.slow +def test_may_share_memory_harder_fuzz(): + # Overlap problems with not necessarily common strides take more + # work. + # + # The work bound below can't be reduced much. Harder problems can + # also exist but not be detected here, as the set of problems + # comes from RNG. + + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: max(a.size, b.size)//2, + same_steps=False, + min_count=2000) + + +def test_shares_memory_api(): + x = np.zeros([4, 5, 6], dtype=np.int8) + + assert_equal(np.shares_memory(x, x), True) + assert_equal(np.shares_memory(x, x.copy()), False) + + a = x[:,::2,::3] + b = x[:,::3,::2] + assert_equal(np.shares_memory(a, b), True) + assert_equal(np.shares_memory(a, b, max_work=None), True) + assert_raises(np.TooHardError, np.shares_memory, a, b, max_work=1) + assert_raises(np.TooHardError, np.shares_memory, a, b, max_work=long(1)) + + +def test_may_share_memory_bad_max_work(): + x = np.zeros([1]) + assert_raises(OverflowError, np.may_share_memory, x, x, max_work=10**100) + assert_raises(OverflowError, np.shares_memory, x, x, max_work=10**100) + + +def test_internal_overlap_diophantine(): + def check(A, U, exists=None): + X = solve_diophantine(A, U, 0, require_ub_nontrivial=1) + + if exists is None: + exists = (X is not None) + + if X is not None: + assert_(sum(a*x for a, x in zip(A, X)) == sum(a*u//2 for a, u in zip(A, U))) + assert_(all(0 <= x <= u for x, u in zip(X, U))) + assert_(any(x != u//2 for x, u in zip(X, U))) + + if exists: + assert_(X is not None, repr(X)) + else: + assert_(X is None, repr(X)) + + # Smoke tests + check((3, 2), (2*2, 3*2), exists=True) + check((3*2, 2), (15*2, (3-1)*2), exists=False) + + +def test_internal_overlap_slices(): + # Slicing an array never generates internal overlap + + x = np.zeros([17,34,71,97], dtype=np.int16) + + rng = np.random.RandomState(1234) + + def random_slice(n, step): + start = rng.randint(0, n+1, dtype=np.intp) + stop = rng.randint(start, n+1, dtype=np.intp) + if rng.randint(0, 2, dtype=np.intp) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + cases = 0 + min_count = 5000 + + while cases < min_count: + steps = tuple(rng.randint(1, 11, dtype=np.intp) + if rng.randint(0, 5, dtype=np.intp) == 0 else 1 + for j in range(x.ndim)) + t1 = np.arange(x.ndim) + rng.shuffle(t1) + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + a = x[s1].transpose(t1) + + assert_(not internal_overlap(a)) + cases += 1 + + +def check_internal_overlap(a, manual_expected=None): + got = internal_overlap(a) + + # 
Brute-force check + m = set() + ranges = tuple(xrange(n) for n in a.shape) + for v in itertools.product(*ranges): + offset = sum(s*w for s, w in zip(a.strides, v)) + if offset in m: + expected = True + break + else: + m.add(offset) + else: + expected = False + + # Compare + if got != expected: + assert_equal(got, expected, err_msg=repr((a.strides, a.shape))) + if manual_expected is not None and expected != manual_expected: + assert_equal(expected, manual_expected) + return got + + +def test_internal_overlap_manual(): + # Stride tricks can construct arrays with internal overlap + + # We don't care about memory bounds, the array is not + # read/write accessed + x = np.arange(1).astype(np.int8) + + # Check low-dimensional special cases + + check_internal_overlap(x, False) # 1-dim + check_internal_overlap(x.reshape([]), False) # 0-dim + + a = as_strided(x, strides=(3, 4), shape=(4, 4)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(3, 4), shape=(5, 4)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0,), shape=(0,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(1,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(2,)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(87, 22)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(1, 22)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0, -9993), shape=(0, 22)) + check_internal_overlap(a, False) + + +def test_internal_overlap_fuzz(): + # Fuzz check; the brute-force check is fairly slow + + x = np.arange(1).astype(np.int8) + + overlap = 0 + no_overlap = 0 + min_count = 100 + + rng = np.random.RandomState(1234) + + while min(overlap, no_overlap) < min_count: + ndim = rng.randint(1, 4, dtype=np.intp) + + strides = tuple(rng.randint(-1000, 1000, dtype=np.intp) + for j in range(ndim)) + shape = tuple(rng.randint(1, 30, dtype=np.intp) + for j in range(ndim)) + + a = as_strided(x, strides=strides, shape=shape) + result = check_internal_overlap(a) + + if result: + overlap += 1 + else: + no_overlap += 1 + + +def test_non_ndarray_inputs(): + # Regression check for gh-5604 + + class MyArray(object): + def __init__(self, data): + self.data = data + + @property + def __array_interface__(self): + return self.data.__array_interface__ + + class MyArray2(object): + def __init__(self, data): + self.data = data + + def __array__(self): + return self.data + + for cls in [MyArray, MyArray2]: + x = np.arange(5) + + assert_(np.may_share_memory(cls(x[::2]), x[1::2])) + assert_(not np.shares_memory(cls(x[::2]), x[1::2])) + + assert_(np.shares_memory(cls(x[1::3]), x[::2])) + assert_(np.may_share_memory(cls(x[1::3]), x[::2])) + + +def view_element_first_byte(x): + """Construct an array viewing the first byte of each element of `x`""" + from numpy.lib.stride_tricks import DummyArray + interface = dict(x.__array_interface__) + interface['typestr'] = '|b1' + interface['descr'] = [('', '|b1')] + return np.asarray(DummyArray(interface, x)) + + +def assert_copy_equivalent(operation, args, out, **kwargs): + """ + Check that operation(*args, out=out) produces results + equivalent to out[...] = operation(*args, out=out.copy()) + """ + + kwargs['out'] = out + kwargs2 = dict(kwargs) + kwargs2['out'] = out.copy() + + out_orig = out.copy() + out[...] = operation(*args, **kwargs2) + expected = out.copy() + out[...] 
= out_orig + + got = operation(*args, **kwargs).copy() + + if (got != expected).any(): + assert_equal(got, expected) + + +class TestUFunc(object): + """ + Test ufunc call memory overlap handling + """ + + def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16, + count=5000): + shapes = [7, 13, 8, 21, 29, 32] + + rng = np.random.RandomState(1234) + + for ndim in range(1, 6): + x = rng.randint(0, 2**16, size=shapes[:ndim]).astype(dtype) + + it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = count // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + a_orig = a.copy() + b_orig = b.copy() + + if get_out_axis_size is None: + assert_copy_equivalent(operation, [a], out=b) + + if np.shares_memory(a, b): + overlapping += 1 + else: + for axis in itertools.chain(range(ndim), [None]): + a[...] = a_orig + b[...] = b_orig + + # Determine size for reduction axis (None if scalar) + outsize, scalarize = get_out_axis_size(a, b, axis) + if outsize == 'skip': + continue + + # Slice b to get an output array of the correct size + sl = [slice(None)] * ndim + if axis is None: + if outsize is None: + sl = [slice(0, 1)] + [0]*(ndim - 1) + else: + sl = [slice(0, outsize)] + [0]*(ndim - 1) + else: + if outsize is None: + k = b.shape[axis]//2 + if ndim == 1: + sl[axis] = slice(k, k + 1) + else: + sl[axis] = k + else: + assert b.shape[axis] >= outsize + sl[axis] = slice(0, outsize) + b_out = b[tuple(sl)] + + if scalarize: + b_out = b_out.reshape([]) + + if np.shares_memory(a, b_out): + overlapping += 1 + + # Check result + assert_copy_equivalent(operation, [a], out=b_out, axis=axis) + + @pytest.mark.slow + def test_unary_ufunc_call_fuzz(self): + self.check_unary_fuzz(np.invert, None, np.int16) + + def test_binary_ufunc_accumulate_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # accumulate doesn't support this + else: + return a.shape[axis], False + + self.check_unary_fuzz(np.add.accumulate, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduce_fuzz(self): + def get_out_axis_size(a, b, axis): + return None, (axis is None or a.ndim == 1) + + self.check_unary_fuzz(np.add.reduce, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # reduceat doesn't support this + else: + return a.shape[axis], False + + def do_reduceat(a, out, axis): + if axis is None: + size = len(a) + step = size//len(out) + else: + size = a.shape[axis] + step = a.shape[axis] // out.shape[axis] + idx = np.arange(0, size, step) + return np.add.reduceat(a, idx, out=out, axis=axis) + + self.check_unary_fuzz(do_reduceat, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_manual(self): + def check(ufunc, a, ind, out): + c1 = ufunc.reduceat(a.copy(), ind.copy(), out=out.copy()) + c2 = ufunc.reduceat(a, ind, out=out) + assert_array_equal(c1, c2) + + # Exactly same input/output arrays + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1].copy(), a) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1], a) + + def test_unary_gufunc_fuzz(self): + shapes = [7, 13, 8, 21, 29, 32] + gufunc = _umath_tests.euclidean_pdist + + rng = np.random.RandomState(1234) + + for ndim in range(2, 6): + x = rng.rand(*shapes[:ndim]) + 
+ it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = 500 // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + if min(a.shape[-2:]) < 2 or min(b.shape[-2:]) < 2 or a.shape[-1] < 2: + continue + + # Ensure the shapes are so that euclidean_pdist is happy + if b.shape[-1] > b.shape[-2]: + b = b[...,0,:] + else: + b = b[...,:,0] + + n = a.shape[-2] + p = n * (n - 1) // 2 + if p <= b.shape[-1] and p > 0: + b = b[...,:p] + else: + n = max(2, int(np.sqrt(b.shape[-1]))//2) + p = n * (n - 1) // 2 + a = a[...,:n,:] + b = b[...,:p] + + # Call + if np.shares_memory(a, b): + overlapping += 1 + + with np.errstate(over='ignore', invalid='ignore'): + assert_copy_equivalent(gufunc, [a], out=b) + + def test_ufunc_at_manual(self): + def check(ufunc, a, ind, b=None): + a0 = a.copy() + if b is None: + ufunc.at(a0, ind.copy()) + c1 = a0.copy() + ufunc.at(a, ind) + c2 = a.copy() + else: + ufunc.at(a0, ind.copy(), b.copy()) + c1 = a0.copy() + ufunc.at(a, ind, b) + c2 = a.copy() + assert_array_equal(c1, c2) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.invert, a[::-1], a) + + # Overlap with second data array + a = np.arange(100, dtype=np.int16) + ind = np.arange(0, 100, 2, dtype=np.int16) + check(np.add, a, ind, a[25:75]) + + def test_unary_ufunc_1d_manual(self): + # Exercise branches in PyArray_EQUIVALENTLY_ITERABLE + + def check(a, b): + a_orig = a.copy() + b_orig = b.copy() + + b0 = b.copy() + c1 = ufunc(a, out=b0) + c2 = ufunc(a, out=b) + assert_array_equal(c1, c2) + + # Trigger "fancy ufunc loop" code path + mask = view_element_first_byte(b).view(np.bool_) + + a[...] = a_orig + b[...] = b_orig + c1 = ufunc(a, out=b.copy(), where=mask.copy()).copy() + + a[...] = a_orig + b[...] = b_orig + c2 = ufunc(a, out=b, where=mask.copy()).copy() + + # Also, mask overlapping with output + a[...] = a_orig + b[...] 
= b_orig + c3 = ufunc(a, out=b, where=mask).copy() + + assert_array_equal(c1, c2) + assert_array_equal(c1, c3) + + dtypes = [np.int8, np.int16, np.int32, np.int64, np.float32, + np.float64, np.complex64, np.complex128] + dtypes = [np.dtype(x) for x in dtypes] + + for dtype in dtypes: + if np.issubdtype(dtype, np.integer): + ufunc = np.invert + else: + ufunc = np.reciprocal + + n = 1000 + k = 10 + indices = [ + np.index_exp[:n], + np.index_exp[k:k+n], + np.index_exp[n-1::-1], + np.index_exp[k+n-1:k-1:-1], + np.index_exp[:2*n:2], + np.index_exp[k:k+2*n:2], + np.index_exp[2*n-1::-2], + np.index_exp[k+2*n-1:k-1:-2], + ] + + for xi, yi in itertools.product(indices, indices): + v = np.arange(1, 1 + n*2 + k, dtype=dtype) + x = v[xi] + y = v[yi] + + with np.errstate(all='ignore'): + check(x, y) + + # Scalar cases + check(x[:1], y) + check(x[-1:], y) + check(x[:1].reshape([]), y) + check(x[-1:].reshape([]), y) + + def test_unary_ufunc_where_same(self): + # Check behavior at wheremask overlap + ufunc = np.invert + + def check(a, out, mask): + c1 = ufunc(a, out=out.copy(), where=mask.copy()) + c2 = ufunc(a, out=out, where=mask) + assert_array_equal(c1, c2) + + # Check behavior with same input and output arrays + x = np.arange(100).astype(np.bool_) + check(x, x, x) + check(x, x.copy(), x) + check(x, x, x.copy()) + + @pytest.mark.slow + def test_binary_ufunc_1d_manual(self): + ufunc = np.add + + def check(a, b, c): + c0 = c.copy() + c1 = ufunc(a, b, out=c0) + c2 = ufunc(a, b, out=c) + assert_array_equal(c1, c2) + + for dtype in [np.int8, np.int16, np.int32, np.int64, + np.float32, np.float64, np.complex64, np.complex128]: + # Check different data dependency orders + + n = 1000 + k = 10 + + indices = [] + for p in [1, 2]: + indices.extend([ + np.index_exp[:p*n:p], + np.index_exp[k:k+p*n:p], + np.index_exp[p*n-1::-p], + np.index_exp[k+p*n-1:k-1:-p], + ]) + + for x, y, z in itertools.product(indices, indices, indices): + v = np.arange(6*n).astype(dtype) + x = v[x] + y = v[y] + z = v[z] + + check(x, y, z) + + # Scalar cases + check(x[:1], y, z) + check(x[-1:], y, z) + check(x[:1].reshape([]), y, z) + check(x[-1:].reshape([]), y, z) + check(x, y[:1], z) + check(x, y[-1:], z) + check(x, y[:1].reshape([]), z) + check(x, y[-1:].reshape([]), z) + + def test_inplace_op_simple_manual(self): + rng = np.random.RandomState(1234) + x = rng.rand(200, 200) # bigger than bufsize + + x += x.T + assert_array_equal(x - x.T, 0) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_memmap.py b/contrib/python/numpy/py2/numpy/core/tests/test_memmap.py new file mode 100644 index 0000000000..990d0ae265 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_memmap.py @@ -0,0 +1,206 @@ +from __future__ import division, absolute_import, print_function + +import sys +import os +import shutil +import mmap +import pytest +from tempfile import NamedTemporaryFile, TemporaryFile, mktemp, mkdtemp + +from numpy import ( + memmap, sum, average, product, ndarray, isscalar, add, subtract, multiply) +from numpy.compat import Path + +from numpy import arange, allclose, asarray +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, suppress_warnings + ) + +class TestMemmap(object): + def setup(self): + self.tmpfp = NamedTemporaryFile(prefix='mmap') + self.tempdir = mkdtemp() + self.shape = (3, 4) + self.dtype = 'float32' + self.data = arange(12, dtype=self.dtype) + self.data.resize(self.shape) + + def teardown(self): + self.tmpfp.close() + shutil.rmtree(self.tempdir) + + def test_roundtrip(self): + # Write 
data to file + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + del fp # Test __del__ machinery, which handles cleanup + + # Read data back from file + newfp = memmap(self.tmpfp, dtype=self.dtype, mode='r', + shape=self.shape) + assert_(allclose(self.data, newfp)) + assert_array_equal(self.data, newfp) + assert_equal(newfp.flags.writeable, False) + + def test_open_with_filename(self): + tmpname = mktemp('', 'mmap', dir=self.tempdir) + fp = memmap(tmpname, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + del fp + + def test_unnamed_file(self): + with TemporaryFile() as f: + fp = memmap(f, dtype=self.dtype, shape=self.shape) + del fp + + def test_attributes(self): + offset = 1 + mode = "w+" + fp = memmap(self.tmpfp, dtype=self.dtype, mode=mode, + shape=self.shape, offset=offset) + assert_equal(offset, fp.offset) + assert_equal(mode, fp.mode) + del fp + + def test_filename(self): + tmpname = mktemp('', 'mmap', dir=self.tempdir) + fp = memmap(tmpname, dtype=self.dtype, mode='w+', + shape=self.shape) + abspath = os.path.abspath(tmpname) + fp[:] = self.data[:] + assert_equal(abspath, fp.filename) + b = fp[:1] + assert_equal(abspath, b.filename) + del b + del fp + + @pytest.mark.skipif(Path is None, reason="No pathlib.Path") + def test_path(self): + tmpname = mktemp('', 'mmap', dir=self.tempdir) + fp = memmap(Path(tmpname), dtype=self.dtype, mode='w+', + shape=self.shape) + # os.path.realpath does not resolve symlinks on Windows + # see: https://bugs.python.org/issue9949 + # use Path.resolve, just as memmap class does internally + abspath = str(Path(tmpname).resolve()) + fp[:] = self.data[:] + assert_equal(abspath, str(fp.filename.resolve())) + b = fp[:1] + assert_equal(abspath, str(b.filename.resolve())) + del b + del fp + + def test_filename_fileobj(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode="w+", + shape=self.shape) + assert_equal(fp.filename, self.tmpfp.name) + + @pytest.mark.skipif(sys.platform == 'gnu0', + reason="Known to fail on hurd") + def test_flush(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp[:] = self.data[:] + assert_equal(fp[0], self.data[0]) + fp.flush() + + def test_del(self): + # Make sure a view does not delete the underlying mmap + fp_base = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + fp_base[0] = 5 + fp_view = fp_base[0:1] + assert_equal(fp_view[0], 5) + del fp_view + # Should still be able to access and assign values after + # deleting the view + assert_equal(fp_base[0], 5) + fp_base[0] = 6 + assert_equal(fp_base[0], 6) + + def test_arithmetic_drops_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + tmp = (fp + 10) + if isinstance(tmp, memmap): + assert_(tmp._mmap is not fp._mmap) + + def test_indexing_drops_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + tmp = fp[(1, 2), (2, 3)] + if isinstance(tmp, memmap): + assert_(tmp._mmap is not fp._mmap) + + def test_slicing_keeps_references(self): + fp = memmap(self.tmpfp, dtype=self.dtype, mode='w+', + shape=self.shape) + assert_(fp[:2, :2]._mmap is fp._mmap) + + def test_view(self): + fp = memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) + new1 = fp.view() + new2 = new1.view() + assert_(new1.base is fp) + assert_(new2.base is fp) + new_array = asarray(fp) + assert_(new_array.base is fp) + + def test_ufunc_return_ndarray(self): + fp = memmap(self.tmpfp, dtype=self.dtype, 
shape=self.shape) + fp[:] = self.data + + with suppress_warnings() as sup: + sup.filter(FutureWarning, "np.average currently does not preserve") + for unary_op in [sum, average, product]: + result = unary_op(fp) + assert_(isscalar(result)) + assert_(result.__class__ is self.data[0, 0].__class__) + + assert_(unary_op(fp, axis=0).__class__ is ndarray) + assert_(unary_op(fp, axis=1).__class__ is ndarray) + + for binary_op in [add, subtract, multiply]: + assert_(binary_op(fp, self.data).__class__ is ndarray) + assert_(binary_op(self.data, fp).__class__ is ndarray) + assert_(binary_op(fp, fp).__class__ is ndarray) + + fp += 1 + assert(fp.__class__ is memmap) + add(fp, 1, out=fp) + assert(fp.__class__ is memmap) + + def test_getitem(self): + fp = memmap(self.tmpfp, dtype=self.dtype, shape=self.shape) + fp[:] = self.data + + assert_(fp[1:, :-1].__class__ is memmap) + # Fancy indexing returns a copy that is not memmapped + assert_(fp[[0, 1]].__class__ is ndarray) + + def test_memmap_subclass(self): + class MemmapSubClass(memmap): + pass + + fp = MemmapSubClass(self.tmpfp, dtype=self.dtype, shape=self.shape) + fp[:] = self.data + + # We keep previous behavior for subclasses of memmap, i.e. the + # ufunc and __getitem__ output is never turned into a ndarray + assert_(sum(fp, axis=0).__class__ is MemmapSubClass) + assert_(sum(fp).__class__ is MemmapSubClass) + assert_(fp[1:, :-1].__class__ is MemmapSubClass) + assert(fp[[0, 1]].__class__ is MemmapSubClass) + + def test_mmap_offset_greater_than_allocation_granularity(self): + size = 5 * mmap.ALLOCATIONGRANULARITY + offset = mmap.ALLOCATIONGRANULARITY + 1 + fp = memmap(self.tmpfp, shape=size, mode='w+', offset=offset) + assert_(fp.offset == offset) + + def test_no_shape(self): + self.tmpfp.write(b'a'*16) + mm = memmap(self.tmpfp, dtype='float64') + assert_equal(mm.shape, (2,)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_multiarray.py b/contrib/python/numpy/py2/numpy/core/tests/test_multiarray.py new file mode 100644 index 0000000000..c55556535c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_multiarray.py @@ -0,0 +1,8325 @@ +from __future__ import division, absolute_import, print_function + +try: + # Accessing collections abstract classes from collections + # has been deprecated since Python 3.3 + import collections.abc as collections_abc +except ImportError: + import collections as collections_abc +import tempfile +import sys +import shutil +import warnings +import operator +import io +import itertools +import functools +import ctypes +import os +import gc +import weakref +import pytest +from contextlib import contextmanager + +from numpy.core.numeric import pickle + +if sys.version_info[0] >= 3: + import builtins +else: + import __builtin__ as builtins +from decimal import Decimal + +import numpy as np +from numpy.compat import strchar, unicode +import numpy.core._multiarray_tests as _multiarray_tests +from numpy.testing import ( + assert_, assert_raises, assert_warns, assert_equal, assert_almost_equal, + assert_array_equal, assert_raises_regex, assert_array_almost_equal, + assert_allclose, IS_PYPY, HAS_REFCOUNT, assert_array_less, runstring, + temppath, suppress_warnings, break_cycles, + ) +from numpy.core.tests._locales import CommaDecimalPointLocale + +# Need to test an object that does not fully implement math interface +from datetime import timedelta, datetime + + +if sys.version_info[:2] > (3, 2): + # In Python 3.3 the representation of empty shape, strides and sub-offsets + # is an empty tuple instead of 
None. + # https://docs.python.org/dev/whatsnew/3.3.html#api-changes + EMPTY = () +else: + EMPTY = None + + +def _aligned_zeros(shape, dtype=float, order="C", align=None): + """ + Allocate a new ndarray with aligned memory. + + The ndarray is guaranteed *not* aligned to twice the requested alignment. + Eg, if align=4, guarantees it is not aligned to 8. If align=None uses + dtype.alignment.""" + dtype = np.dtype(dtype) + if dtype == np.dtype(object): + # Can't do this, fall back to standard allocation (which + # should always be sufficiently aligned) + if align is not None: + raise ValueError("object array alignment not supported") + return np.zeros(shape, dtype=dtype, order=order) + if align is None: + align = dtype.alignment + if not hasattr(shape, '__len__'): + shape = (shape,) + size = functools.reduce(operator.mul, shape) * dtype.itemsize + buf = np.empty(size + 2*align + 1, np.uint8) + + ptr = buf.__array_interface__['data'][0] + offset = ptr % align + if offset != 0: + offset = align - offset + if (ptr % (2*align)) == 0: + offset += align + + # Note: slices producing 0-size arrays do not necessarily change + # data pointer --- so we use and allocate size+1 + buf = buf[offset:offset+size+1][:-1] + data = np.ndarray(shape, dtype, buf, order=order) + data.fill(0) + return data + + +class TestFlags(object): + def setup(self): + self.a = np.arange(10) + + def test_writeable(self): + mydict = locals() + self.a.flags.writeable = False + assert_raises(ValueError, runstring, 'self.a[0] = 3', mydict) + assert_raises(ValueError, runstring, 'self.a[0:1].itemset(3)', mydict) + self.a.flags.writeable = True + self.a[0] = 5 + self.a[0] = 0 + + def test_writeable_from_readonly(self): + # gh-9440 - make sure fromstring, from buffer on readonly buffers + # set writeable False + data = b'\x00' * 100 + vals = np.frombuffer(data, 'B') + assert_raises(ValueError, vals.setflags, write=True) + types = np.dtype( [('vals', 'u1'), ('res3', 'S4')] ) + values = np.core.records.fromstring(data, types) + vals = values['vals'] + assert_raises(ValueError, vals.setflags, write=True) + + def test_writeable_from_buffer(self): + data = bytearray(b'\x00' * 100) + vals = np.frombuffer(data, 'B') + assert_(vals.flags.writeable) + vals.setflags(write=False) + assert_(vals.flags.writeable is False) + vals.setflags(write=True) + assert_(vals.flags.writeable) + types = np.dtype( [('vals', 'u1'), ('res3', 'S4')] ) + values = np.core.records.fromstring(data, types) + vals = values['vals'] + assert_(vals.flags.writeable) + vals.setflags(write=False) + assert_(vals.flags.writeable is False) + vals.setflags(write=True) + assert_(vals.flags.writeable) + + @pytest.mark.skipif(sys.version_info[0] < 3, reason="Python 2 always copies") + @pytest.mark.skipif(IS_PYPY, reason="PyPy always copies") + def test_writeable_pickle(self): + import pickle + # Small arrays will be copied without setting base. + # See condition for using PyArray_SetBaseObject in + # array_setstate. + a = np.arange(1000) + for v in range(pickle.HIGHEST_PROTOCOL): + vals = pickle.loads(pickle.dumps(a, v)) + assert_(vals.flags.writeable) + assert_(isinstance(vals.base, bytes)) + + def test_writeable_from_c_data(self): + # Test that the writeable flag can be changed for an array wrapping + # low level C-data, but not owning its data. + # Also see that this is deprecated to change from python. 
+ from numpy.core._multiarray_tests import get_c_wrapping_array + + arr_writeable = get_c_wrapping_array(True) + assert not arr_writeable.flags.owndata + assert arr_writeable.flags.writeable + view = arr_writeable[...] + + # Toggling the writeable flag works on the view: + view.flags.writeable = False + assert not view.flags.writeable + view.flags.writeable = True + assert view.flags.writeable + # Flag can be unset on the arr_writeable: + arr_writeable.flags.writeable = False + + arr_readonly = get_c_wrapping_array(False) + assert not arr_readonly.flags.owndata + assert not arr_readonly.flags.writeable + + for arr in [arr_writeable, arr_readonly]: + view = arr[...] + view.flags.writeable = False # make sure it is readonly + arr.flags.writeable = False + assert not arr.flags.writeable + + with assert_raises(ValueError): + view.flags.writeable = True + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + with assert_raises(DeprecationWarning): + arr.flags.writeable = True + + with assert_warns(DeprecationWarning): + arr.flags.writeable = True + + + def test_otherflags(self): + assert_equal(self.a.flags.carray, True) + assert_equal(self.a.flags['C'], True) + assert_equal(self.a.flags.farray, False) + assert_equal(self.a.flags.behaved, True) + assert_equal(self.a.flags.fnc, False) + assert_equal(self.a.flags.forc, True) + assert_equal(self.a.flags.owndata, True) + assert_equal(self.a.flags.writeable, True) + assert_equal(self.a.flags.aligned, True) + with assert_warns(DeprecationWarning): + assert_equal(self.a.flags.updateifcopy, False) + with assert_warns(DeprecationWarning): + assert_equal(self.a.flags['U'], False) + assert_equal(self.a.flags['UPDATEIFCOPY'], False) + assert_equal(self.a.flags.writebackifcopy, False) + assert_equal(self.a.flags['X'], False) + assert_equal(self.a.flags['WRITEBACKIFCOPY'], False) + + def test_string_align(self): + a = np.zeros(4, dtype=np.dtype('|S4')) + assert_(a.flags.aligned) + # not power of two are accessed byte-wise and thus considered aligned + a = np.zeros(5, dtype=np.dtype('|S4')) + assert_(a.flags.aligned) + + def test_void_align(self): + a = np.zeros(4, dtype=np.dtype([("a", "i4"), ("b", "i4")])) + assert_(a.flags.aligned) + + +class TestHash(object): + # see #3793 + def test_int(self): + for st, ut, s in [(np.int8, np.uint8, 8), + (np.int16, np.uint16, 16), + (np.int32, np.uint32, 32), + (np.int64, np.uint64, 64)]: + for i in range(1, s): + assert_equal(hash(st(-2**i)), hash(-2**i), + err_msg="%r: -2**%d" % (st, i)) + assert_equal(hash(st(2**(i - 1))), hash(2**(i - 1)), + err_msg="%r: 2**%d" % (st, i - 1)) + assert_equal(hash(st(2**i - 1)), hash(2**i - 1), + err_msg="%r: 2**%d - 1" % (st, i)) + + i = max(i - 1, 1) + assert_equal(hash(ut(2**(i - 1))), hash(2**(i - 1)), + err_msg="%r: 2**%d" % (ut, i - 1)) + assert_equal(hash(ut(2**i - 1)), hash(2**i - 1), + err_msg="%r: 2**%d - 1" % (ut, i)) + + +class TestAttributes(object): + def setup(self): + self.one = np.arange(10) + self.two = np.arange(20).reshape(4, 5) + self.three = np.arange(60, dtype=np.float64).reshape(2, 5, 6) + + def test_attributes(self): + assert_equal(self.one.shape, (10,)) + assert_equal(self.two.shape, (4, 5)) + assert_equal(self.three.shape, (2, 5, 6)) + self.three.shape = (10, 3, 2) + assert_equal(self.three.shape, (10, 3, 2)) + self.three.shape = (2, 5, 6) + assert_equal(self.one.strides, (self.one.itemsize,)) + num = self.two.itemsize + assert_equal(self.two.strides, (5*num, num)) + num = self.three.itemsize + 
assert_equal(self.three.strides, (30*num, 6*num, num)) + assert_equal(self.one.ndim, 1) + assert_equal(self.two.ndim, 2) + assert_equal(self.three.ndim, 3) + num = self.two.itemsize + assert_equal(self.two.size, 20) + assert_equal(self.two.nbytes, 20*num) + assert_equal(self.two.itemsize, self.two.dtype.itemsize) + assert_equal(self.two.base, np.arange(20)) + + def test_dtypeattr(self): + assert_equal(self.one.dtype, np.dtype(np.int_)) + assert_equal(self.three.dtype, np.dtype(np.float_)) + assert_equal(self.one.dtype.char, 'l') + assert_equal(self.three.dtype.char, 'd') + assert_(self.three.dtype.str[0] in '<>') + assert_equal(self.one.dtype.str[1], 'i') + assert_equal(self.three.dtype.str[1], 'f') + + def test_int_subclassing(self): + # Regression test for https://github.com/numpy/numpy/pull/3526 + + numpy_int = np.int_(0) + + if sys.version_info[0] >= 3: + # On Py3k int_ should not inherit from int, because it's not + # fixed-width anymore + assert_equal(isinstance(numpy_int, int), False) + else: + # Otherwise, it should inherit from int... + assert_equal(isinstance(numpy_int, int), True) + + # ... and fast-path checks on C-API level should also work + from numpy.core._multiarray_tests import test_int_subclass + assert_equal(test_int_subclass(numpy_int), True) + + def test_stridesattr(self): + x = self.one + + def make_array(size, offset, strides): + return np.ndarray(size, buffer=x, dtype=int, + offset=offset*x.itemsize, + strides=strides*x.itemsize) + + assert_equal(make_array(4, 4, -1), np.array([4, 3, 2, 1])) + assert_raises(ValueError, make_array, 4, 4, -2) + assert_raises(ValueError, make_array, 4, 2, -1) + assert_raises(ValueError, make_array, 8, 3, 1) + assert_equal(make_array(8, 3, 0), np.array([3]*8)) + # Check behavior reported in gh-2503: + assert_raises(ValueError, make_array, (2, 3), 5, np.array([-2, -3])) + make_array(0, 0, 10) + + def test_set_stridesattr(self): + x = self.one + + def make_array(size, offset, strides): + try: + r = np.ndarray([size], dtype=int, buffer=x, + offset=offset*x.itemsize) + except Exception as e: + raise RuntimeError(e) + r.strides = strides = strides*x.itemsize + return r + + assert_equal(make_array(4, 4, -1), np.array([4, 3, 2, 1])) + assert_equal(make_array(7, 3, 1), np.array([3, 4, 5, 6, 7, 8, 9])) + assert_raises(ValueError, make_array, 4, 4, -2) + assert_raises(ValueError, make_array, 4, 2, -1) + assert_raises(RuntimeError, make_array, 8, 3, 1) + # Check that the true extent of the array is used. + # Test relies on as_strided base not exposing a buffer. + x = np.lib.stride_tricks.as_strided(np.arange(1), (10, 10), (0, 0)) + + def set_strides(arr, strides): + arr.strides = strides + + assert_raises(ValueError, set_strides, x, (10*x.itemsize, x.itemsize)) + + # Test for offset calculations: + x = np.lib.stride_tricks.as_strided(np.arange(10, dtype=np.int8)[-1], + shape=(10,), strides=(-1,)) + assert_raises(ValueError, set_strides, x[::-1], -1) + a = x[::-1] + a.strides = 1 + a[::2].strides = 2 + + def test_fill(self): + for t in "?bhilqpBHILQPfdgFDGO": + x = np.empty((3, 2, 1), t) + y = np.empty((3, 2, 1), t) + x.fill(1) + y[...] = 1 + assert_equal(x, y) + + def test_fill_max_uint64(self): + x = np.empty((3, 2, 1), dtype=np.uint64) + y = np.empty((3, 2, 1), dtype=np.uint64) + value = 2**64 - 1 + y[...] 
= value + x.fill(value) + assert_array_equal(x, y) + + def test_fill_struct_array(self): + # Filling from a scalar + x = np.array([(0, 0.0), (1, 1.0)], dtype='i4,f8') + x.fill(x[0]) + assert_equal(x['f1'][1], x['f1'][0]) + # Filling from a tuple that can be converted + # to a scalar + x = np.zeros(2, dtype=[('a', 'f8'), ('b', 'i4')]) + x.fill((3.5, -2)) + assert_array_equal(x['a'], [3.5, 3.5]) + assert_array_equal(x['b'], [-2, -2]) + + +class TestArrayConstruction(object): + def test_array(self): + d = np.ones(6) + r = np.array([d, d]) + assert_equal(r, np.ones((2, 6))) + + d = np.ones(6) + tgt = np.ones((2, 6)) + r = np.array([d, d]) + assert_equal(r, tgt) + tgt[1] = 2 + r = np.array([d, d + 1]) + assert_equal(r, tgt) + + d = np.ones(6) + r = np.array([[d, d]]) + assert_equal(r, np.ones((1, 2, 6))) + + d = np.ones(6) + r = np.array([[d, d], [d, d]]) + assert_equal(r, np.ones((2, 2, 6))) + + d = np.ones((6, 6)) + r = np.array([d, d]) + assert_equal(r, np.ones((2, 6, 6))) + + d = np.ones((6, )) + r = np.array([[d, d + 1], d + 2]) + assert_equal(len(r), 2) + assert_equal(r[0], [d, d + 1]) + assert_equal(r[1], d + 2) + + tgt = np.ones((2, 3), dtype=bool) + tgt[0, 2] = False + tgt[1, 0:2] = False + r = np.array([[True, True, False], [False, False, True]]) + assert_equal(r, tgt) + r = np.array([[True, False], [True, False], [False, True]]) + assert_equal(r, tgt.T) + + def test_array_empty(self): + assert_raises(TypeError, np.array) + + def test_array_copy_false(self): + d = np.array([1, 2, 3]) + e = np.array(d, copy=False) + d[1] = 3 + assert_array_equal(e, [1, 3, 3]) + e = np.array(d, copy=False, order='F') + d[1] = 4 + assert_array_equal(e, [1, 4, 3]) + e[2] = 7 + assert_array_equal(d, [1, 4, 7]) + + def test_array_copy_true(self): + d = np.array([[1,2,3], [1, 2, 3]]) + e = np.array(d, copy=True) + d[0, 1] = 3 + e[0, 2] = -7 + assert_array_equal(e, [[1, 2, -7], [1, 2, 3]]) + assert_array_equal(d, [[1, 3, 3], [1, 2, 3]]) + e = np.array(d, copy=True, order='F') + d[0, 1] = 5 + e[0, 2] = 7 + assert_array_equal(e, [[1, 3, 7], [1, 2, 3]]) + assert_array_equal(d, [[1, 5, 3], [1,2,3]]) + + def test_array_cont(self): + d = np.ones(10)[::2] + assert_(np.ascontiguousarray(d).flags.c_contiguous) + assert_(np.ascontiguousarray(d).flags.f_contiguous) + assert_(np.asfortranarray(d).flags.c_contiguous) + assert_(np.asfortranarray(d).flags.f_contiguous) + d = np.ones((10, 10))[::2,::2] + assert_(np.ascontiguousarray(d).flags.c_contiguous) + assert_(np.asfortranarray(d).flags.f_contiguous) + + +class TestAssignment(object): + def test_assignment_broadcasting(self): + a = np.arange(6).reshape(2, 3) + + # Broadcasting the input to the output + a[...] = np.arange(3) + assert_equal(a, [[0, 1, 2], [0, 1, 2]]) + a[...] = np.arange(2).reshape(2, 1) + assert_equal(a, [[0, 0, 0], [1, 1, 1]]) + + # For compatibility with <= 1.5, a limited version of broadcasting + # the output to the input. + # + # This behavior is inconsistent with NumPy broadcasting + # in general, because it only uses one of the two broadcasting + # rules (adding a new "1" dimension to the left of the shape), + # applied to the output instead of an input. In NumPy 2.0, this kind + # of broadcasting assignment will likely be disallowed. + a[...] = np.arange(6)[::-1].reshape(1, 2, 3) + assert_equal(a, [[5, 4, 3], [2, 1, 0]]) + # The other type of broadcasting would require a reduction operation. + + def assign(a, b): + a[...] 
= b + + assert_raises(ValueError, assign, a, np.arange(12).reshape(2, 2, 3)) + + def test_assignment_errors(self): + # Address issue #2276 + class C: + pass + a = np.zeros(1) + + def assign(v): + a[0] = v + + assert_raises((AttributeError, TypeError), assign, C()) + assert_raises(ValueError, assign, [1]) + + def test_unicode_assignment(self): + # gh-5049 + from numpy.core.numeric import set_string_function + + @contextmanager + def inject_str(s): + """ replace ndarray.__str__ temporarily """ + set_string_function(lambda x: s, repr=False) + try: + yield + finally: + set_string_function(None, repr=False) + + a1d = np.array([u'test']) + a0d = np.array(u'done') + with inject_str(u'bad'): + a1d[0] = a0d # previously this would invoke __str__ + assert_equal(a1d[0], u'done') + + # this would crash for the same reason + np.array([np.array(u'\xe5\xe4\xf6')]) + + def test_stringlike_empty_list(self): + # gh-8902 + u = np.array([u'done']) + b = np.array([b'done']) + + class bad_sequence(object): + def __getitem__(self): pass + def __len__(self): raise RuntimeError + + assert_raises(ValueError, operator.setitem, u, 0, []) + assert_raises(ValueError, operator.setitem, b, 0, []) + + assert_raises(ValueError, operator.setitem, u, 0, bad_sequence()) + assert_raises(ValueError, operator.setitem, b, 0, bad_sequence()) + + def test_longdouble_assignment(self): + # only relevant if longdouble is larger than float + # we're looking for loss of precision + + for dtype in (np.longdouble, np.longcomplex): + # gh-8902 + tinyb = np.nextafter(np.longdouble(0), 1).astype(dtype) + tinya = np.nextafter(np.longdouble(0), -1).astype(dtype) + + # construction + tiny1d = np.array([tinya]) + assert_equal(tiny1d[0], tinya) + + # scalar = scalar + tiny1d[0] = tinyb + assert_equal(tiny1d[0], tinyb) + + # 0d = scalar + tiny1d[0, ...] = tinya + assert_equal(tiny1d[0], tinya) + + # 0d = 0d + tiny1d[0, ...] = tinyb[...] + assert_equal(tiny1d[0], tinyb) + + # scalar = 0d + tiny1d[0] = tinyb[...] + assert_equal(tiny1d[0], tinyb) + + arr = np.array([np.array(tinya)]) + assert_equal(arr[0], tinya) + + def test_cast_to_string(self): + # cast to str should do "str(scalar)", not "str(scalar.item())" + # Example: In python2, str(float) is truncated, so we want to avoid + # str(np.float64(...).item()) as this would incorrectly truncate. + a = np.zeros(1, dtype='S20') + a[:] = np.array(['1.12345678901234567890'], dtype='f8') + assert_equal(a[0], b"1.1234567890123457") + + +class TestDtypedescr(object): + def test_construction(self): + d1 = np.dtype('i4') + assert_equal(d1, np.dtype(np.int32)) + d2 = np.dtype('f8') + assert_equal(d2, np.dtype(np.float64)) + + def test_byteorders(self): + assert_(np.dtype('<i4') != np.dtype('>i4')) + assert_(np.dtype([('a', '<i4')]) != np.dtype([('a', '>i4')])) + + def test_structured_non_void(self): + fields = [('a', '<i2'), ('b', '<i2')] + dt_int = np.dtype(('i4', fields)) + assert_equal(str(dt_int), "(numpy.int32, [('a', '<i2'), ('b', '<i2')])") + + # gh-9821 + arr_int = np.zeros(4, dt_int) + assert_equal(repr(arr_int), + "array([0, 0, 0, 0], dtype=(numpy.int32, [('a', '<i2'), ('b', '<i2')]))") + + +class TestZeroRank(object): + def setup(self): + self.d = np.array(0), np.array('x', object) + + def test_ellipsis_subscript(self): + a, b = self.d + assert_equal(a[...], 0) + assert_equal(b[...], 'x') + assert_(a[...].base is a) # `a[...] is a` in numpy <1.9. + assert_(b[...].base is b) # `b[...] is b` in numpy <1.9. 
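A minimal illustrative sketch of the zero-rank indexing behaviour the preceding test relies on, assuming NumPy >= 1.9, where `a[...]` on a 0-d array returns a fresh 0-d view rather than `a` itself:

import numpy as np

a = np.array(0)        # zero-rank (0-d) array
v = a[...]             # a new 0-d view (NumPy >= 1.9), not the same object as `a`
assert v.base is a     # the view keeps a reference back to the original array
assert v.shape == ()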
+ + def test_empty_subscript(self): + a, b = self.d + assert_equal(a[()], 0) + assert_equal(b[()], 'x') + assert_(type(a[()]) is a.dtype.type) + assert_(type(b[()]) is str) + + def test_invalid_subscript(self): + a, b = self.d + assert_raises(IndexError, lambda x: x[0], a) + assert_raises(IndexError, lambda x: x[0], b) + assert_raises(IndexError, lambda x: x[np.array([], int)], a) + assert_raises(IndexError, lambda x: x[np.array([], int)], b) + + def test_ellipsis_subscript_assignment(self): + a, b = self.d + a[...] = 42 + assert_equal(a, 42) + b[...] = '' + assert_equal(b.item(), '') + + def test_empty_subscript_assignment(self): + a, b = self.d + a[()] = 42 + assert_equal(a, 42) + b[()] = '' + assert_equal(b.item(), '') + + def test_invalid_subscript_assignment(self): + a, b = self.d + + def assign(x, i, v): + x[i] = v + + assert_raises(IndexError, assign, a, 0, 42) + assert_raises(IndexError, assign, b, 0, '') + assert_raises(ValueError, assign, a, (), '') + + def test_newaxis(self): + a, b = self.d + assert_equal(a[np.newaxis].shape, (1,)) + assert_equal(a[..., np.newaxis].shape, (1,)) + assert_equal(a[np.newaxis, ...].shape, (1,)) + assert_equal(a[..., np.newaxis].shape, (1,)) + assert_equal(a[np.newaxis, ..., np.newaxis].shape, (1, 1)) + assert_equal(a[..., np.newaxis, np.newaxis].shape, (1, 1)) + assert_equal(a[np.newaxis, np.newaxis, ...].shape, (1, 1)) + assert_equal(a[(np.newaxis,)*10].shape, (1,)*10) + + def test_invalid_newaxis(self): + a, b = self.d + + def subscript(x, i): + x[i] + + assert_raises(IndexError, subscript, a, (np.newaxis, 0)) + assert_raises(IndexError, subscript, a, (np.newaxis,)*50) + + def test_constructor(self): + x = np.ndarray(()) + x[()] = 5 + assert_equal(x[()], 5) + y = np.ndarray((), buffer=x) + y[()] = 6 + assert_equal(x[()], 6) + + def test_output(self): + x = np.array(2) + assert_raises(ValueError, np.add, x, [1], x) + + def test_real_imag(self): + # contiguity checks are for gh-11245 + x = np.array(1j) + xr = x.real + xi = x.imag + + assert_equal(xr, np.array(0)) + assert_(type(xr) is np.ndarray) + assert_equal(xr.flags.contiguous, True) + assert_equal(xr.flags.f_contiguous, True) + + assert_equal(xi, np.array(1)) + assert_(type(xi) is np.ndarray) + assert_equal(xi.flags.contiguous, True) + assert_equal(xi.flags.f_contiguous, True) + + +class TestScalarIndexing(object): + def setup(self): + self.d = np.array([0, 1])[0] + + def test_ellipsis_subscript(self): + a = self.d + assert_equal(a[...], 0) + assert_equal(a[...].shape, ()) + + def test_empty_subscript(self): + a = self.d + assert_equal(a[()], 0) + assert_equal(a[()].shape, ()) + + def test_invalid_subscript(self): + a = self.d + assert_raises(IndexError, lambda x: x[0], a) + assert_raises(IndexError, lambda x: x[np.array([], int)], a) + + def test_invalid_subscript_assignment(self): + a = self.d + + def assign(x, i, v): + x[i] = v + + assert_raises(TypeError, assign, a, 0, 42) + + def test_newaxis(self): + a = self.d + assert_equal(a[np.newaxis].shape, (1,)) + assert_equal(a[..., np.newaxis].shape, (1,)) + assert_equal(a[np.newaxis, ...].shape, (1,)) + assert_equal(a[..., np.newaxis].shape, (1,)) + assert_equal(a[np.newaxis, ..., np.newaxis].shape, (1, 1)) + assert_equal(a[..., np.newaxis, np.newaxis].shape, (1, 1)) + assert_equal(a[np.newaxis, np.newaxis, ...].shape, (1, 1)) + assert_equal(a[(np.newaxis,)*10].shape, (1,)*10) + + def test_invalid_newaxis(self): + a = self.d + + def subscript(x, i): + x[i] + + assert_raises(IndexError, subscript, a, (np.newaxis, 0)) + assert_raises(IndexError, 
subscript, a, (np.newaxis,)*50) + + def test_overlapping_assignment(self): + # With positive strides + a = np.arange(4) + a[:-1] = a[1:] + assert_equal(a, [1, 2, 3, 3]) + + a = np.arange(4) + a[1:] = a[:-1] + assert_equal(a, [0, 0, 1, 2]) + + # With positive and negative strides + a = np.arange(4) + a[:] = a[::-1] + assert_equal(a, [3, 2, 1, 0]) + + a = np.arange(6).reshape(2, 3) + a[::-1,:] = a[:, ::-1] + assert_equal(a, [[5, 4, 3], [2, 1, 0]]) + + a = np.arange(6).reshape(2, 3) + a[::-1, ::-1] = a[:, ::-1] + assert_equal(a, [[3, 4, 5], [0, 1, 2]]) + + # With just one element overlapping + a = np.arange(5) + a[:3] = a[2:] + assert_equal(a, [2, 3, 4, 3, 4]) + + a = np.arange(5) + a[2:] = a[:3] + assert_equal(a, [0, 1, 0, 1, 2]) + + a = np.arange(5) + a[2::-1] = a[2:] + assert_equal(a, [4, 3, 2, 3, 4]) + + a = np.arange(5) + a[2:] = a[2::-1] + assert_equal(a, [0, 1, 2, 1, 0]) + + a = np.arange(5) + a[2::-1] = a[:1:-1] + assert_equal(a, [2, 3, 4, 3, 4]) + + a = np.arange(5) + a[:1:-1] = a[2::-1] + assert_equal(a, [0, 1, 0, 1, 2]) + + +class TestCreation(object): + """ + Test the np.array constructor + """ + def test_from_attribute(self): + class x(object): + def __array__(self, dtype=None): + pass + + assert_raises(ValueError, np.array, x()) + + def test_from_string(self): + types = np.typecodes['AllInteger'] + np.typecodes['Float'] + nstr = ['123', '123'] + result = np.array([123, 123], dtype=int) + for type in types: + msg = 'String conversion for %s' % type + assert_equal(np.array(nstr, dtype=type), result, err_msg=msg) + + def test_void(self): + arr = np.array([], dtype='V') + assert_equal(arr.dtype.kind, 'V') + + def test_too_big_error(self): + # 45341 is the smallest integer greater than sqrt(2**31 - 1). + # 3037000500 is the smallest integer greater than sqrt(2**63 - 1). + # We want to make sure that the square byte array with those dimensions + # is too big on 32 or 64 bit systems respectively. + if np.iinfo('intp').max == 2**31 - 1: + shape = (46341, 46341) + elif np.iinfo('intp').max == 2**63 - 1: + shape = (3037000500, 3037000500) + else: + return + assert_raises(ValueError, np.empty, shape, dtype=np.int8) + assert_raises(ValueError, np.zeros, shape, dtype=np.int8) + assert_raises(ValueError, np.ones, shape, dtype=np.int8) + + def test_zeros(self): + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + for dt in types: + d = np.zeros((13,), dtype=dt) + assert_equal(np.count_nonzero(d), 0) + # true for ieee floats + assert_equal(d.sum(), 0) + assert_(not d.any()) + + d = np.zeros(2, dtype='(2,4)i4') + assert_equal(np.count_nonzero(d), 0) + assert_equal(d.sum(), 0) + assert_(not d.any()) + + d = np.zeros(2, dtype='4i4') + assert_equal(np.count_nonzero(d), 0) + assert_equal(d.sum(), 0) + assert_(not d.any()) + + d = np.zeros(2, dtype='(2,4)i4, (2,4)i4') + assert_equal(np.count_nonzero(d), 0) + + @pytest.mark.slow + def test_zeros_big(self): + # test big array as they might be allocated different by the system + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + for dt in types: + d = np.zeros((30 * 1024**2,), dtype=dt) + assert_(not d.any()) + # This test can fail on 32-bit systems due to insufficient + # contiguous memory. Deallocating the previous array increases the + # chance of success. 
+ del(d) + + def test_zeros_obj(self): + # test initialization from PyLong(0) + d = np.zeros((13,), dtype=object) + assert_array_equal(d, [0] * 13) + assert_equal(np.count_nonzero(d), 0) + + def test_zeros_obj_obj(self): + d = np.zeros(10, dtype=[('k', object, 2)]) + assert_array_equal(d['k'], 0) + + def test_zeros_like_like_zeros(self): + # test zeros_like returns the same as zeros + for c in np.typecodes['All']: + if c == 'V': + continue + d = np.zeros((3,3), dtype=c) + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + # explicitly check some special cases + d = np.zeros((3,3), dtype='S5') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + d = np.zeros((3,3), dtype='U5') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + + d = np.zeros((3,3), dtype='<i4') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + d = np.zeros((3,3), dtype='>i4') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + + d = np.zeros((3,3), dtype='<M8[s]') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + d = np.zeros((3,3), dtype='>M8[s]') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + + d = np.zeros((3,3), dtype='f4,f4') + assert_array_equal(np.zeros_like(d), d) + assert_equal(np.zeros_like(d).dtype, d.dtype) + + def test_empty_unicode(self): + # don't throw decode errors on garbage memory + for i in range(5, 100, 5): + d = np.empty(i, dtype='U') + str(d) + + def test_sequence_non_homogenous(self): + assert_equal(np.array([4, 2**80]).dtype, object) + assert_equal(np.array([4, 2**80, 4]).dtype, object) + assert_equal(np.array([2**80, 4]).dtype, object) + assert_equal(np.array([2**80] * 3).dtype, object) + assert_equal(np.array([[1, 1],[1j, 1j]]).dtype, complex) + assert_equal(np.array([[1j, 1j],[1, 1]]).dtype, complex) + assert_equal(np.array([[1, 1, 1],[1, 1j, 1.], [1, 1, 1]]).dtype, complex) + + @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Not Python 2") + def test_sequence_long(self): + assert_equal(np.array([long(4), long(4)]).dtype, np.long) + assert_equal(np.array([long(4), 2**80]).dtype, object) + assert_equal(np.array([long(4), 2**80, long(4)]).dtype, object) + assert_equal(np.array([2**80, long(4)]).dtype, object) + + def test_non_sequence_sequence(self): + """Should not segfault. + + Class Fail breaks the sequence protocol for new style classes, i.e., + those derived from object. Class Map is a mapping type indicated by + raising a ValueError. At some point we may raise a warning instead + of an error in the Fail case. 
+ + """ + class Fail(object): + def __len__(self): + return 1 + + def __getitem__(self, index): + raise ValueError() + + class Map(object): + def __len__(self): + return 1 + + def __getitem__(self, index): + raise KeyError() + + a = np.array([Map()]) + assert_(a.shape == (1,)) + assert_(a.dtype == np.dtype(object)) + assert_raises(ValueError, np.array, [Fail()]) + + def test_no_len_object_type(self): + # gh-5100, want object array from iterable object without len() + class Point2: + def __init__(self): + pass + + def __getitem__(self, ind): + if ind in [0, 1]: + return ind + else: + raise IndexError() + d = np.array([Point2(), Point2(), Point2()]) + assert_equal(d.dtype, np.dtype(object)) + + def test_false_len_sequence(self): + # gh-7264, segfault for this example + class C: + def __getitem__(self, i): + raise IndexError + def __len__(self): + return 42 + + assert_raises(ValueError, np.array, C()) # segfault? + + def test_failed_len_sequence(self): + # gh-7393 + class A(object): + def __init__(self, data): + self._data = data + def __getitem__(self, item): + return type(self)(self._data[item]) + def __len__(self): + return len(self._data) + + # len(d) should give 3, but len(d[0]) will fail + d = A([1,2,3]) + assert_equal(len(np.array(d)), 3) + + def test_array_too_big(self): + # Test that array creation succeeds for arrays addressable by intp + # on the byte level and fails for too large arrays. + buf = np.zeros(100) + + max_bytes = np.iinfo(np.intp).max + for dtype in ["intp", "S20", "b"]: + dtype = np.dtype(dtype) + itemsize = dtype.itemsize + + np.ndarray(buffer=buf, strides=(0,), + shape=(max_bytes//itemsize,), dtype=dtype) + assert_raises(ValueError, np.ndarray, buffer=buf, strides=(0,), + shape=(max_bytes//itemsize + 1,), dtype=dtype) + + def test_jagged_ndim_object(self): + # Lists of mismatching depths are treated as object arrays + a = np.array([[1], 2, 3]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([1, [2], 3]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([1, 2, [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + def test_jagged_shape_object(self): + # The jagged dimension of a list is turned into an object array + a = np.array([[1, 1], [2], [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([[1], [2, 2], [3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + a = np.array([[1], [2], [3, 3]]) + assert_equal(a.shape, (3,)) + assert_equal(a.dtype, object) + + +class TestStructured(object): + def test_subarray_field_access(self): + a = np.zeros((3, 5), dtype=[('a', ('i4', (2, 2)))]) + a['a'] = np.arange(60).reshape(3, 5, 2, 2) + + # Since the subarray is always in C-order, a transpose + # does not swap the subarray: + assert_array_equal(a.T['a'], a['a'].transpose(1, 0, 2, 3)) + + # In Fortran order, the subarray gets appended + # like in all other cases, not prepended as a special case + b = a.copy(order='F') + assert_equal(a['a'].shape, b['a'].shape) + assert_equal(a.T['a'].shape, a.T.copy()['a'].shape) + + def test_subarray_comparison(self): + # Check that comparisons between record arrays with + # multi-dimensional field types work properly + a = np.rec.fromrecords( + [([1, 2, 3], 'a', [[1, 2], [3, 4]]), ([3, 3, 3], 'b', [[0, 0], [0, 0]])], + dtype=[('a', ('f4', 3)), ('b', object), ('c', ('i4', (2, 2)))]) + b = a.copy() + assert_equal(a == b, [True, True]) + assert_equal(a != b, [False, False]) + b[1].b = 'c' + assert_equal(a == b, [True, 
False]) + assert_equal(a != b, [False, True]) + for i in range(3): + b[0].a = a[0].a + b[0].a[i] = 5 + assert_equal(a == b, [False, False]) + assert_equal(a != b, [True, True]) + for i in range(2): + for j in range(2): + b = a.copy() + b[0].c[i, j] = 10 + assert_equal(a == b, [False, True]) + assert_equal(a != b, [True, False]) + + # Check that broadcasting with a subarray works + a = np.array([[(0,)], [(1,)]], dtype=[('a', 'f8')]) + b = np.array([(0,), (0,), (1,)], dtype=[('a', 'f8')]) + assert_equal(a == b, [[True, True, False], [False, False, True]]) + assert_equal(b == a, [[True, True, False], [False, False, True]]) + a = np.array([[(0,)], [(1,)]], dtype=[('a', 'f8', (1,))]) + b = np.array([(0,), (0,), (1,)], dtype=[('a', 'f8', (1,))]) + assert_equal(a == b, [[True, True, False], [False, False, True]]) + assert_equal(b == a, [[True, True, False], [False, False, True]]) + a = np.array([[([0, 0],)], [([1, 1],)]], dtype=[('a', 'f8', (2,))]) + b = np.array([([0, 0],), ([0, 1],), ([1, 1],)], dtype=[('a', 'f8', (2,))]) + assert_equal(a == b, [[True, False, False], [False, False, True]]) + assert_equal(b == a, [[True, False, False], [False, False, True]]) + + # Check that broadcasting Fortran-style arrays with a subarray work + a = np.array([[([0, 0],)], [([1, 1],)]], dtype=[('a', 'f8', (2,))], order='F') + b = np.array([([0, 0],), ([0, 1],), ([1, 1],)], dtype=[('a', 'f8', (2,))]) + assert_equal(a == b, [[True, False, False], [False, False, True]]) + assert_equal(b == a, [[True, False, False], [False, False, True]]) + + # Check that incompatible sub-array shapes don't result to broadcasting + x = np.zeros((1,), dtype=[('a', ('f4', (1, 2))), ('b', 'i1')]) + y = np.zeros((1,), dtype=[('a', ('f4', (2,))), ('b', 'i1')]) + # This comparison invokes deprecated behaviour, and will probably + # start raising an error eventually. What we really care about in this + # test is just that it doesn't return True. + with suppress_warnings() as sup: + sup.filter(FutureWarning, "elementwise == comparison failed") + assert_equal(x == y, False) + + x = np.zeros((1,), dtype=[('a', ('f4', (2, 1))), ('b', 'i1')]) + y = np.zeros((1,), dtype=[('a', ('f4', (2,))), ('b', 'i1')]) + # This comparison invokes deprecated behaviour, and will probably + # start raising an error eventually. What we really care about in this + # test is just that it doesn't return True. 
+ with suppress_warnings() as sup: + sup.filter(FutureWarning, "elementwise == comparison failed") + assert_equal(x == y, False) + + # Check that structured arrays that are different only in + # byte-order work + a = np.array([(5, 42), (10, 1)], dtype=[('a', '>i8'), ('b', '<f8')]) + b = np.array([(5, 43), (10, 1)], dtype=[('a', '<i8'), ('b', '>f8')]) + assert_equal(a == b, [False, True]) + + def test_casting(self): + # Check that casting a structured array to change its byte order + # works + a = np.array([(1,)], dtype=[('a', '<i4')]) + assert_(np.can_cast(a.dtype, [('a', '>i4')], casting='unsafe')) + b = a.astype([('a', '>i4')]) + assert_equal(b, a.byteswap().newbyteorder()) + assert_equal(a['a'][0], b['a'][0]) + + # Check that equality comparison works on structured arrays if + # they are 'equiv'-castable + a = np.array([(5, 42), (10, 1)], dtype=[('a', '>i4'), ('b', '<f8')]) + b = np.array([(5, 42), (10, 1)], dtype=[('a', '<i4'), ('b', '>f8')]) + assert_(np.can_cast(a.dtype, b.dtype, casting='equiv')) + assert_equal(a == b, [True, True]) + + # Check that 'equiv' casting can change byte order + assert_(np.can_cast(a.dtype, b.dtype, casting='equiv')) + c = a.astype(b.dtype, casting='equiv') + assert_equal(a == c, [True, True]) + + # Check that 'safe' casting can change byte order and up-cast + # fields + t = [('a', '<i8'), ('b', '>f8')] + assert_(np.can_cast(a.dtype, t, casting='safe')) + c = a.astype(t, casting='safe') + assert_equal((c == np.array([(5, 42), (10, 1)], dtype=t)), + [True, True]) + + # Check that 'same_kind' casting can change byte order and + # change field widths within a "kind" + t = [('a', '<i4'), ('b', '>f4')] + assert_(np.can_cast(a.dtype, t, casting='same_kind')) + c = a.astype(t, casting='same_kind') + assert_equal((c == np.array([(5, 42), (10, 1)], dtype=t)), + [True, True]) + + # Check that casting fails if the casting rule should fail on + # any of the fields + t = [('a', '>i8'), ('b', '<f4')] + assert_(not np.can_cast(a.dtype, t, casting='safe')) + assert_raises(TypeError, a.astype, t, casting='safe') + t = [('a', '>i2'), ('b', '<f8')] + assert_(not np.can_cast(a.dtype, t, casting='equiv')) + assert_raises(TypeError, a.astype, t, casting='equiv') + t = [('a', '>i8'), ('b', '<i2')] + assert_(not np.can_cast(a.dtype, t, casting='same_kind')) + assert_raises(TypeError, a.astype, t, casting='same_kind') + assert_(not np.can_cast(a.dtype, b.dtype, casting='no')) + assert_raises(TypeError, a.astype, b.dtype, casting='no') + + # Check that non-'unsafe' casting can't change the set of field names + for casting in ['no', 'safe', 'equiv', 'same_kind']: + t = [('a', '>i4')] + assert_(not np.can_cast(a.dtype, t, casting=casting)) + t = [('a', '>i4'), ('b', '<f8'), ('c', 'i4')] + assert_(not np.can_cast(a.dtype, t, casting=casting)) + + def test_objview(self): + # https://github.com/numpy/numpy/issues/3286 + a = np.array([], dtype=[('a', 'f'), ('b', 'f'), ('c', 'O')]) + a[['a', 'b']] # TypeError? + + # https://github.com/numpy/numpy/issues/3253 + dat2 = np.zeros(3, [('A', 'i'), ('B', '|O')]) + dat2[['B', 'A']] # TypeError? 
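A small sketch of the multi-field selection that test_objview above checks does not raise TypeError, even when one field has object dtype (gh-3286 / gh-3253); field names here are chosen for illustration:

import numpy as np

a = np.zeros(3, dtype=[('a', 'f4'), ('b', 'f4'), ('c', 'O')])
sub = a[['a', 'b']]                   # select a subset of fields by name
assert sub.dtype.names == ('a', 'b')
# whether `sub` is a view or a copy depends on the NumPy version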
+ + def test_setfield(self): + # https://github.com/numpy/numpy/issues/3126 + struct_dt = np.dtype([('elem', 'i4', 5),]) + dt = np.dtype([('field', 'i4', 10),('struct', struct_dt)]) + x = np.zeros(1, dt) + x[0]['field'] = np.ones(10, dtype='i4') + x[0]['struct'] = np.ones(1, dtype=struct_dt) + assert_equal(x[0]['field'], np.ones(10, dtype='i4')) + + def test_setfield_object(self): + # make sure object field assignment with ndarray value + # on void scalar mimics setitem behavior + b = np.zeros(1, dtype=[('x', 'O')]) + # next line should work identically to b['x'][0] = np.arange(3) + b[0]['x'] = np.arange(3) + assert_equal(b[0]['x'], np.arange(3)) + + # check that broadcasting check still works + c = np.zeros(1, dtype=[('x', 'O', 5)]) + + def testassign(): + c[0]['x'] = np.arange(3) + + assert_raises(ValueError, testassign) + + def test_zero_width_string(self): + # Test for PR #6430 / issues #473, #4955, #2585 + + dt = np.dtype([('I', int), ('S', 'S0')]) + + x = np.zeros(4, dtype=dt) + + assert_equal(x['S'], [b'', b'', b'', b'']) + assert_equal(x['S'].itemsize, 0) + + x['S'] = ['a', 'b', 'c', 'd'] + assert_equal(x['S'], [b'', b'', b'', b'']) + assert_equal(x['I'], [0, 0, 0, 0]) + + # Variation on test case from #4955 + x['S'][x['I'] == 0] = 'hello' + assert_equal(x['S'], [b'', b'', b'', b'']) + assert_equal(x['I'], [0, 0, 0, 0]) + + # Variation on test case from #2585 + x['S'] = 'A' + assert_equal(x['S'], [b'', b'', b'', b'']) + assert_equal(x['I'], [0, 0, 0, 0]) + + # Allow zero-width dtypes in ndarray constructor + y = np.ndarray(4, dtype=x['S'].dtype) + assert_equal(y.itemsize, 0) + assert_equal(x['S'], y) + + # More tests for indexing an array with zero-width fields + assert_equal(np.zeros(4, dtype=[('a', 'S0,S0'), + ('b', 'u1')])['a'].itemsize, 0) + assert_equal(np.empty(3, dtype='S0,S0').itemsize, 0) + assert_equal(np.zeros(4, dtype='S0,u1')['f0'].itemsize, 0) + + xx = x['S'].reshape((2, 2)) + assert_equal(xx.itemsize, 0) + assert_equal(xx, [[b'', b''], [b'', b'']]) + # check for no uninitialized memory due to viewing S0 array + assert_equal(xx[:].dtype, xx.dtype) + assert_array_equal(eval(repr(xx), dict(array=np.array)), xx) + + b = io.BytesIO() + np.save(b, xx) + + b.seek(0) + yy = np.load(b) + assert_equal(yy.itemsize, 0) + assert_equal(xx, yy) + + with temppath(suffix='.npy') as tmp: + np.save(tmp, xx) + yy = np.load(tmp) + assert_equal(yy.itemsize, 0) + assert_equal(xx, yy) + + def test_base_attr(self): + a = np.zeros(3, dtype='i4,f4') + b = a[0] + assert_(b.base is a) + + def test_assignment(self): + def testassign(arr, v): + c = arr.copy() + c[0] = v # assign using setitem + c[1:] = v # assign using "dtype_transfer" code paths + return c + + dt = np.dtype([('foo', 'i8'), ('bar', 'i8')]) + arr = np.ones(2, dt) + v1 = np.array([(2,3)], dtype=[('foo', 'i8'), ('bar', 'i8')]) + v2 = np.array([(2,3)], dtype=[('bar', 'i8'), ('foo', 'i8')]) + v3 = np.array([(2,3)], dtype=[('bar', 'i8'), ('baz', 'i8')]) + v4 = np.array([(2,)], dtype=[('bar', 'i8')]) + v5 = np.array([(2,3)], dtype=[('foo', 'f8'), ('bar', 'f8')]) + w = arr.view({'names': ['bar'], 'formats': ['i8'], 'offsets': [8]}) + + ans = np.array([(2,3),(2,3)], dtype=dt) + assert_equal(testassign(arr, v1), ans) + assert_equal(testassign(arr, v2), ans) + assert_equal(testassign(arr, v3), ans) + assert_raises(ValueError, lambda: testassign(arr, v4)) + assert_equal(testassign(arr, v5), ans) + w[:] = 4 + assert_equal(arr, np.array([(1,4),(1,4)], dtype=dt)) + + # test field-reordering, assignment by position, and self-assignment + a = 
np.array([(1,2,3)], + dtype=[('foo', 'i8'), ('bar', 'i8'), ('baz', 'f4')]) + a[['foo', 'bar']] = a[['bar', 'foo']] + assert_equal(a[0].item(), (2,1,3)) + + # test that this works even for 'simple_unaligned' structs + # (ie, that PyArray_EquivTypes cares about field order too) + a = np.array([(1,2)], dtype=[('a', 'i4'), ('b', 'i4')]) + a[['a', 'b']] = a[['b', 'a']] + assert_equal(a[0].item(), (2,1)) + + def test_structuredscalar_indexing(self): + # test gh-7262 + x = np.empty(shape=1, dtype="(2)3S,(2)3U") + assert_equal(x[["f0","f1"]][0], x[0][["f0","f1"]]) + assert_equal(x[0], x[0][()]) + + def test_multiindex_titles(self): + a = np.zeros(4, dtype=[(('a', 'b'), 'i'), ('c', 'i'), ('d', 'i')]) + assert_raises(KeyError, lambda : a[['a','c']]) + assert_raises(KeyError, lambda : a[['a','a']]) + assert_raises(ValueError, lambda : a[['b','b']]) # field exists, but repeated + a[['b','c']] # no exception + + +class TestBool(object): + def test_test_interning(self): + a0 = np.bool_(0) + b0 = np.bool_(False) + assert_(a0 is b0) + a1 = np.bool_(1) + b1 = np.bool_(True) + assert_(a1 is b1) + assert_(np.array([True])[0] is a1) + assert_(np.array(True)[()] is a1) + + def test_sum(self): + d = np.ones(101, dtype=bool) + assert_equal(d.sum(), d.size) + assert_equal(d[::2].sum(), d[::2].size) + assert_equal(d[::-2].sum(), d[::-2].size) + + d = np.frombuffer(b'\xff\xff' * 100, dtype=bool) + assert_equal(d.sum(), d.size) + assert_equal(d[::2].sum(), d[::2].size) + assert_equal(d[::-2].sum(), d[::-2].size) + + def check_count_nonzero(self, power, length): + powers = [2 ** i for i in range(length)] + for i in range(2**power): + l = [(i & x) != 0 for x in powers] + a = np.array(l, dtype=bool) + c = builtins.sum(l) + assert_equal(np.count_nonzero(a), c) + av = a.view(np.uint8) + av *= 3 + assert_equal(np.count_nonzero(a), c) + av *= 4 + assert_equal(np.count_nonzero(a), c) + av[av != 0] = 0xFF + assert_equal(np.count_nonzero(a), c) + + def test_count_nonzero(self): + # check all 12 bit combinations in a length 17 array + # covers most cases of the 16 byte unrolled code + self.check_count_nonzero(12, 17) + + @pytest.mark.slow + def test_count_nonzero_all(self): + # check all combinations in a length 17 array + # covers all cases of the 16 byte unrolled code + self.check_count_nonzero(17, 17) + + def test_count_nonzero_unaligned(self): + # prevent mistakes as e.g. 
gh-4060 + for o in range(7): + a = np.zeros((18,), dtype=bool)[o+1:] + a[:o] = True + assert_equal(np.count_nonzero(a), builtins.sum(a.tolist())) + a = np.ones((18,), dtype=bool)[o+1:] + a[:o] = False + assert_equal(np.count_nonzero(a), builtins.sum(a.tolist())) + + def _test_cast_from_flexible(self, dtype): + # empty string -> false + for n in range(3): + v = np.array(b'', (dtype, n)) + assert_equal(bool(v), False) + assert_equal(bool(v[()]), False) + assert_equal(v.astype(bool), False) + assert_(isinstance(v.astype(bool), np.ndarray)) + assert_(v[()].astype(bool) is np.False_) + + # anything else -> true + for n in range(1, 4): + for val in [b'a', b'0', b' ']: + v = np.array(val, (dtype, n)) + assert_equal(bool(v), True) + assert_equal(bool(v[()]), True) + assert_equal(v.astype(bool), True) + assert_(isinstance(v.astype(bool), np.ndarray)) + assert_(v[()].astype(bool) is np.True_) + + def test_cast_from_void(self): + self._test_cast_from_flexible(np.void) + + @pytest.mark.xfail(reason="See gh-9847") + def test_cast_from_unicode(self): + self._test_cast_from_flexible(np.unicode_) + + @pytest.mark.xfail(reason="See gh-9847") + def test_cast_from_bytes(self): + self._test_cast_from_flexible(np.bytes_) + + +class TestZeroSizeFlexible(object): + @staticmethod + def _zeros(shape, dtype=str): + dtype = np.dtype(dtype) + if dtype == np.void: + return np.zeros(shape, dtype=(dtype, 0)) + + # not constructable directly + dtype = np.dtype([('x', dtype, 0)]) + return np.zeros(shape, dtype=dtype)['x'] + + def test_create(self): + zs = self._zeros(10, bytes) + assert_equal(zs.itemsize, 0) + zs = self._zeros(10, np.void) + assert_equal(zs.itemsize, 0) + zs = self._zeros(10, unicode) + assert_equal(zs.itemsize, 0) + + def _test_sort_partition(self, name, kinds, **kwargs): + # Previously, these would all hang + for dt in [bytes, np.void, unicode]: + zs = self._zeros(10, dt) + sort_method = getattr(zs, name) + sort_func = getattr(np, name) + for kind in kinds: + sort_method(kind=kind, **kwargs) + sort_func(zs, kind=kind, **kwargs) + + def test_sort(self): + self._test_sort_partition('sort', kinds='qhm') + + def test_argsort(self): + self._test_sort_partition('argsort', kinds='qhm') + + def test_partition(self): + self._test_sort_partition('partition', kinds=['introselect'], kth=2) + + def test_argpartition(self): + self._test_sort_partition('argpartition', kinds=['introselect'], kth=2) + + def test_resize(self): + # previously an error + for dt in [bytes, np.void, unicode]: + zs = self._zeros(10, dt) + zs.resize(25) + zs.resize((10, 10)) + + def test_view(self): + for dt in [bytes, np.void, unicode]: + zs = self._zeros(10, dt) + + # viewing as itself should be allowed + assert_equal(zs.view(dt).dtype, np.dtype(dt)) + + # viewing as any non-empty type gives an empty result + assert_equal(zs.view((dt, 1)).shape, (0,)) + + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + for dt in [bytes, np.void, unicode]: + zs = self._zeros(10, dt) + p = pickle.dumps(zs, protocol=proto) + zs2 = pickle.loads(p) + + assert_equal(zs.dtype, zs2.dtype) + + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, + reason="requires pickle protocol 5") + def test_pickle_with_buffercallback(self): + array = np.arange(10) + buffers = [] + bytes_string = pickle.dumps(array, buffer_callback=buffers.append, + protocol=5) + array_from_buffer = pickle.loads(bytes_string, buffers=buffers) + # when using pickle protocol 5 with buffer callbacks, + # array_from_buffer is reconstructed from a buffer holding a view + # to 
the initial array's data, so modifying an element in array + # should modify it in array_from_buffer too. + array[0] = -1 + assert array_from_buffer[0] == -1, array_from_buffer[0] + + +class TestMethods(object): + def test_compress(self): + tgt = [[5, 6, 7, 8, 9]] + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1], axis=0) + assert_equal(out, tgt) + + tgt = [[1, 3], [6, 8]] + out = arr.compress([0, 1, 0, 1, 0], axis=1) + assert_equal(out, tgt) + + tgt = [[1], [6]] + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1], axis=1) + assert_equal(out, tgt) + + arr = np.arange(10).reshape(2, 5) + out = arr.compress([0, 1]) + assert_equal(out, 1) + + def test_choose(self): + x = 2*np.ones((3,), dtype=int) + y = 3*np.ones((3,), dtype=int) + x2 = 2*np.ones((2, 3), dtype=int) + y2 = 3*np.ones((2, 3), dtype=int) + ind = np.array([0, 0, 1]) + + A = ind.choose((x, y)) + assert_equal(A, [2, 2, 3]) + + A = ind.choose((x2, y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + A = ind.choose((x, y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + oned = np.ones(1) + # gh-12031, caused SEGFAULT + assert_raises(TypeError, oned.choose,np.void(0), [oned]) + + def test_prod(self): + ba = [1, 2, 10, 11, 6, 5, 4] + ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] + + for ctype in [np.int16, np.uint16, np.int32, np.uint32, + np.float32, np.float64, np.complex64, np.complex128]: + a = np.array(ba, ctype) + a2 = np.array(ba2, ctype) + if ctype in ['1', 'b']: + assert_raises(ArithmeticError, a.prod) + assert_raises(ArithmeticError, a2.prod, axis=1) + else: + assert_equal(a.prod(axis=0), 26400) + assert_array_equal(a2.prod(axis=0), + np.array([50, 36, 84, 180], ctype)) + assert_array_equal(a2.prod(axis=-1), + np.array([24, 1890, 600], ctype)) + + def test_repeat(self): + m = np.array([1, 2, 3, 4, 5, 6]) + m_rect = m.reshape((2, 3)) + + A = m.repeat([1, 3, 2, 1, 1, 2]) + assert_equal(A, [1, 2, 2, 2, 3, + 3, 4, 5, 6, 6]) + + A = m.repeat(2) + assert_equal(A, [1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6]) + + A = m_rect.repeat([2, 1], axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6]]) + + A = m_rect.repeat([1, 3, 2], axis=1) + assert_equal(A, [[1, 2, 2, 2, 3, 3], + [4, 5, 5, 5, 6, 6]]) + + A = m_rect.repeat(2, axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6], + [4, 5, 6]]) + + A = m_rect.repeat(2, axis=1) + assert_equal(A, [[1, 1, 2, 2, 3, 3], + [4, 4, 5, 5, 6, 6]]) + + def test_reshape(self): + arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + + tgt = [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]] + assert_equal(arr.reshape(2, 6), tgt) + + tgt = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] + assert_equal(arr.reshape(3, 4), tgt) + + tgt = [[1, 10, 8, 6], [4, 2, 11, 9], [7, 5, 3, 12]] + assert_equal(arr.reshape((3, 4), order='F'), tgt) + + tgt = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]] + assert_equal(arr.T.reshape((3, 4), order='C'), tgt) + + def test_round(self): + def check_round(arr, expected, *round_args): + assert_equal(arr.round(*round_args), expected) + # With output array + out = np.zeros_like(arr) + res = arr.round(*round_args, out=out) + assert_equal(out, expected) + assert_equal(out, res) + + check_round(np.array([1.2, 1.5]), [1, 2]) + check_round(np.array(1.5), 2) + check_round(np.array([12.2, 15.5]), [10, 20], -1) + check_round(np.array([12.15, 15.51]), [12.2, 15.5], 1) + # Complex rounding + check_round(np.array([4.5 + 1.5j]), [4 + 2j]) + check_round(np.array([12.5 + 15.5j]), [10 + 20j], -1) + + def test_squeeze(self): + a = np.array([[[1], [2], 
[3]]]) + assert_equal(a.squeeze(), [1, 2, 3]) + assert_equal(a.squeeze(axis=(0,)), [[1], [2], [3]]) + assert_raises(ValueError, a.squeeze, axis=(1,)) + assert_equal(a.squeeze(axis=(2,)), [[1, 2, 3]]) + + def test_transpose(self): + a = np.array([[1, 2], [3, 4]]) + assert_equal(a.transpose(), [[1, 3], [2, 4]]) + assert_raises(ValueError, lambda: a.transpose(0)) + assert_raises(ValueError, lambda: a.transpose(0, 0)) + assert_raises(ValueError, lambda: a.transpose(0, 1, 2)) + + def test_sort(self): + # test ordering for floats and complex containing nans. It is only + # necessary to check the less-than comparison, so sorts that + # only follow the insertion sort path are sufficient. We only + # test doubles and complex doubles as the logic is the same. + + # check doubles + msg = "Test real sort order with nans" + a = np.array([np.nan, 1, 0]) + b = np.sort(a) + assert_equal(b, a[::-1], msg) + # check complex + msg = "Test complex sort order with nans" + a = np.zeros(9, dtype=np.complex128) + a.real += [np.nan, np.nan, np.nan, 1, 0, 1, 1, 0, 0] + a.imag += [np.nan, 1, 0, np.nan, np.nan, 1, 0, 1, 0] + b = np.sort(a) + assert_equal(b, a[::-1], msg) + + # all c scalar sorts use the same code with different types + # so it suffices to run a quick check with one type. The number + # of sorted items must be greater than ~50 to check the actual + # algorithm because quick and merge sort fall over to insertion + # sort for small arrays. + a = np.arange(101) + b = a[::-1].copy() + for kind in ['q', 'm', 'h']: + msg = "scalar sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test complex sorts. These use the same code as the scalars + # but the compare function differs. + ai = a*1j + 1 + bi = b*1j + 1 + for kind in ['q', 'm', 'h']: + msg = "complex sort, real part == 1, kind=%s" % kind + c = ai.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + c = bi.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + ai = a + 1j + bi = b + 1j + for kind in ['q', 'm', 'h']: + msg = "complex sort, imag part == 1, kind=%s" % kind + c = ai.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + c = bi.copy() + c.sort(kind=kind) + assert_equal(c, ai, msg) + + # test sorting of complex arrays requiring byte-swapping, gh-5441 + for endianness in '<>': + for dt in np.typecodes['Complex']: + arr = np.array([1+3.j, 2+2.j, 3+1.j], dtype=endianness + dt) + c = arr.copy() + c.sort() + msg = 'byte-swapped complex sort, dtype={0}'.format(dt) + assert_equal(c, arr, msg) + + # test string sorts. + s = 'aaaaaaaa' + a = np.array([s + chr(i) for i in range(101)]) + b = a[::-1].copy() + for kind in ['q', 'm', 'h']: + msg = "string sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test unicode sorts. + s = 'aaaaaaaa' + a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode) + b = a[::-1].copy() + for kind in ['q', 'm', 'h']: + msg = "unicode sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test object array sorts. + a = np.empty((101,), dtype=object) + a[:] = list(range(101)) + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "object sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test record array sorts. 
+ dt = np.dtype([('f', float), ('i', int)]) + a = np.array([(i, i) for i in range(101)], dtype=dt) + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "object sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test datetime64 sorts. + a = np.arange(0, 101, dtype='datetime64[D]') + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "datetime64 sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # test timedelta64 sorts. + a = np.arange(0, 101, dtype='timedelta64[D]') + b = a[::-1] + for kind in ['q', 'h', 'm']: + msg = "timedelta64 sort, kind=%s" % kind + c = a.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + c = b.copy() + c.sort(kind=kind) + assert_equal(c, a, msg) + + # check axis handling. This should be the same for all type + # specific sorts, so we only check it for one type and one kind + a = np.array([[3, 2], [1, 0]]) + b = np.array([[1, 0], [3, 2]]) + c = np.array([[2, 3], [0, 1]]) + d = a.copy() + d.sort(axis=0) + assert_equal(d, b, "test sort with axis=0") + d = a.copy() + d.sort(axis=1) + assert_equal(d, c, "test sort with axis=1") + d = a.copy() + d.sort() + assert_equal(d, c, "test sort with default axis") + + # check axis handling for multidimensional empty arrays + a = np.array([]) + a.shape = (3, 2, 1, 0) + for axis in range(-a.ndim, a.ndim): + msg = 'test empty array sort with axis={0}'.format(axis) + assert_equal(np.sort(a, axis=axis), a, msg) + msg = 'test empty array sort with axis=None' + assert_equal(np.sort(a, axis=None), a.ravel(), msg) + + # test generic class with bogus ordering, + # should not segfault. + class Boom(object): + def __lt__(self, other): + return True + + a = np.array([Boom()]*100, dtype=object) + for kind in ['q', 'm', 'h']: + msg = "bogus comparison object sort, kind=%s" % kind + c.sort(kind=kind) + + def test_void_sort(self): + # gh-8210 - previously segfaulted + for i in range(4): + rand = np.random.randint(256, size=4000, dtype=np.uint8) + arr = rand.view('V4') + arr[::-1].sort() + + dt = np.dtype([('val', 'i4', (1,))]) + for i in range(4): + rand = np.random.randint(256, size=4000, dtype=np.uint8) + arr = rand.view(dt) + arr[::-1].sort() + + def test_sort_raises(self): + #gh-9404 + arr = np.array([0, datetime.now(), 1], dtype=object) + for kind in ['q', 'm', 'h']: + assert_raises(TypeError, arr.sort, kind=kind) + #gh-3879 + class Raiser(object): + def raises_anything(*args, **kwargs): + raise TypeError("SOMETHING ERRORED") + __eq__ = __ne__ = __lt__ = __gt__ = __ge__ = __le__ = raises_anything + arr = np.array([[Raiser(), n] for n in range(10)]).reshape(-1) + np.random.shuffle(arr) + for kind in ['q', 'm', 'h']: + assert_raises(TypeError, arr.sort, kind=kind) + + def test_sort_degraded(self): + # test degraded dataset would take minutes to run with normal qsort + d = np.arange(1000000) + do = d.copy() + x = d + # create a median of 3 killer where each median is the sorted second + # last element of the quicksort partition + while x.size > 3: + mid = x.size // 2 + x[mid], x[-2] = x[-2], x[mid] + x = x[:-2] + + assert_equal(np.sort(d), do) + assert_equal(d[np.argsort(d)], do) + + def test_copy(self): + def assert_fortran(arr): + assert_(arr.flags.fortran) + assert_(arr.flags.f_contiguous) + assert_(not arr.flags.c_contiguous) + + def assert_c(arr): + assert_(not arr.flags.fortran) + assert_(not arr.flags.f_contiguous) + 
assert_(arr.flags.c_contiguous) + + a = np.empty((2, 2), order='F') + # Test copying a Fortran array + assert_c(a.copy()) + assert_c(a.copy('C')) + assert_fortran(a.copy('F')) + assert_fortran(a.copy('A')) + + # Now test starting with a C array. + a = np.empty((2, 2), order='C') + assert_c(a.copy()) + assert_c(a.copy('C')) + assert_fortran(a.copy('F')) + assert_c(a.copy('A')) + + def test_sort_order(self): + # Test sorting an array with fields + x1 = np.array([21, 32, 14]) + x2 = np.array(['my', 'first', 'name']) + x3 = np.array([3.1, 4.5, 6.2]) + r = np.rec.fromarrays([x1, x2, x3], names='id,word,number') + + r.sort(order=['id']) + assert_equal(r.id, np.array([14, 21, 32])) + assert_equal(r.word, np.array(['name', 'my', 'first'])) + assert_equal(r.number, np.array([6.2, 3.1, 4.5])) + + r.sort(order=['word']) + assert_equal(r.id, np.array([32, 21, 14])) + assert_equal(r.word, np.array(['first', 'my', 'name'])) + assert_equal(r.number, np.array([4.5, 3.1, 6.2])) + + r.sort(order=['number']) + assert_equal(r.id, np.array([21, 32, 14])) + assert_equal(r.word, np.array(['my', 'first', 'name'])) + assert_equal(r.number, np.array([3.1, 4.5, 6.2])) + + assert_raises_regex(ValueError, 'duplicate', + lambda: r.sort(order=['id', 'id'])) + + if sys.byteorder == 'little': + strtype = '>i2' + else: + strtype = '<i2' + mydtype = [('name', strchar + '5'), ('col2', strtype)] + r = np.array([('a', 1), ('b', 255), ('c', 3), ('d', 258)], + dtype=mydtype) + r.sort(order='col2') + assert_equal(r['col2'], [1, 3, 255, 258]) + assert_equal(r, np.array([('a', 1), ('c', 3), ('b', 255), ('d', 258)], + dtype=mydtype)) + + def test_argsort(self): + # all c scalar argsorts use the same code with different types + # so it suffices to run a quick check with one type. The number + # of sorted items must be greater than ~50 to check the actual + # algorithm because quick and merge sort fall over to insertion + # sort for small arrays. + a = np.arange(101) + b = a[::-1].copy() + for kind in ['q', 'm', 'h']: + msg = "scalar argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), a, msg) + assert_equal(b.copy().argsort(kind=kind), b, msg) + + # test complex argsorts. These use the same code as the scalars + # but the compare function differs. + ai = a*1j + 1 + bi = b*1j + 1 + for kind in ['q', 'm', 'h']: + msg = "complex argsort, kind=%s" % kind + assert_equal(ai.copy().argsort(kind=kind), a, msg) + assert_equal(bi.copy().argsort(kind=kind), b, msg) + ai = a + 1j + bi = b + 1j + for kind in ['q', 'm', 'h']: + msg = "complex argsort, kind=%s" % kind + assert_equal(ai.copy().argsort(kind=kind), a, msg) + assert_equal(bi.copy().argsort(kind=kind), b, msg) + + # test argsort of complex arrays requiring byte-swapping, gh-5441 + for endianness in '<>': + for dt in np.typecodes['Complex']: + arr = np.array([1+3.j, 2+2.j, 3+1.j], dtype=endianness + dt) + msg = 'byte-swapped complex argsort, dtype={0}'.format(dt) + assert_equal(arr.argsort(), + np.arange(len(arr), dtype=np.intp), msg) + + # test string argsorts. + s = 'aaaaaaaa' + a = np.array([s + chr(i) for i in range(101)]) + b = a[::-1].copy() + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'm', 'h']: + msg = "string argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # test unicode argsorts. 
+ s = 'aaaaaaaa' + a = np.array([s + chr(i) for i in range(101)], dtype=np.unicode) + b = a[::-1] + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'm', 'h']: + msg = "unicode argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # test object array argsorts. + a = np.empty((101,), dtype=object) + a[:] = list(range(101)) + b = a[::-1] + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'm', 'h']: + msg = "object argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # test structured array argsorts. + dt = np.dtype([('f', float), ('i', int)]) + a = np.array([(i, i) for i in range(101)], dtype=dt) + b = a[::-1] + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'm', 'h']: + msg = "structured array argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # test datetime64 argsorts. + a = np.arange(0, 101, dtype='datetime64[D]') + b = a[::-1] + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'h', 'm']: + msg = "datetime64 argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # test timedelta64 argsorts. + a = np.arange(0, 101, dtype='timedelta64[D]') + b = a[::-1] + r = np.arange(101) + rr = r[::-1] + for kind in ['q', 'h', 'm']: + msg = "timedelta64 argsort, kind=%s" % kind + assert_equal(a.copy().argsort(kind=kind), r, msg) + assert_equal(b.copy().argsort(kind=kind), rr, msg) + + # check axis handling. This should be the same for all type + # specific argsorts, so we only check it for one type and one kind + a = np.array([[3, 2], [1, 0]]) + b = np.array([[1, 1], [0, 0]]) + c = np.array([[1, 0], [1, 0]]) + assert_equal(a.copy().argsort(axis=0), b) + assert_equal(a.copy().argsort(axis=1), c) + assert_equal(a.copy().argsort(), c) + + # check axis handling for multidimensional empty arrays + a = np.array([]) + a.shape = (3, 2, 1, 0) + for axis in range(-a.ndim, a.ndim): + msg = 'test empty array argsort with axis={0}'.format(axis) + assert_equal(np.argsort(a, axis=axis), + np.zeros_like(a, dtype=np.intp), msg) + msg = 'test empty array argsort with axis=None' + assert_equal(np.argsort(a, axis=None), + np.zeros_like(a.ravel(), dtype=np.intp), msg) + + # check that stable argsorts are stable + r = np.arange(100) + # scalars + a = np.zeros(100) + assert_equal(a.argsort(kind='m'), r) + # complex + a = np.zeros(100, dtype=complex) + assert_equal(a.argsort(kind='m'), r) + # string + a = np.array(['aaaaaaaaa' for i in range(100)]) + assert_equal(a.argsort(kind='m'), r) + # unicode + a = np.array(['aaaaaaaaa' for i in range(100)], dtype=np.unicode) + assert_equal(a.argsort(kind='m'), r) + + def test_sort_unicode_kind(self): + d = np.arange(10) + k = b'\xc3\xa4'.decode("UTF8") + assert_raises(ValueError, d.sort, kind=k) + assert_raises(ValueError, d.argsort, kind=k) + + def test_searchsorted(self): + # test for floats and complex containing nans. The logic is the + # same for all float types so only test double types for now. + # The search sorted routines use the compare functions for the + # array type, so this checks if that is consistent with the sort + # order. 
+ + # check double + a = np.array([0, 1, np.nan]) + msg = "Test real searchsorted with nans, side='l'" + b = a.searchsorted(a, side='l') + assert_equal(b, np.arange(3), msg) + msg = "Test real searchsorted with nans, side='r'" + b = a.searchsorted(a, side='r') + assert_equal(b, np.arange(1, 4), msg) + # check double complex + a = np.zeros(9, dtype=np.complex128) + a.real += [0, 0, 1, 1, 0, 1, np.nan, np.nan, np.nan] + a.imag += [0, 1, 0, 1, np.nan, np.nan, 0, 1, np.nan] + msg = "Test complex searchsorted with nans, side='l'" + b = a.searchsorted(a, side='l') + assert_equal(b, np.arange(9), msg) + msg = "Test complex searchsorted with nans, side='r'" + b = a.searchsorted(a, side='r') + assert_equal(b, np.arange(1, 10), msg) + msg = "Test searchsorted with little endian, side='l'" + a = np.array([0, 128], dtype='<i4') + b = a.searchsorted(np.array(128, dtype='<i4')) + assert_equal(b, 1, msg) + msg = "Test searchsorted with big endian, side='l'" + a = np.array([0, 128], dtype='>i4') + b = a.searchsorted(np.array(128, dtype='>i4')) + assert_equal(b, 1, msg) + + # Check 0 elements + a = np.ones(0) + b = a.searchsorted([0, 1, 2], 'l') + assert_equal(b, [0, 0, 0]) + b = a.searchsorted([0, 1, 2], 'r') + assert_equal(b, [0, 0, 0]) + a = np.ones(1) + # Check 1 element + b = a.searchsorted([0, 1, 2], 'l') + assert_equal(b, [0, 0, 1]) + b = a.searchsorted([0, 1, 2], 'r') + assert_equal(b, [0, 1, 1]) + # Check all elements equal + a = np.ones(2) + b = a.searchsorted([0, 1, 2], 'l') + assert_equal(b, [0, 0, 2]) + b = a.searchsorted([0, 1, 2], 'r') + assert_equal(b, [0, 2, 2]) + + # Test searching unaligned array + a = np.arange(10) + aligned = np.empty(a.itemsize * a.size + 1, 'uint8') + unaligned = aligned[1:].view(a.dtype) + unaligned[:] = a + # Test searching unaligned array + b = unaligned.searchsorted(a, 'l') + assert_equal(b, a) + b = unaligned.searchsorted(a, 'r') + assert_equal(b, a + 1) + # Test searching for unaligned keys + b = a.searchsorted(unaligned, 'l') + assert_equal(b, a) + b = a.searchsorted(unaligned, 'r') + assert_equal(b, a + 1) + + # Test smart resetting of binsearch indices + a = np.arange(5) + b = a.searchsorted([6, 5, 4], 'l') + assert_equal(b, [5, 5, 4]) + b = a.searchsorted([6, 5, 4], 'r') + assert_equal(b, [5, 5, 5]) + + # Test all type specific binary search functions + types = ''.join((np.typecodes['AllInteger'], np.typecodes['AllFloat'], + np.typecodes['Datetime'], '?O')) + for dt in types: + if dt == 'M': + dt = 'M8[D]' + if dt == '?': + a = np.arange(2, dtype=dt) + out = np.arange(2) + else: + a = np.arange(0, 5, dtype=dt) + out = np.arange(5) + b = a.searchsorted(a, 'l') + assert_equal(b, out) + b = a.searchsorted(a, 'r') + assert_equal(b, out + 1) + # Test empty array, use a fresh array to get warnings in + # valgrind if access happens. + e = np.ndarray(shape=0, buffer=b'', dtype=dt) + b = e.searchsorted(a, 'l') + assert_array_equal(b, np.zeros(len(a), dtype=np.intp)) + b = a.searchsorted(e, 'l') + assert_array_equal(b, np.zeros(0, dtype=np.intp)) + + def test_searchsorted_unicode(self): + # Test searchsorted on unicode strings. + + # 1.6.1 contained a string length miscalculation in + # arraytypes.c.src:UNICODE_compare() which manifested as + # incorrect/inconsistent results from searchsorted. 
+ a = np.array(['P:\\20x_dapi_cy3\\20x_dapi_cy3_20100185_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100186_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100187_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100189_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100190_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100191_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100192_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100193_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100194_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100195_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100196_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100197_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100198_1', + 'P:\\20x_dapi_cy3\\20x_dapi_cy3_20100199_1'], + dtype=np.unicode) + ind = np.arange(len(a)) + assert_equal([a.searchsorted(v, 'left') for v in a], ind) + assert_equal([a.searchsorted(v, 'right') for v in a], ind + 1) + assert_equal([a.searchsorted(a[i], 'left') for i in ind], ind) + assert_equal([a.searchsorted(a[i], 'right') for i in ind], ind + 1) + + def test_searchsorted_with_sorter(self): + a = np.array([5, 2, 1, 3, 4]) + s = np.argsort(a) + assert_raises(TypeError, np.searchsorted, a, 0, sorter=(1, (2, 3))) + assert_raises(TypeError, np.searchsorted, a, 0, sorter=[1.1]) + assert_raises(ValueError, np.searchsorted, a, 0, sorter=[1, 2, 3, 4]) + assert_raises(ValueError, np.searchsorted, a, 0, sorter=[1, 2, 3, 4, 5, 6]) + + # bounds check + assert_raises(ValueError, np.searchsorted, a, 4, sorter=[0, 1, 2, 3, 5]) + assert_raises(ValueError, np.searchsorted, a, 0, sorter=[-1, 0, 1, 2, 3]) + assert_raises(ValueError, np.searchsorted, a, 0, sorter=[4, 0, -1, 2, 3]) + + a = np.random.rand(300) + s = a.argsort() + b = np.sort(a) + k = np.linspace(0, 1, 20) + assert_equal(b.searchsorted(k), a.searchsorted(k, sorter=s)) + + a = np.array([0, 1, 2, 3, 5]*20) + s = a.argsort() + k = [0, 1, 2, 3, 5] + expected = [0, 20, 40, 60, 80] + assert_equal(a.searchsorted(k, side='l', sorter=s), expected) + expected = [20, 40, 60, 80, 100] + assert_equal(a.searchsorted(k, side='r', sorter=s), expected) + + # Test searching unaligned array + keys = np.arange(10) + a = keys.copy() + np.random.shuffle(s) + s = a.argsort() + aligned = np.empty(a.itemsize * a.size + 1, 'uint8') + unaligned = aligned[1:].view(a.dtype) + # Test searching unaligned array + unaligned[:] = a + b = unaligned.searchsorted(keys, 'l', s) + assert_equal(b, keys) + b = unaligned.searchsorted(keys, 'r', s) + assert_equal(b, keys + 1) + # Test searching for unaligned keys + unaligned[:] = keys + b = a.searchsorted(unaligned, 'l', s) + assert_equal(b, keys) + b = a.searchsorted(unaligned, 'r', s) + assert_equal(b, keys + 1) + + # Test all type specific indirect binary search functions + types = ''.join((np.typecodes['AllInteger'], np.typecodes['AllFloat'], + np.typecodes['Datetime'], '?O')) + for dt in types: + if dt == 'M': + dt = 'M8[D]' + if dt == '?': + a = np.array([1, 0], dtype=dt) + # We want the sorter array to be of a type that is different + # from np.intp in all platforms, to check for #4698 + s = np.array([1, 0], dtype=np.int16) + out = np.array([1, 0]) + else: + a = np.array([3, 4, 1, 2, 0], dtype=dt) + # We want the sorter array to be of a type that is different + # from np.intp in all platforms, to check for #4698 + s = np.array([4, 2, 3, 0, 1], dtype=np.int16) + out = np.array([3, 4, 1, 2, 0], dtype=np.intp) + b = a.searchsorted(a, 'l', s) + assert_equal(b, out) + b = a.searchsorted(a, 'r', s) + assert_equal(b, out + 1) + # Test empty array, use a fresh array to get warnings in + # valgrind if access happens. 
+ e = np.ndarray(shape=0, buffer=b'', dtype=dt) + b = e.searchsorted(a, 'l', s[:0]) + assert_array_equal(b, np.zeros(len(a), dtype=np.intp)) + b = a.searchsorted(e, 'l', s) + assert_array_equal(b, np.zeros(0, dtype=np.intp)) + + # Test non-contiguous sorter array + a = np.array([3, 4, 1, 2, 0]) + srt = np.empty((10,), dtype=np.intp) + srt[1::2] = -1 + srt[::2] = [4, 2, 3, 0, 1] + s = srt[::2] + out = np.array([3, 4, 1, 2, 0], dtype=np.intp) + b = a.searchsorted(a, 'l', s) + assert_equal(b, out) + b = a.searchsorted(a, 'r', s) + assert_equal(b, out + 1) + + def test_searchsorted_return_type(self): + # Functions returning indices should always return base ndarrays + class A(np.ndarray): + pass + a = np.arange(5).view(A) + b = np.arange(1, 3).view(A) + s = np.arange(5).view(A) + assert_(not isinstance(a.searchsorted(b, 'l'), A)) + assert_(not isinstance(a.searchsorted(b, 'r'), A)) + assert_(not isinstance(a.searchsorted(b, 'l', s), A)) + assert_(not isinstance(a.searchsorted(b, 'r', s), A)) + + def test_argpartition_out_of_range(self): + # Test out of range values in kth raise an error, gh-5469 + d = np.arange(10) + assert_raises(ValueError, d.argpartition, 10) + assert_raises(ValueError, d.argpartition, -11) + # Test also for generic type argpartition, which uses sorting + # and used to not bound check kth + d_obj = np.arange(10, dtype=object) + assert_raises(ValueError, d_obj.argpartition, 10) + assert_raises(ValueError, d_obj.argpartition, -11) + + def test_partition_out_of_range(self): + # Test out of range values in kth raise an error, gh-5469 + d = np.arange(10) + assert_raises(ValueError, d.partition, 10) + assert_raises(ValueError, d.partition, -11) + # Test also for generic type partition, which uses sorting + # and used to not bound check kth + d_obj = np.arange(10, dtype=object) + assert_raises(ValueError, d_obj.partition, 10) + assert_raises(ValueError, d_obj.partition, -11) + + def test_argpartition_integer(self): + # Test non-integer values in kth raise an error/ + d = np.arange(10) + assert_raises(TypeError, d.argpartition, 9.) + # Test also for generic type argpartition, which uses sorting + # and used to not bound check kth + d_obj = np.arange(10, dtype=object) + assert_raises(TypeError, d_obj.argpartition, 9.) + + def test_partition_integer(self): + # Test out of range values in kth raise an error, gh-5469 + d = np.arange(10) + assert_raises(TypeError, d.partition, 9.) + # Test also for generic type partition, which uses sorting + # and used to not bound check kth + d_obj = np.arange(10, dtype=object) + assert_raises(TypeError, d_obj.partition, 9.) 
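An illustrative sketch of the partition/argpartition contract and the kth validation exercised by the tests above (gh-5469): kth must be an in-range integer, and the element at position kth ends up where a full sort would place it:

import numpy as np

d = np.arange(10)
p = np.partition(d, 3)           # p[3] equals np.sort(d)[3]
i = np.argpartition(d, 3)        # the same guarantee, expressed as indices
assert p[3] == np.sort(d)[3]
assert d[i[3]] == np.sort(d)[3]
# out-of-range kth (e.g. 10) raises ValueError; non-integer kth (e.g. 9.) raises TypeError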
+ + def test_partition_empty_array(self): + # check axis handling for multidimensional empty arrays + a = np.array([]) + a.shape = (3, 2, 1, 0) + for axis in range(-a.ndim, a.ndim): + msg = 'test empty array partition with axis={0}'.format(axis) + assert_equal(np.partition(a, 0, axis=axis), a, msg) + msg = 'test empty array partition with axis=None' + assert_equal(np.partition(a, 0, axis=None), a.ravel(), msg) + + def test_argpartition_empty_array(self): + # check axis handling for multidimensional empty arrays + a = np.array([]) + a.shape = (3, 2, 1, 0) + for axis in range(-a.ndim, a.ndim): + msg = 'test empty array argpartition with axis={0}'.format(axis) + assert_equal(np.partition(a, 0, axis=axis), + np.zeros_like(a, dtype=np.intp), msg) + msg = 'test empty array argpartition with axis=None' + assert_equal(np.partition(a, 0, axis=None), + np.zeros_like(a.ravel(), dtype=np.intp), msg) + + def test_partition(self): + d = np.arange(10) + assert_raises(TypeError, np.partition, d, 2, kind=1) + assert_raises(ValueError, np.partition, d, 2, kind="nonsense") + assert_raises(ValueError, np.argpartition, d, 2, kind="nonsense") + assert_raises(ValueError, d.partition, 2, axis=0, kind="nonsense") + assert_raises(ValueError, d.argpartition, 2, axis=0, kind="nonsense") + for k in ("introselect",): + d = np.array([]) + assert_array_equal(np.partition(d, 0, kind=k), d) + assert_array_equal(np.argpartition(d, 0, kind=k), d) + d = np.ones(1) + assert_array_equal(np.partition(d, 0, kind=k)[0], d) + assert_array_equal(d[np.argpartition(d, 0, kind=k)], + np.partition(d, 0, kind=k)) + + # kth not modified + kth = np.array([30, 15, 5]) + okth = kth.copy() + np.partition(np.arange(40), kth) + assert_array_equal(kth, okth) + + for r in ([2, 1], [1, 2], [1, 1]): + d = np.array(r) + tgt = np.sort(d) + assert_array_equal(np.partition(d, 0, kind=k)[0], tgt[0]) + assert_array_equal(np.partition(d, 1, kind=k)[1], tgt[1]) + assert_array_equal(d[np.argpartition(d, 0, kind=k)], + np.partition(d, 0, kind=k)) + assert_array_equal(d[np.argpartition(d, 1, kind=k)], + np.partition(d, 1, kind=k)) + for i in range(d.size): + d[i:].partition(0, kind=k) + assert_array_equal(d, tgt) + + for r in ([3, 2, 1], [1, 2, 3], [2, 1, 3], [2, 3, 1], + [1, 1, 1], [1, 2, 2], [2, 2, 1], [1, 2, 1]): + d = np.array(r) + tgt = np.sort(d) + assert_array_equal(np.partition(d, 0, kind=k)[0], tgt[0]) + assert_array_equal(np.partition(d, 1, kind=k)[1], tgt[1]) + assert_array_equal(np.partition(d, 2, kind=k)[2], tgt[2]) + assert_array_equal(d[np.argpartition(d, 0, kind=k)], + np.partition(d, 0, kind=k)) + assert_array_equal(d[np.argpartition(d, 1, kind=k)], + np.partition(d, 1, kind=k)) + assert_array_equal(d[np.argpartition(d, 2, kind=k)], + np.partition(d, 2, kind=k)) + for i in range(d.size): + d[i:].partition(0, kind=k) + assert_array_equal(d, tgt) + + d = np.ones(50) + assert_array_equal(np.partition(d, 0, kind=k), d) + assert_array_equal(d[np.argpartition(d, 0, kind=k)], + np.partition(d, 0, kind=k)) + + # sorted + d = np.arange(49) + assert_equal(np.partition(d, 5, kind=k)[5], 5) + assert_equal(np.partition(d, 15, kind=k)[15], 15) + assert_array_equal(d[np.argpartition(d, 5, kind=k)], + np.partition(d, 5, kind=k)) + assert_array_equal(d[np.argpartition(d, 15, kind=k)], + np.partition(d, 15, kind=k)) + + # rsorted + d = np.arange(47)[::-1] + assert_equal(np.partition(d, 6, kind=k)[6], 6) + assert_equal(np.partition(d, 16, kind=k)[16], 16) + assert_array_equal(d[np.argpartition(d, 6, kind=k)], + np.partition(d, 6, kind=k)) + 
assert_array_equal(d[np.argpartition(d, 16, kind=k)], + np.partition(d, 16, kind=k)) + + assert_array_equal(np.partition(d, -6, kind=k), + np.partition(d, 41, kind=k)) + assert_array_equal(np.partition(d, -16, kind=k), + np.partition(d, 31, kind=k)) + assert_array_equal(d[np.argpartition(d, -6, kind=k)], + np.partition(d, 41, kind=k)) + + # median of 3 killer, O(n^2) on pure median 3 pivot quickselect + # exercises the median of median of 5 code used to keep O(n) + d = np.arange(1000000) + x = np.roll(d, d.size // 2) + mid = x.size // 2 + 1 + assert_equal(np.partition(x, mid)[mid], mid) + d = np.arange(1000001) + x = np.roll(d, d.size // 2 + 1) + mid = x.size // 2 + 1 + assert_equal(np.partition(x, mid)[mid], mid) + + # max + d = np.ones(10) + d[1] = 4 + assert_equal(np.partition(d, (2, -1))[-1], 4) + assert_equal(np.partition(d, (2, -1))[2], 1) + assert_equal(d[np.argpartition(d, (2, -1))][-1], 4) + assert_equal(d[np.argpartition(d, (2, -1))][2], 1) + d[1] = np.nan + assert_(np.isnan(d[np.argpartition(d, (2, -1))][-1])) + assert_(np.isnan(np.partition(d, (2, -1))[-1])) + + # equal elements + d = np.arange(47) % 7 + tgt = np.sort(np.arange(47) % 7) + np.random.shuffle(d) + for i in range(d.size): + assert_equal(np.partition(d, i, kind=k)[i], tgt[i]) + assert_array_equal(d[np.argpartition(d, 6, kind=k)], + np.partition(d, 6, kind=k)) + assert_array_equal(d[np.argpartition(d, 16, kind=k)], + np.partition(d, 16, kind=k)) + for i in range(d.size): + d[i:].partition(0, kind=k) + assert_array_equal(d, tgt) + + d = np.array([0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 9]) + kth = [0, 3, 19, 20] + assert_equal(np.partition(d, kth, kind=k)[kth], (0, 3, 7, 7)) + assert_equal(d[np.argpartition(d, kth, kind=k)][kth], (0, 3, 7, 7)) + + d = np.array([2, 1]) + d.partition(0, kind=k) + assert_raises(ValueError, d.partition, 2) + assert_raises(np.AxisError, d.partition, 3, axis=1) + assert_raises(ValueError, np.partition, d, 2) + assert_raises(np.AxisError, np.partition, d, 2, axis=1) + assert_raises(ValueError, d.argpartition, 2) + assert_raises(np.AxisError, d.argpartition, 3, axis=1) + assert_raises(ValueError, np.argpartition, d, 2) + assert_raises(np.AxisError, np.argpartition, d, 2, axis=1) + d = np.arange(10).reshape((2, 5)) + d.partition(1, axis=0, kind=k) + d.partition(4, axis=1, kind=k) + np.partition(d, 1, axis=0, kind=k) + np.partition(d, 4, axis=1, kind=k) + np.partition(d, 1, axis=None, kind=k) + np.partition(d, 9, axis=None, kind=k) + d.argpartition(1, axis=0, kind=k) + d.argpartition(4, axis=1, kind=k) + np.argpartition(d, 1, axis=0, kind=k) + np.argpartition(d, 4, axis=1, kind=k) + np.argpartition(d, 1, axis=None, kind=k) + np.argpartition(d, 9, axis=None, kind=k) + assert_raises(ValueError, d.partition, 2, axis=0) + assert_raises(ValueError, d.partition, 11, axis=1) + assert_raises(TypeError, d.partition, 2, axis=None) + assert_raises(ValueError, np.partition, d, 9, axis=1) + assert_raises(ValueError, np.partition, d, 11, axis=None) + assert_raises(ValueError, d.argpartition, 2, axis=0) + assert_raises(ValueError, d.argpartition, 11, axis=1) + assert_raises(ValueError, np.argpartition, d, 9, axis=1) + assert_raises(ValueError, np.argpartition, d, 11, axis=None) + + td = [(dt, s) for dt in [np.int32, np.float32, np.complex64] + for s in (9, 16)] + for dt, s in td: + aae = assert_array_equal + at = assert_ + + d = np.arange(s, dtype=dt) + np.random.shuffle(d) + d1 = np.tile(np.arange(s, dtype=dt), (4, 1)) + map(np.random.shuffle, d1) + d0 = np.transpose(d1) + for i 
in range(d.size): + p = np.partition(d, i, kind=k) + assert_equal(p[i], i) + # all before are smaller + assert_array_less(p[:i], p[i]) + # all after are larger + assert_array_less(p[i], p[i + 1:]) + aae(p, d[np.argpartition(d, i, kind=k)]) + + p = np.partition(d1, i, axis=1, kind=k) + aae(p[:, i], np.array([i] * d1.shape[0], dtype=dt)) + # array_less does not seem to work right + at((p[:, :i].T <= p[:, i]).all(), + msg="%d: %r <= %r" % (i, p[:, i], p[:, :i].T)) + at((p[:, i + 1:].T > p[:, i]).all(), + msg="%d: %r < %r" % (i, p[:, i], p[:, i + 1:].T)) + aae(p, d1[np.arange(d1.shape[0])[:, None], + np.argpartition(d1, i, axis=1, kind=k)]) + + p = np.partition(d0, i, axis=0, kind=k) + aae(p[i, :], np.array([i] * d1.shape[0], dtype=dt)) + # array_less does not seem to work right + at((p[:i, :] <= p[i, :]).all(), + msg="%d: %r <= %r" % (i, p[i, :], p[:i, :])) + at((p[i + 1:, :] > p[i, :]).all(), + msg="%d: %r < %r" % (i, p[i, :], p[:, i + 1:])) + aae(p, d0[np.argpartition(d0, i, axis=0, kind=k), + np.arange(d0.shape[1])[None, :]]) + + # check inplace + dc = d.copy() + dc.partition(i, kind=k) + assert_equal(dc, np.partition(d, i, kind=k)) + dc = d0.copy() + dc.partition(i, axis=0, kind=k) + assert_equal(dc, np.partition(d0, i, axis=0, kind=k)) + dc = d1.copy() + dc.partition(i, axis=1, kind=k) + assert_equal(dc, np.partition(d1, i, axis=1, kind=k)) + + def assert_partitioned(self, d, kth): + prev = 0 + for k in np.sort(kth): + assert_array_less(d[prev:k], d[k], err_msg='kth %d' % k) + assert_((d[k:] >= d[k]).all(), + msg="kth %d, %r not greater equal %d" % (k, d[k:], d[k])) + prev = k + 1 + + def test_partition_iterative(self): + d = np.arange(17) + kth = (0, 1, 2, 429, 231) + assert_raises(ValueError, d.partition, kth) + assert_raises(ValueError, d.argpartition, kth) + d = np.arange(10).reshape((2, 5)) + assert_raises(ValueError, d.partition, kth, axis=0) + assert_raises(ValueError, d.partition, kth, axis=1) + assert_raises(ValueError, np.partition, d, kth, axis=1) + assert_raises(ValueError, np.partition, d, kth, axis=None) + + d = np.array([3, 4, 2, 1]) + p = np.partition(d, (0, 3)) + self.assert_partitioned(p, (0, 3)) + self.assert_partitioned(d[np.argpartition(d, (0, 3))], (0, 3)) + + assert_array_equal(p, np.partition(d, (-3, -1))) + assert_array_equal(p, d[np.argpartition(d, (-3, -1))]) + + d = np.arange(17) + np.random.shuffle(d) + d.partition(range(d.size)) + assert_array_equal(np.arange(17), d) + np.random.shuffle(d) + assert_array_equal(np.arange(17), d[d.argpartition(range(d.size))]) + + # test unsorted kth + d = np.arange(17) + np.random.shuffle(d) + keys = np.array([1, 3, 8, -2]) + np.random.shuffle(d) + p = np.partition(d, keys) + self.assert_partitioned(p, keys) + p = d[np.argpartition(d, keys)] + self.assert_partitioned(p, keys) + np.random.shuffle(keys) + assert_array_equal(np.partition(d, keys), p) + assert_array_equal(d[np.argpartition(d, keys)], p) + + # equal kth + d = np.arange(20)[::-1] + self.assert_partitioned(np.partition(d, [5]*4), [5]) + self.assert_partitioned(np.partition(d, [5]*4 + [6, 13]), + [5]*4 + [6, 13]) + self.assert_partitioned(d[np.argpartition(d, [5]*4)], [5]) + self.assert_partitioned(d[np.argpartition(d, [5]*4 + [6, 13])], + [5]*4 + [6, 13]) + + d = np.arange(12) + np.random.shuffle(d) + d1 = np.tile(np.arange(12), (4, 1)) + map(np.random.shuffle, d1) + d0 = np.transpose(d1) + + kth = (1, 6, 7, -1) + p = np.partition(d1, kth, axis=1) + pa = d1[np.arange(d1.shape[0])[:, None], + d1.argpartition(kth, axis=1)] + assert_array_equal(p, pa) + for i in 
range(d1.shape[0]): + self.assert_partitioned(p[i,:], kth) + p = np.partition(d0, kth, axis=0) + pa = d0[np.argpartition(d0, kth, axis=0), + np.arange(d0.shape[1])[None,:]] + assert_array_equal(p, pa) + for i in range(d0.shape[1]): + self.assert_partitioned(p[:, i], kth) + + def test_partition_cdtype(self): + d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41), + ('Lancelot', 1.9, 38)], + dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')]) + + tgt = np.sort(d, order=['age', 'height']) + assert_array_equal(np.partition(d, range(d.size), + order=['age', 'height']), + tgt) + assert_array_equal(d[np.argpartition(d, range(d.size), + order=['age', 'height'])], + tgt) + for k in range(d.size): + assert_equal(np.partition(d, k, order=['age', 'height'])[k], + tgt[k]) + assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k], + tgt[k]) + + d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot']) + tgt = np.sort(d) + assert_array_equal(np.partition(d, range(d.size)), tgt) + for k in range(d.size): + assert_equal(np.partition(d, k)[k], tgt[k]) + assert_equal(d[np.argpartition(d, k)][k], tgt[k]) + + def test_partition_unicode_kind(self): + d = np.arange(10) + k = b'\xc3\xa4'.decode("UTF8") + assert_raises(ValueError, d.partition, 2, kind=k) + assert_raises(ValueError, d.argpartition, 2, kind=k) + + def test_partition_fuzz(self): + # a few rounds of random data testing + for j in range(10, 30): + for i in range(1, j - 2): + d = np.arange(j) + np.random.shuffle(d) + d = d % np.random.randint(2, 30) + idx = np.random.randint(d.size) + kth = [0, idx, i, i + 1] + tgt = np.sort(d)[kth] + assert_array_equal(np.partition(d, kth)[kth], tgt, + err_msg="data: %r\n kth: %r" % (d, kth)) + + def test_argpartition_gh5524(self): + # A test for functionality of argpartition on lists. 
+ d = [6,7,3,2,9,0] + p = np.argpartition(d,1) + self.assert_partitioned(np.array(d)[p],[1]) + + def test_flatten(self): + x0 = np.array([[1, 2, 3], [4, 5, 6]], np.int32) + x1 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], np.int32) + y0 = np.array([1, 2, 3, 4, 5, 6], np.int32) + y0f = np.array([1, 4, 2, 5, 3, 6], np.int32) + y1 = np.array([1, 2, 3, 4, 5, 6, 7, 8], np.int32) + y1f = np.array([1, 5, 3, 7, 2, 6, 4, 8], np.int32) + assert_equal(x0.flatten(), y0) + assert_equal(x0.flatten('F'), y0f) + assert_equal(x0.flatten('F'), x0.T.flatten()) + assert_equal(x1.flatten(), y1) + assert_equal(x1.flatten('F'), y1f) + assert_equal(x1.flatten('F'), x1.T.flatten()) + + @pytest.mark.parametrize('func', (np.dot, np.matmul)) + def test_arr_mult(self, func): + a = np.array([[1, 0], [0, 1]]) + b = np.array([[0, 1], [1, 0]]) + c = np.array([[9, 1], [1, -9]]) + d = np.arange(24).reshape(4, 6) + ddt = np.array( + [[ 55, 145, 235, 325], + [ 145, 451, 757, 1063], + [ 235, 757, 1279, 1801], + [ 325, 1063, 1801, 2539]] + ) + dtd = np.array( + [[504, 540, 576, 612, 648, 684], + [540, 580, 620, 660, 700, 740], + [576, 620, 664, 708, 752, 796], + [612, 660, 708, 756, 804, 852], + [648, 700, 752, 804, 856, 908], + [684, 740, 796, 852, 908, 964]] + ) + + + # gemm vs syrk optimizations + for et in [np.float32, np.float64, np.complex64, np.complex128]: + eaf = a.astype(et) + assert_equal(func(eaf, eaf), eaf) + assert_equal(func(eaf.T, eaf), eaf) + assert_equal(func(eaf, eaf.T), eaf) + assert_equal(func(eaf.T, eaf.T), eaf) + assert_equal(func(eaf.T.copy(), eaf), eaf) + assert_equal(func(eaf, eaf.T.copy()), eaf) + assert_equal(func(eaf.T.copy(), eaf.T.copy()), eaf) + + # syrk validations + for et in [np.float32, np.float64, np.complex64, np.complex128]: + eaf = a.astype(et) + ebf = b.astype(et) + assert_equal(func(ebf, ebf), eaf) + assert_equal(func(ebf.T, ebf), eaf) + assert_equal(func(ebf, ebf.T), eaf) + assert_equal(func(ebf.T, ebf.T), eaf) + + # syrk - different shape, stride, and view validations + for et in [np.float32, np.float64, np.complex64, np.complex128]: + edf = d.astype(et) + assert_equal( + func(edf[::-1, :], edf.T), + func(edf[::-1, :].copy(), edf.T.copy()) + ) + assert_equal( + func(edf[:, ::-1], edf.T), + func(edf[:, ::-1].copy(), edf.T.copy()) + ) + assert_equal( + func(edf, edf[::-1, :].T), + func(edf, edf[::-1, :].T.copy()) + ) + assert_equal( + func(edf, edf[:, ::-1].T), + func(edf, edf[:, ::-1].T.copy()) + ) + assert_equal( + func(edf[:edf.shape[0] // 2, :], edf[::2, :].T), + func(edf[:edf.shape[0] // 2, :].copy(), edf[::2, :].T.copy()) + ) + assert_equal( + func(edf[::2, :], edf[:edf.shape[0] // 2, :].T), + func(edf[::2, :].copy(), edf[:edf.shape[0] // 2, :].T.copy()) + ) + + # syrk - different shape + for et in [np.float32, np.float64, np.complex64, np.complex128]: + edf = d.astype(et) + eddtf = ddt.astype(et) + edtdf = dtd.astype(et) + assert_equal(func(edf, edf.T), eddtf) + assert_equal(func(edf.T, edf), edtdf) + + @pytest.mark.parametrize('func', (np.dot, np.matmul)) + @pytest.mark.parametrize('dtype', 'ifdFD') + def test_no_dgemv(self, func, dtype): + # check vector arg for contiguous before gemv + # gh-12156 + a = np.arange(8.0, dtype=dtype).reshape(2, 4) + b = np.broadcast_to(1., (4, 1)) + ret1 = func(a, b) + ret2 = func(a, b.copy()) + assert_equal(ret1, ret2) + + ret1 = func(b.T, a.T) + ret2 = func(b.T.copy(), a.T) + assert_equal(ret1, ret2) + + # check for unaligned data + dt = np.dtype(dtype) + a = np.zeros(8 * dt.itemsize // 2 + 1, dtype='int16')[1:].view(dtype) + a = a.reshape(2, 
4) + b = a[0] + # make sure it is not aligned + assert_(a.__array_interface__['data'][0] % dt.itemsize != 0) + ret1 = func(a, b) + ret2 = func(a.copy(), b.copy()) + assert_equal(ret1, ret2) + + ret1 = func(b.T, a.T) + ret2 = func(b.T.copy(), a.T.copy()) + assert_equal(ret1, ret2) + + def test_dot(self): + a = np.array([[1, 0], [0, 1]]) + b = np.array([[0, 1], [1, 0]]) + c = np.array([[9, 1], [1, -9]]) + # function versus methods + assert_equal(np.dot(a, b), a.dot(b)) + assert_equal(np.dot(np.dot(a, b), c), a.dot(b).dot(c)) + + # test passing in an output array + c = np.zeros_like(a) + a.dot(b, c) + assert_equal(c, np.dot(a, b)) + + # test keyword args + c = np.zeros_like(a) + a.dot(b=b, out=c) + assert_equal(c, np.dot(a, b)) + + def test_dot_type_mismatch(self): + c = 1. + A = np.array((1,1), dtype='i,i') + + assert_raises(TypeError, np.dot, c, A) + assert_raises(TypeError, np.dot, A, c) + + def test_dot_out_mem_overlap(self): + np.random.seed(1) + + # Test BLAS and non-BLAS code paths, including all dtypes + # that dot() supports + dtypes = [np.dtype(code) for code in np.typecodes['All'] + if code not in 'USVM'] + for dtype in dtypes: + a = np.random.rand(3, 3).astype(dtype) + + # Valid dot() output arrays must be aligned + b = _aligned_zeros((3, 3), dtype=dtype) + b[...] = np.random.rand(3, 3) + + y = np.dot(a, b) + x = np.dot(a, b, out=b) + assert_equal(x, y, err_msg=repr(dtype)) + + # Check invalid output array + assert_raises(ValueError, np.dot, a, b, out=b[::2]) + assert_raises(ValueError, np.dot, a, b, out=b.T) + + def test_dot_matmul_out(self): + # gh-9641 + class Sub(np.ndarray): + pass + a = np.ones((2, 2)).view(Sub) + b = np.ones((2, 2)).view(Sub) + out = np.ones((2, 2)) + + # make sure out can be any ndarray (not only subclass of inputs) + np.dot(a, b, out=out) + np.matmul(a, b, out=out) + + def test_dot_matmul_inner_array_casting_fails(self): + + class A(object): + def __array__(self, *args, **kwargs): + raise NotImplementedError + + # Don't override the error from calling __array__() + assert_raises(NotImplementedError, np.dot, A(), A()) + assert_raises(NotImplementedError, np.matmul, A(), A()) + assert_raises(NotImplementedError, np.inner, A(), A()) + + def test_matmul_out(self): + # overlapping memory + a = np.arange(18).reshape(2, 3, 3) + b = np.matmul(a, a) + c = np.matmul(a, a, out=a) + assert_(c is a) + assert_equal(c, b) + a = np.arange(18).reshape(2, 3, 3) + c = np.matmul(a, a, out=a[::-1, ...]) + assert_(c.base is a.base) + assert_equal(c, b) + + def test_diagonal(self): + a = np.arange(12).reshape((3, 4)) + assert_equal(a.diagonal(), [0, 5, 10]) + assert_equal(a.diagonal(0), [0, 5, 10]) + assert_equal(a.diagonal(1), [1, 6, 11]) + assert_equal(a.diagonal(-1), [4, 9]) + assert_raises(np.AxisError, a.diagonal, axis1=0, axis2=5) + assert_raises(np.AxisError, a.diagonal, axis1=5, axis2=0) + assert_raises(np.AxisError, a.diagonal, axis1=5, axis2=5) + assert_raises(ValueError, a.diagonal, axis1=1, axis2=1) + + b = np.arange(8).reshape((2, 2, 2)) + assert_equal(b.diagonal(), [[0, 6], [1, 7]]) + assert_equal(b.diagonal(0), [[0, 6], [1, 7]]) + assert_equal(b.diagonal(1), [[2], [3]]) + assert_equal(b.diagonal(-1), [[4], [5]]) + assert_raises(ValueError, b.diagonal, axis1=0, axis2=0) + assert_equal(b.diagonal(0, 1, 2), [[0, 3], [4, 7]]) + assert_equal(b.diagonal(0, 0, 1), [[0, 6], [1, 7]]) + assert_equal(b.diagonal(offset=1, axis1=0, axis2=2), [[1], [3]]) + # Order of axis argument doesn't matter: + assert_equal(b.diagonal(0, 2, 1), [[0, 3], [4, 7]]) + + def 
test_diagonal_view_notwriteable(self): + # this test is only for 1.9, the diagonal view will be + # writeable in 1.10. + a = np.eye(3).diagonal() + assert_(not a.flags.writeable) + assert_(not a.flags.owndata) + + a = np.diagonal(np.eye(3)) + assert_(not a.flags.writeable) + assert_(not a.flags.owndata) + + a = np.diag(np.eye(3)) + assert_(not a.flags.writeable) + assert_(not a.flags.owndata) + + def test_diagonal_memleak(self): + # Regression test for a bug that crept in at one point + a = np.zeros((100, 100)) + if HAS_REFCOUNT: + assert_(sys.getrefcount(a) < 50) + for i in range(100): + a.diagonal() + if HAS_REFCOUNT: + assert_(sys.getrefcount(a) < 50) + + def test_size_zero_memleak(self): + # Regression test for issue 9615 + # Exercises a special-case code path for dot products of length + # zero in cblasfuncs (making it is specific to floating dtypes). + a = np.array([], dtype=np.float64) + x = np.array(2.0) + for _ in range(100): + np.dot(a, a, out=x) + if HAS_REFCOUNT: + assert_(sys.getrefcount(x) < 50) + + def test_trace(self): + a = np.arange(12).reshape((3, 4)) + assert_equal(a.trace(), 15) + assert_equal(a.trace(0), 15) + assert_equal(a.trace(1), 18) + assert_equal(a.trace(-1), 13) + + b = np.arange(8).reshape((2, 2, 2)) + assert_equal(b.trace(), [6, 8]) + assert_equal(b.trace(0), [6, 8]) + assert_equal(b.trace(1), [2, 3]) + assert_equal(b.trace(-1), [4, 5]) + assert_equal(b.trace(0, 0, 1), [6, 8]) + assert_equal(b.trace(0, 0, 2), [5, 9]) + assert_equal(b.trace(0, 1, 2), [3, 11]) + assert_equal(b.trace(offset=1, axis1=0, axis2=2), [1, 3]) + + def test_trace_subclass(self): + # The class would need to overwrite trace to ensure single-element + # output also has the right subclass. + class MyArray(np.ndarray): + pass + + b = np.arange(8).reshape((2, 2, 2)).view(MyArray) + t = b.trace() + assert_(isinstance(t, MyArray)) + + def test_put(self): + icodes = np.typecodes['AllInteger'] + fcodes = np.typecodes['AllFloat'] + for dt in icodes + fcodes + 'O': + tgt = np.array([0, 1, 0, 3, 0, 5], dtype=dt) + + # test 1-d + a = np.zeros(6, dtype=dt) + a.put([1, 3, 5], [1, 3, 5]) + assert_equal(a, tgt) + + # test 2-d + a = np.zeros((2, 3), dtype=dt) + a.put([1, 3, 5], [1, 3, 5]) + assert_equal(a, tgt.reshape(2, 3)) + + for dt in '?': + tgt = np.array([False, True, False, True, False, True], dtype=dt) + + # test 1-d + a = np.zeros(6, dtype=dt) + a.put([1, 3, 5], [True]*3) + assert_equal(a, tgt) + + # test 2-d + a = np.zeros((2, 3), dtype=dt) + a.put([1, 3, 5], [True]*3) + assert_equal(a, tgt.reshape(2, 3)) + + # check must be writeable + a = np.zeros(6) + a.flags.writeable = False + assert_raises(ValueError, a.put, [1, 3, 5], [1, 3, 5]) + + # when calling np.put, make sure a + # TypeError is raised if the object + # isn't an ndarray + bad_array = [1, 2, 3] + assert_raises(TypeError, np.put, bad_array, [0, 2], 5) + + def test_ravel(self): + a = np.array([[0, 1], [2, 3]]) + assert_equal(a.ravel(), [0, 1, 2, 3]) + assert_(not a.ravel().flags.owndata) + assert_equal(a.ravel('F'), [0, 2, 1, 3]) + assert_equal(a.ravel(order='C'), [0, 1, 2, 3]) + assert_equal(a.ravel(order='F'), [0, 2, 1, 3]) + assert_equal(a.ravel(order='A'), [0, 1, 2, 3]) + assert_(not a.ravel(order='A').flags.owndata) + assert_equal(a.ravel(order='K'), [0, 1, 2, 3]) + assert_(not a.ravel(order='K').flags.owndata) + assert_equal(a.ravel(), a.reshape(-1)) + + a = np.array([[0, 1], [2, 3]], order='F') + assert_equal(a.ravel(), [0, 1, 2, 3]) + assert_equal(a.ravel(order='A'), [0, 2, 1, 3]) + assert_equal(a.ravel(order='K'), [0, 2, 1, 
3]) + assert_(not a.ravel(order='A').flags.owndata) + assert_(not a.ravel(order='K').flags.owndata) + assert_equal(a.ravel(), a.reshape(-1)) + assert_equal(a.ravel(order='A'), a.reshape(-1, order='A')) + + a = np.array([[0, 1], [2, 3]])[::-1, :] + assert_equal(a.ravel(), [2, 3, 0, 1]) + assert_equal(a.ravel(order='C'), [2, 3, 0, 1]) + assert_equal(a.ravel(order='F'), [2, 0, 3, 1]) + assert_equal(a.ravel(order='A'), [2, 3, 0, 1]) + # 'K' doesn't reverse the axes of negative strides + assert_equal(a.ravel(order='K'), [2, 3, 0, 1]) + assert_(a.ravel(order='K').flags.owndata) + + # Test simple 1-d copy behaviour: + a = np.arange(10)[::2] + assert_(a.ravel('K').flags.owndata) + assert_(a.ravel('C').flags.owndata) + assert_(a.ravel('F').flags.owndata) + + # Not contiguous and 1-sized axis with non matching stride + a = np.arange(2**3 * 2)[::2] + a = a.reshape(2, 1, 2, 2).swapaxes(-1, -2) + strides = list(a.strides) + strides[1] = 123 + a.strides = strides + assert_(a.ravel(order='K').flags.owndata) + assert_equal(a.ravel('K'), np.arange(0, 15, 2)) + + # contiguous and 1-sized axis with non matching stride works: + a = np.arange(2**3) + a = a.reshape(2, 1, 2, 2).swapaxes(-1, -2) + strides = list(a.strides) + strides[1] = 123 + a.strides = strides + assert_(np.may_share_memory(a.ravel(order='K'), a)) + assert_equal(a.ravel(order='K'), np.arange(2**3)) + + # Test negative strides (not very interesting since non-contiguous): + a = np.arange(4)[::-1].reshape(2, 2) + assert_(a.ravel(order='C').flags.owndata) + assert_(a.ravel(order='K').flags.owndata) + assert_equal(a.ravel('C'), [3, 2, 1, 0]) + assert_equal(a.ravel('K'), [3, 2, 1, 0]) + + # 1-element tidy strides test (NPY_RELAXED_STRIDES_CHECKING): + a = np.array([[1]]) + a.strides = (123, 432) + # If the stride is not 8, NPY_RELAXED_STRIDES_CHECKING is messing + # them up on purpose: + if np.ones(1).strides == (8,): + assert_(np.may_share_memory(a.ravel('K'), a)) + assert_equal(a.ravel('K').strides, (a.dtype.itemsize,)) + + for order in ('C', 'F', 'A', 'K'): + # 0-d corner case: + a = np.array(0) + assert_equal(a.ravel(order), [0]) + assert_(np.may_share_memory(a.ravel(order), a)) + + # Test that certain non-inplace ravels work right (mostly) for 'K': + b = np.arange(2**4 * 2)[::2].reshape(2, 2, 2, 2) + a = b[..., ::2] + assert_equal(a.ravel('K'), [0, 4, 8, 12, 16, 20, 24, 28]) + assert_equal(a.ravel('C'), [0, 4, 8, 12, 16, 20, 24, 28]) + assert_equal(a.ravel('A'), [0, 4, 8, 12, 16, 20, 24, 28]) + assert_equal(a.ravel('F'), [0, 16, 8, 24, 4, 20, 12, 28]) + + a = b[::2, ...] 
+ assert_equal(a.ravel('K'), [0, 2, 4, 6, 8, 10, 12, 14]) + assert_equal(a.ravel('C'), [0, 2, 4, 6, 8, 10, 12, 14]) + assert_equal(a.ravel('A'), [0, 2, 4, 6, 8, 10, 12, 14]) + assert_equal(a.ravel('F'), [0, 8, 4, 12, 2, 10, 6, 14]) + + def test_ravel_subclass(self): + class ArraySubclass(np.ndarray): + pass + + a = np.arange(10).view(ArraySubclass) + assert_(isinstance(a.ravel('C'), ArraySubclass)) + assert_(isinstance(a.ravel('F'), ArraySubclass)) + assert_(isinstance(a.ravel('A'), ArraySubclass)) + assert_(isinstance(a.ravel('K'), ArraySubclass)) + + a = np.arange(10)[::2].view(ArraySubclass) + assert_(isinstance(a.ravel('C'), ArraySubclass)) + assert_(isinstance(a.ravel('F'), ArraySubclass)) + assert_(isinstance(a.ravel('A'), ArraySubclass)) + assert_(isinstance(a.ravel('K'), ArraySubclass)) + + def test_swapaxes(self): + a = np.arange(1*2*3*4).reshape(1, 2, 3, 4).copy() + idx = np.indices(a.shape) + assert_(a.flags['OWNDATA']) + b = a.copy() + # check exceptions + assert_raises(np.AxisError, a.swapaxes, -5, 0) + assert_raises(np.AxisError, a.swapaxes, 4, 0) + assert_raises(np.AxisError, a.swapaxes, 0, -5) + assert_raises(np.AxisError, a.swapaxes, 0, 4) + + for i in range(-4, 4): + for j in range(-4, 4): + for k, src in enumerate((a, b)): + c = src.swapaxes(i, j) + # check shape + shape = list(src.shape) + shape[i] = src.shape[j] + shape[j] = src.shape[i] + assert_equal(c.shape, shape, str((i, j, k))) + # check array contents + i0, i1, i2, i3 = [dim-1 for dim in c.shape] + j0, j1, j2, j3 = [dim-1 for dim in src.shape] + assert_equal(src[idx[j0], idx[j1], idx[j2], idx[j3]], + c[idx[i0], idx[i1], idx[i2], idx[i3]], + str((i, j, k))) + # check a view is always returned, gh-5260 + assert_(not c.flags['OWNDATA'], str((i, j, k))) + # check on non-contiguous input array + if k == 1: + b = c + + def test_conjugate(self): + a = np.array([1-1j, 1+1j, 23+23.0j]) + ac = a.conj() + assert_equal(a.real, ac.real) + assert_equal(a.imag, -ac.imag) + assert_equal(ac, a.conjugate()) + assert_equal(ac, np.conjugate(a)) + + a = np.array([1-1j, 1+1j, 23+23.0j], 'F') + ac = a.conj() + assert_equal(a.real, ac.real) + assert_equal(a.imag, -ac.imag) + assert_equal(ac, a.conjugate()) + assert_equal(ac, np.conjugate(a)) + + a = np.array([1, 2, 3]) + ac = a.conj() + assert_equal(a, ac) + assert_equal(ac, a.conjugate()) + assert_equal(ac, np.conjugate(a)) + + a = np.array([1.0, 2.0, 3.0]) + ac = a.conj() + assert_equal(a, ac) + assert_equal(ac, a.conjugate()) + assert_equal(ac, np.conjugate(a)) + + a = np.array([1-1j, 1+1j, 1, 2.0], object) + ac = a.conj() + assert_equal(ac, [k.conjugate() for k in a]) + assert_equal(ac, a.conjugate()) + assert_equal(ac, np.conjugate(a)) + + a = np.array([1-1j, 1, 2.0, 'f'], object) + assert_raises(AttributeError, lambda: a.conj()) + assert_raises(AttributeError, lambda: a.conjugate()) + + def test__complex__(self): + dtypes = ['i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8', + 'f', 'd', 'g', 'F', 'D', 'G', + '?', 'O'] + for dt in dtypes: + a = np.array(7, dtype=dt) + b = np.array([7], dtype=dt) + c = np.array([[[[[7]]]]], dtype=dt) + + msg = 'dtype: {0}'.format(dt) + ap = complex(a) + assert_equal(ap, a, msg) + bp = complex(b) + assert_equal(bp, b, msg) + cp = complex(c) + assert_equal(cp, c, msg) + + def test__complex__should_not_work(self): + dtypes = ['i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8', + 'f', 'd', 'g', 'F', 'D', 'G', + '?', 'O'] + for dt in dtypes: + a = np.array([1, 2, 3], dtype=dt) + assert_raises(TypeError, complex, a) + + dt = np.dtype([('a', 'f8'), ('b', 
'i1')]) + b = np.array((1.0, 3), dtype=dt) + assert_raises(TypeError, complex, b) + + c = np.array([(1.0, 3), (2e-3, 7)], dtype=dt) + assert_raises(TypeError, complex, c) + + d = np.array('1+1j') + assert_raises(TypeError, complex, d) + + e = np.array(['1+1j'], 'U') + assert_raises(TypeError, complex, e) + +class TestCequenceMethods(object): + def test_array_contains(self): + assert_(4.0 in np.arange(16.).reshape(4,4)) + assert_(20.0 not in np.arange(16.).reshape(4,4)) + +class TestBinop(object): + def test_inplace(self): + # test refcount 1 inplace conversion + assert_array_almost_equal(np.array([0.5]) * np.array([1.0, 2.0]), + [0.5, 1.0]) + + d = np.array([0.5, 0.5])[::2] + assert_array_almost_equal(d * (d * np.array([1.0, 2.0])), + [0.25, 0.5]) + + a = np.array([0.5]) + b = np.array([0.5]) + c = a + b + c = a - b + c = a * b + c = a / b + assert_equal(a, b) + assert_almost_equal(c, 1.) + + c = a + b * 2. / b * a - a / b + assert_equal(a, b) + assert_equal(c, 0.5) + + # true divide + a = np.array([5]) + b = np.array([3]) + c = (a * a) / b + + assert_almost_equal(c, 25 / 3) + assert_equal(a, 5) + assert_equal(b, 3) + + # ndarray.__rop__ always calls ufunc + # ndarray.__iop__ always calls ufunc + # ndarray.__op__, __rop__: + # - defer if other has __array_ufunc__ and it is None + # or other is not a subclass and has higher array priority + # - else, call ufunc + def test_ufunc_binop_interaction(self): + # Python method name (without underscores) + # -> (numpy ufunc, has_in_place_version, preferred_dtype) + ops = { + 'add': (np.add, True, float), + 'sub': (np.subtract, True, float), + 'mul': (np.multiply, True, float), + 'truediv': (np.true_divide, True, float), + 'floordiv': (np.floor_divide, True, float), + 'mod': (np.remainder, True, float), + 'divmod': (np.divmod, False, float), + 'pow': (np.power, True, int), + 'lshift': (np.left_shift, True, int), + 'rshift': (np.right_shift, True, int), + 'and': (np.bitwise_and, True, int), + 'xor': (np.bitwise_xor, True, int), + 'or': (np.bitwise_or, True, int), + # 'ge': (np.less_equal, False), + # 'gt': (np.less, False), + # 'le': (np.greater_equal, False), + # 'lt': (np.greater, False), + # 'eq': (np.equal, False), + # 'ne': (np.not_equal, False), + } + if sys.version_info >= (3, 5): + ops['matmul'] = (np.matmul, False, float) + + class Coerced(Exception): + pass + + def array_impl(self): + raise Coerced + + def op_impl(self, other): + return "forward" + + def rop_impl(self, other): + return "reverse" + + def iop_impl(self, other): + return "in-place" + + def array_ufunc_impl(self, ufunc, method, *args, **kwargs): + return ("__array_ufunc__", ufunc, method, args, kwargs) + + # Create an object with the given base, in the given module, with a + # bunch of placeholder __op__ methods, and optionally a + # __array_ufunc__ and __array_priority__. 
+ def make_obj(base, array_priority=False, array_ufunc=False, + alleged_module="__main__"): + class_namespace = {"__array__": array_impl} + if array_priority is not False: + class_namespace["__array_priority__"] = array_priority + for op in ops: + class_namespace["__{0}__".format(op)] = op_impl + class_namespace["__r{0}__".format(op)] = rop_impl + class_namespace["__i{0}__".format(op)] = iop_impl + if array_ufunc is not False: + class_namespace["__array_ufunc__"] = array_ufunc + eval_namespace = {"base": base, + "class_namespace": class_namespace, + "__name__": alleged_module, + } + MyType = eval("type('MyType', (base,), class_namespace)", + eval_namespace) + if issubclass(MyType, np.ndarray): + # Use this range to avoid special case weirdnesses around + # divide-by-0, pow(x, 2), overflow due to pow(big, big), etc. + return np.arange(3, 7).reshape(2, 2).view(MyType) + else: + return MyType() + + def check(obj, binop_override_expected, ufunc_override_expected, + inplace_override_expected, check_scalar=True): + for op, (ufunc, has_inplace, dtype) in ops.items(): + err_msg = ('op: %s, ufunc: %s, has_inplace: %s, dtype: %s' + % (op, ufunc, has_inplace, dtype)) + check_objs = [np.arange(3, 7, dtype=dtype).reshape(2, 2)] + if check_scalar: + check_objs.append(check_objs[0][0]) + for arr in check_objs: + arr_method = getattr(arr, "__{0}__".format(op)) + + def first_out_arg(result): + if op == "divmod": + assert_(isinstance(result, tuple)) + return result[0] + else: + return result + + # arr __op__ obj + if binop_override_expected: + assert_equal(arr_method(obj), NotImplemented, err_msg) + elif ufunc_override_expected: + assert_equal(arr_method(obj)[0], "__array_ufunc__", + err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ gets ignored + res = first_out_arg(arr_method(obj)) + assert_(res.__class__ is obj.__class__, err_msg) + else: + assert_raises((TypeError, Coerced), + arr_method, obj, err_msg=err_msg) + # obj __op__ arr + arr_rmethod = getattr(arr, "__r{0}__".format(op)) + if ufunc_override_expected: + res = arr_rmethod(obj) + assert_equal(res[0], "__array_ufunc__", + err_msg=err_msg) + assert_equal(res[1], ufunc, err_msg=err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ gets ignored + res = first_out_arg(arr_rmethod(obj)) + assert_(res.__class__ is obj.__class__, err_msg) + else: + # __array_ufunc__ = "asdf" creates a TypeError + assert_raises((TypeError, Coerced), + arr_rmethod, obj, err_msg=err_msg) + + # arr __iop__ obj + # array scalars don't have in-place operators + if has_inplace and isinstance(arr, np.ndarray): + arr_imethod = getattr(arr, "__i{0}__".format(op)) + if inplace_override_expected: + assert_equal(arr_method(obj), NotImplemented, + err_msg=err_msg) + elif ufunc_override_expected: + res = arr_imethod(obj) + assert_equal(res[0], "__array_ufunc__", err_msg) + assert_equal(res[1], ufunc, err_msg) + assert_(type(res[-1]["out"]) is tuple, err_msg) + assert_(res[-1]["out"][0] is arr, err_msg) + else: + if (isinstance(obj, np.ndarray) and + (type(obj).__array_ufunc__ is + np.ndarray.__array_ufunc__)): + # __array__ gets ignored + assert_(arr_imethod(obj) is arr, err_msg) + else: + assert_raises((TypeError, Coerced), + arr_imethod, obj, + err_msg=err_msg) + + op_fn = getattr(operator, op, None) + if op_fn is None: + op_fn = getattr(operator, op + "_", None) + if op_fn is None: + op_fn = getattr(builtins, op) + 
assert_equal(op_fn(obj, arr), "forward", err_msg) + if not isinstance(obj, np.ndarray): + if binop_override_expected: + assert_equal(op_fn(arr, obj), "reverse", err_msg) + elif ufunc_override_expected: + assert_equal(op_fn(arr, obj)[0], "__array_ufunc__", + err_msg) + if ufunc_override_expected: + assert_equal(ufunc(obj, arr)[0], "__array_ufunc__", + err_msg) + + # No array priority, no array_ufunc -> nothing called + check(make_obj(object), False, False, False) + # Negative array priority, no array_ufunc -> nothing called + # (has to be very negative, because scalar priority is -1000000.0) + check(make_obj(object, array_priority=-2**30), False, False, False) + # Positive array priority, no array_ufunc -> binops and iops only + check(make_obj(object, array_priority=1), True, False, True) + # ndarray ignores array_priority for ndarray subclasses + check(make_obj(np.ndarray, array_priority=1), False, False, False, + check_scalar=False) + # Positive array_priority and array_ufunc -> array_ufunc only + check(make_obj(object, array_priority=1, + array_ufunc=array_ufunc_impl), False, True, False) + check(make_obj(np.ndarray, array_priority=1, + array_ufunc=array_ufunc_impl), False, True, False) + # array_ufunc set to None -> defer binops only + check(make_obj(object, array_ufunc=None), True, False, False) + check(make_obj(np.ndarray, array_ufunc=None), True, False, False, + check_scalar=False) + + def test_ufunc_override_normalize_signature(self): + # gh-5674 + class SomeClass(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + return kw + + a = SomeClass() + kw = np.add(a, [1]) + assert_('sig' not in kw and 'signature' not in kw) + kw = np.add(a, [1], sig='ii->i') + assert_('sig' not in kw and 'signature' in kw) + assert_equal(kw['signature'], 'ii->i') + kw = np.add(a, [1], signature='ii->i') + assert_('sig' not in kw and 'signature' in kw) + assert_equal(kw['signature'], 'ii->i') + + def test_array_ufunc_index(self): + # Check that index is set appropriately, also if only an output + # is passed on (latter is another regression tests for github bug 4753) + # This also checks implicitly that 'out' is always a tuple. + class CheckIndex(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + for i, a in enumerate(inputs): + if a is self: + return i + # calls below mean we must be in an output. + for j, a in enumerate(kw['out']): + if a is self: + return (j,) + + a = CheckIndex() + dummy = np.arange(2.) 
+ # 1 input, 1 output + assert_equal(np.sin(a), 0) + assert_equal(np.sin(dummy, a), (0,)) + assert_equal(np.sin(dummy, out=a), (0,)) + assert_equal(np.sin(dummy, out=(a,)), (0,)) + assert_equal(np.sin(a, a), 0) + assert_equal(np.sin(a, out=a), 0) + assert_equal(np.sin(a, out=(a,)), 0) + # 1 input, 2 outputs + assert_equal(np.modf(dummy, a), (0,)) + assert_equal(np.modf(dummy, None, a), (1,)) + assert_equal(np.modf(dummy, dummy, a), (1,)) + assert_equal(np.modf(dummy, out=(a, None)), (0,)) + assert_equal(np.modf(dummy, out=(a, dummy)), (0,)) + assert_equal(np.modf(dummy, out=(None, a)), (1,)) + assert_equal(np.modf(dummy, out=(dummy, a)), (1,)) + assert_equal(np.modf(a, out=(dummy, a)), 0) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', DeprecationWarning) + assert_equal(np.modf(dummy, out=a), (0,)) + assert_(w[0].category is DeprecationWarning) + assert_raises(ValueError, np.modf, dummy, out=(a,)) + + # 2 inputs, 1 output + assert_equal(np.add(a, dummy), 0) + assert_equal(np.add(dummy, a), 1) + assert_equal(np.add(dummy, dummy, a), (0,)) + assert_equal(np.add(dummy, a, a), 1) + assert_equal(np.add(dummy, dummy, out=a), (0,)) + assert_equal(np.add(dummy, dummy, out=(a,)), (0,)) + assert_equal(np.add(a, dummy, out=a), 0) + + def test_out_override(self): + # regression test for github bug 4753 + class OutClass(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + if 'out' in kw: + tmp_kw = kw.copy() + tmp_kw.pop('out') + func = getattr(ufunc, method) + kw['out'][0][...] = func(*inputs, **tmp_kw) + + A = np.array([0]).view(OutClass) + B = np.array([5]) + C = np.array([6]) + np.multiply(C, B, A) + assert_equal(A[0], 30) + assert_(isinstance(A, OutClass)) + A[0] = 0 + np.multiply(C, B, out=A) + assert_equal(A[0], 30) + assert_(isinstance(A, OutClass)) + + def test_pow_override_with_errors(self): + # regression test for gh-9112 + class PowerOnly(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kw): + if ufunc is not np.power: + raise NotImplementedError + return "POWER!" + # explicit cast to float, to ensure the fast power path is taken. 
+ a = np.array(5., dtype=np.float64).view(PowerOnly) + assert_equal(a ** 2.5, "POWER!") + with assert_raises(NotImplementedError): + a ** 0.5 + with assert_raises(NotImplementedError): + a ** 0 + with assert_raises(NotImplementedError): + a ** 1 + with assert_raises(NotImplementedError): + a ** -1 + with assert_raises(NotImplementedError): + a ** 2 + + def test_pow_array_object_dtype(self): + # test pow on arrays of object dtype + class SomeClass(object): + def __init__(self, num=None): + self.num = num + + # want to ensure a fast pow path is not taken + def __mul__(self, other): + raise AssertionError('__mul__ should not be called') + + def __div__(self, other): + raise AssertionError('__div__ should not be called') + + def __pow__(self, exp): + return SomeClass(num=self.num ** exp) + + def __eq__(self, other): + if isinstance(other, SomeClass): + return self.num == other.num + + __rpow__ = __pow__ + + def pow_for(exp, arr): + return np.array([x ** exp for x in arr]) + + obj_arr = np.array([SomeClass(1), SomeClass(2), SomeClass(3)]) + + assert_equal(obj_arr ** 0.5, pow_for(0.5, obj_arr)) + assert_equal(obj_arr ** 0, pow_for(0, obj_arr)) + assert_equal(obj_arr ** 1, pow_for(1, obj_arr)) + assert_equal(obj_arr ** -1, pow_for(-1, obj_arr)) + assert_equal(obj_arr ** 2, pow_for(2, obj_arr)) + + def test_pos_array_ufunc_override(self): + class A(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return getattr(ufunc, method)(*[i.view(np.ndarray) for + i in inputs], **kwargs) + tst = np.array('foo').view(A) + with assert_raises(TypeError): + +tst + + +class TestTemporaryElide(object): + # elision is only triggered on relatively large arrays + + def test_extension_incref_elide(self): + # test extension (e.g. cython) calling PyNumber_* slots without + # increasing the reference counts + # + # def incref_elide(a): + # d = input.copy() # refcount 1 + # return d, d + d # PyNumber_Add without increasing refcount + from numpy.core._multiarray_tests import incref_elide + d = np.ones(100000) + orig, res = incref_elide(d) + d + d + # the return original should not be changed to an inplace operation + assert_array_equal(orig, d) + assert_array_equal(res, d + d) + + def test_extension_incref_elide_stack(self): + # scanning if the refcount == 1 object is on the python stack to check + # that we are called directly from python is flawed as object may still + # be above the stack pointer and we have no access to the top of it + # + # def incref_elide_l(d): + # return l[4] + l[4] # PyNumber_Add without increasing refcount + from numpy.core._multiarray_tests import incref_elide_l + # padding with 1 makes sure the object on the stack is not overwritten + l = [1, 1, 1, 1, np.ones(100000)] + res = incref_elide_l(l) + # the return original should not be changed to an inplace operation + assert_array_equal(l[4], np.ones(100000)) + assert_array_equal(res, l[4] + l[4]) + + def test_temporary_with_cast(self): + # check that we don't elide into a temporary which would need casting + d = np.ones(200000, dtype=np.int64) + assert_equal(((d + d) + 2**222).dtype, np.dtype('O')) + + r = ((d + d) / 2) + assert_equal(r.dtype, np.dtype('f8')) + + r = np.true_divide((d + d), 2) + assert_equal(r.dtype, np.dtype('f8')) + + r = ((d + d) / 2.) 
+ assert_equal(r.dtype, np.dtype('f8')) + + r = ((d + d) // 2) + assert_equal(r.dtype, np.dtype(np.int64)) + + # commutative elision into the astype result + f = np.ones(100000, dtype=np.float32) + assert_equal(((f + f) + f.astype(np.float64)).dtype, np.dtype('f8')) + + # no elision into lower type + d = f.astype(np.float64) + assert_equal(((f + f) + d).dtype, d.dtype) + l = np.ones(100000, dtype=np.longdouble) + assert_equal(((d + d) + l).dtype, l.dtype) + + # test unary abs with different output dtype + for dt in (np.complex64, np.complex128, np.clongdouble): + c = np.ones(100000, dtype=dt) + r = abs(c * 2.0) + assert_equal(r.dtype, np.dtype('f%d' % (c.itemsize // 2))) + + def test_elide_broadcast(self): + # test no elision on broadcast to higher dimension + # only triggers elision code path in debug mode as triggering it in + # normal mode needs 256kb large matching dimension, so a lot of memory + d = np.ones((2000, 1), dtype=int) + b = np.ones((2000), dtype=bool) + r = (1 - d) + b + assert_equal(r, 1) + assert_equal(r.shape, (2000, 2000)) + + def test_elide_scalar(self): + # check inplace op does not create ndarray from scalars + a = np.bool_() + assert_(type(~(a & a)) is np.bool_) + + def test_elide_scalar_readonly(self): + # The imaginary part of a real array is readonly. This needs to go + # through fast_scalar_power which is only called for powers of + # +1, -1, 0, 0.5, and 2, so use 2. Also need valid refcount for + # elision which can be gotten for the imaginary part of a real + # array. Should not error. + a = np.empty(100000, dtype=np.float64) + a.imag ** 2 + + def test_elide_readonly(self): + # don't try to elide readonly temporaries + r = np.asarray(np.broadcast_to(np.zeros(1), 100000).flat) * 0.0 + assert_equal(r, 0) + + def test_elide_updateifcopy(self): + a = np.ones(2**20)[::2] + b = a.flat.__array__() + 1 + del b + assert_equal(a, 1) + + +class TestCAPI(object): + def test_IsPythonScalar(self): + from numpy.core._multiarray_tests import IsPythonScalar + assert_(IsPythonScalar(b'foobar')) + assert_(IsPythonScalar(1)) + assert_(IsPythonScalar(2**80)) + assert_(IsPythonScalar(2.)) + assert_(IsPythonScalar("a")) + + +class TestSubscripting(object): + def test_test_zero_rank(self): + x = np.array([1, 2, 3]) + assert_(isinstance(x[0], np.int_)) + if sys.version_info[0] < 3: + assert_(isinstance(x[0], int)) + assert_(type(x[0, ...]) is np.ndarray) + + +class TestPickling(object): + def test_highest_available_pickle_protocol(self): + try: + import pickle5 + except ImportError: + pickle5 = None + + if sys.version_info[:2] >= (3, 8) or pickle5 is not None: + assert pickle.HIGHEST_PROTOCOL >= 5 + else: + assert pickle.HIGHEST_PROTOCOL < 5 + + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5, + reason=('this tests the error messages when trying to' + 'protocol 5 although it is not available')) + def test_correct_protocol5_error_message(self): + array = np.arange(10) + + if sys.version_info[:2] in ((3, 6), (3, 7)): + # For the specific case of python3.6 and 3.7, raise a clear import + # error about the pickle5 backport when trying to use protocol=5 + # without the pickle5 package + with pytest.raises(ImportError): + array.__reduce_ex__(5) + + elif sys.version_info[:2] < (3, 6): + # when calling __reduce_ex__ explicitly with protocol=5 on python + # raise a ValueError saying that protocol 5 is not available for + # this python version + with pytest.raises(ValueError): + array.__reduce_ex__(5) + + def test_record_array_with_object_dtype(self): + my_object = object() + + arr_with_object 
= np.array( + [(my_object, 1, 2.0)], + dtype=[('a', object), ('b', int), ('c', float)]) + arr_without_object = np.array( + [('xxx', 1, 2.0)], + dtype=[('a', str), ('b', int), ('c', float)]) + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + depickled_arr_with_object = pickle.loads( + pickle.dumps(arr_with_object, protocol=proto)) + depickled_arr_without_object = pickle.loads( + pickle.dumps(arr_without_object, protocol=proto)) + + assert_equal(arr_with_object.dtype, + depickled_arr_with_object.dtype) + assert_equal(arr_without_object.dtype, + depickled_arr_without_object.dtype) + + @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5, + reason="requires pickle protocol 5") + def test_f_contiguous_array(self): + f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F') + buffers = [] + + # When using pickle protocol 5, Fortran-contiguous arrays can be + # serialized using out-of-band buffers + bytes_string = pickle.dumps(f_contiguous_array, protocol=5, + buffer_callback=buffers.append) + + assert len(buffers) > 0 + + depickled_f_contiguous_array = pickle.loads(bytes_string, + buffers=buffers) + + assert_equal(f_contiguous_array, depickled_f_contiguous_array) + + def test_non_contiguous_array(self): + non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2] + assert not non_contiguous_array.flags.c_contiguous + assert not non_contiguous_array.flags.f_contiguous + + # make sure non-contiguous arrays can be pickled-depickled + # using any protocol + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + depickled_non_contiguous_array = pickle.loads( + pickle.dumps(non_contiguous_array, protocol=proto)) + + assert_equal(non_contiguous_array, depickled_non_contiguous_array) + + def test_roundtrip(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + carray = np.array([[2, 9], [7, 0], [3, 8]]) + DATA = [ + carray, + np.transpose(carray), + np.array([('xxx', 1, 2.0)], dtype=[('a', (str, 3)), ('b', int), + ('c', float)]) + ] + + refs = [weakref.ref(a) for a in DATA] + for a in DATA: + assert_equal( + a, pickle.loads(pickle.dumps(a, protocol=proto)), + err_msg="%r" % a) + del a, DATA, carray + break_cycles() + # check for reference leaks (gh-12793) + for ref in refs: + assert ref() is None + + def _loads(self, obj): + if sys.version_info[0] >= 3: + return pickle.loads(obj, encoding='latin1') + else: + return pickle.loads(obj) + + # version 0 pickles, using protocol=2 to pickle + # version 0 doesn't have a version field + def test_version0_int8(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x04\x85cnumpy\ndtype\nq\x04U\x02i1K\x00K\x01\x87Rq\x05(U\x01|NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89U\x04\x01\x02\x03\x04tb.' + a = np.array([1, 2, 3, 4], dtype=np.int8) + p = self._loads(s) + assert_equal(a, p) + + def test_version0_float32(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x04\x85cnumpy\ndtype\nq\x04U\x02f4K\x00K\x01\x87Rq\x05(U\x01<NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89U\x10\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@tb.' + a = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) + p = self._loads(s) + assert_equal(a, p) + + def test_version0_object(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x02\x85cnumpy\ndtype\nq\x04U\x02O8K\x00K\x01\x87Rq\x05(U\x01|NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89]q\x06(}q\x07U\x01aK\x01s}q\x08U\x01bK\x02setb.' 
+ a = np.array([{'a': 1}, {'b': 2}]) + p = self._loads(s) + assert_equal(a, p) + + # version 1 pickles, using protocol=2 to pickle + def test_version1_int8(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x01K\x04\x85cnumpy\ndtype\nq\x04U\x02i1K\x00K\x01\x87Rq\x05(K\x01U\x01|NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89U\x04\x01\x02\x03\x04tb.' + a = np.array([1, 2, 3, 4], dtype=np.int8) + p = self._loads(s) + assert_equal(a, p) + + def test_version1_float32(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x01K\x04\x85cnumpy\ndtype\nq\x04U\x02f4K\x00K\x01\x87Rq\x05(K\x01U\x01<NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89U\x10\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@tb.' + a = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) + p = self._loads(s) + assert_equal(a, p) + + def test_version1_object(self): + s = b'\x80\x02cnumpy.core._internal\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x01K\x02\x85cnumpy\ndtype\nq\x04U\x02O8K\x00K\x01\x87Rq\x05(K\x01U\x01|NNJ\xff\xff\xff\xffJ\xff\xff\xff\xfftb\x89]q\x06(}q\x07U\x01aK\x01s}q\x08U\x01bK\x02setb.' + a = np.array([{'a': 1}, {'b': 2}]) + p = self._loads(s) + assert_equal(a, p) + + def test_subarray_int_shape(self): + s = b"cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n(S'V6'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'a'\np12\ng3\ntp13\n(dp14\ng12\n(g7\n(S'V4'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'|'\np18\n(g7\n(S'i1'\np19\nI0\nI1\ntp20\nRp21\n(I3\nS'|'\np22\nNNNI-1\nI-1\nI0\ntp23\nb(I2\nI2\ntp24\ntp25\nNNI4\nI1\nI0\ntp26\nbI0\ntp27\nsg3\n(g7\n(S'V2'\np28\nI0\nI1\ntp29\nRp30\n(I3\nS'|'\np31\n(g21\nI2\ntp32\nNNI2\nI1\nI0\ntp33\nbI4\ntp34\nsI6\nI1\nI0\ntp35\nbI00\nS'\\x01\\x01\\x01\\x01\\x01\\x02'\np36\ntp37\nb." 
+ a = np.array([(1, (1, 2))], dtype=[('a', 'i1', (2, 2)), ('b', 'i1', 2)]) + p = self._loads(s) + assert_equal(a, p) + + def test_datetime64_byteorder(self): + original = np.array([['2015-02-24T00:00:00.000000000']], dtype='datetime64[ns]') + + original_byte_reversed = original.copy(order='K') + original_byte_reversed.dtype = original_byte_reversed.dtype.newbyteorder('S') + original_byte_reversed.byteswap(inplace=True) + + new = pickle.loads(pickle.dumps(original_byte_reversed)) + + assert_equal(original.dtype, new.dtype) + + def test_py2_can_load_py3_pickle_with_dtype_field_names(self): + # gh-2407 and PR #14275 + # Py2 should be able to load a pickle that was created in PY3 + # when the pickle contains a structured dtype with field names + import numpy as np + + expected_dtype = np.dtype([('SPOT', np.float64)]) + expected_data = np.array([(6.0)], dtype=expected_dtype) + # Pickled under Python 3.6.5 with protocol=2 by the code below: + # pickle.dumps(expected_data, protocol=2) + saved_pickle_from_py3 = b'''\ +\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nndarray\n\ +q\x01K\x00\x85q\x02c_codecs\nencode\nq\x03X\x01\x00\x00\x00bq\x04X\ +\x06\x00\x00\x00latin1q\x05\x86q\x06Rq\x07\x87q\x08Rq\t(K\x01K\x01\ +\x85q\ncnumpy\ndtype\nq\x0bX\x02\x00\x00\x00V8q\x0cK\x00K\x01\x87q\ +\rRq\x0e(K\x03X\x01\x00\x00\x00|q\x0fNX\x04\x00\x00\x00SPOTq\x10\ +\x85q\x11}q\x12h\x10h\x0bX\x02\x00\x00\x00f8q\x13K\x00K\x01\x87\ +q\x14Rq\x15(K\x03X\x01\x00\x00\x00<q\x16NNNJ\xff\xff\xff\xffJ\xff\ +\xff\xff\xffK\x00tq\x17bK\x00\x86q\x18sK\x08K\x01K\x10tq\x19b\x89h\ +\x03X\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18@q\x1ah\x05\x86q\ +\x1bRq\x1ctq\x1db.\ +''' + + if sys.version_info[0] < 3: # PY2 + assert pickle.loads(saved_pickle_from_py3) == expected_data + else: + # check that the string above is what we claim on PY3 + py3_pickle_dump = pickle.dumps(expected_data, protocol=2) + assert py3_pickle_dump == saved_pickle_from_py3 + + def test_py3_can_load_py2_pickle_with_dtype_field_names(self): + # gh-2407 and PR #14275 + # Roundtrip: Py3 should load a pickle that was created in PY2 + # after loading the saved_pickle (from PY3) in the test named + # 'test_py2_can_load_py3_pickle_with_dtype_field_names' + import numpy as np + + expected_dtype = np.dtype([('SPOT', np.float64)]) + expected = np.array([(6.0)], dtype=expected_dtype) + # Pickled under Python 2.7.16 with protocol=2 after it was loaded + # by test 'test_py2_can_load_py3_pickle_with_dtype_field_names' + pickle_from_py2 = b'''\ +\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x01cnumpy\nndarray\n\ +q\x02K\x00\x85U\x01b\x87Rq\x03(K\x01K\x01\x85cnumpy\ndtype\nq\x04U\x02\ +V8K\x00K\x01\x87Rq\x05(K\x03U\x01|NU\x04SPOTq\x06\x85q\x07}q\x08h\x06h\ +\x04U\x02f8K\x00K\x01\x87Rq\t(K\x03U\x01<NNNJ\xff\xff\xff\xffJ\xff\xff\ +\xff\xffK\x00tbK\x00\x86sK\x08K\x01K\x10tb\x89U\x08\x00\x00\x00\x00\x00\ +\x00\x18@tb.\ +''' + + if sys.version_info[0] >= 3: # PY3 + assert pickle.loads(pickle_from_py2) == expected + else: + # check that the string above is what we claim on PY2 + if sys.platform.startswith('linux') and not IS_PYPY: + assert pickle.dumps(expected, protocol=2) == pickle_from_py2 + + + +class TestFancyIndexing(object): + def test_list(self): + x = np.ones((1, 1)) + x[:, [0]] = 2.0 + assert_array_equal(x, np.array([[2.0]])) + + x = np.ones((1, 1, 1)) + x[:, :, [0]] = 2.0 + assert_array_equal(x, np.array([[[2.0]]])) + + def test_tuple(self): + x = np.ones((1, 1)) + x[:, (0,)] = 2.0 + assert_array_equal(x, np.array([[2.0]])) + x = np.ones((1, 1, 1)) + x[:, :, (0,)] 
= 2.0 + assert_array_equal(x, np.array([[[2.0]]])) + + def test_mask(self): + x = np.array([1, 2, 3, 4]) + m = np.array([0, 1, 0, 0], bool) + assert_array_equal(x[m], np.array([2])) + + def test_mask2(self): + x = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + m = np.array([0, 1], bool) + m2 = np.array([[0, 1, 0, 0], [1, 0, 0, 0]], bool) + m3 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], bool) + assert_array_equal(x[m], np.array([[5, 6, 7, 8]])) + assert_array_equal(x[m2], np.array([2, 5])) + assert_array_equal(x[m3], np.array([2])) + + def test_assign_mask(self): + x = np.array([1, 2, 3, 4]) + m = np.array([0, 1, 0, 0], bool) + x[m] = 5 + assert_array_equal(x, np.array([1, 5, 3, 4])) + + def test_assign_mask2(self): + xorig = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + m = np.array([0, 1], bool) + m2 = np.array([[0, 1, 0, 0], [1, 0, 0, 0]], bool) + m3 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], bool) + x = xorig.copy() + x[m] = 10 + assert_array_equal(x, np.array([[1, 2, 3, 4], [10, 10, 10, 10]])) + x = xorig.copy() + x[m2] = 10 + assert_array_equal(x, np.array([[1, 10, 3, 4], [10, 6, 7, 8]])) + x = xorig.copy() + x[m3] = 10 + assert_array_equal(x, np.array([[1, 10, 3, 4], [5, 6, 7, 8]])) + + +class TestStringCompare(object): + def test_string(self): + g1 = np.array(["This", "is", "example"]) + g2 = np.array(["This", "was", "example"]) + assert_array_equal(g1 == g2, [g1[i] == g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 != g2, [g1[i] != g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 <= g2, [g1[i] <= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 >= g2, [g1[i] >= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 < g2, [g1[i] < g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 > g2, [g1[i] > g2[i] for i in [0, 1, 2]]) + + def test_mixed(self): + g1 = np.array(["spam", "spa", "spammer", "and eggs"]) + g2 = "spam" + assert_array_equal(g1 == g2, [x == g2 for x in g1]) + assert_array_equal(g1 != g2, [x != g2 for x in g1]) + assert_array_equal(g1 < g2, [x < g2 for x in g1]) + assert_array_equal(g1 > g2, [x > g2 for x in g1]) + assert_array_equal(g1 <= g2, [x <= g2 for x in g1]) + assert_array_equal(g1 >= g2, [x >= g2 for x in g1]) + + def test_unicode(self): + g1 = np.array([u"This", u"is", u"example"]) + g2 = np.array([u"This", u"was", u"example"]) + assert_array_equal(g1 == g2, [g1[i] == g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 != g2, [g1[i] != g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 <= g2, [g1[i] <= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 >= g2, [g1[i] >= g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 < g2, [g1[i] < g2[i] for i in [0, 1, 2]]) + assert_array_equal(g1 > g2, [g1[i] > g2[i] for i in [0, 1, 2]]) + + +class TestArgmax(object): + + nan_arr = [ + ([0, 1, 2, 3, np.nan], 4), + ([0, 1, 2, np.nan, 3], 3), + ([np.nan, 0, 1, 2, 3], 0), + ([np.nan, 0, np.nan, 2, 3], 0), + ([0, 1, 2, 3, complex(0, np.nan)], 4), + ([0, 1, 2, 3, complex(np.nan, 0)], 4), + ([0, 1, 2, complex(np.nan, 0), 3], 3), + ([0, 1, 2, complex(0, np.nan), 3], 3), + ([complex(0, np.nan), 0, 1, 2, 3], 0), + ([complex(np.nan, np.nan), 0, 1, 2, 3], 0), + ([complex(np.nan, 0), complex(np.nan, 2), complex(np.nan, 1)], 0), + ([complex(np.nan, np.nan), complex(np.nan, 2), complex(np.nan, 1)], 0), + ([complex(np.nan, 0), complex(np.nan, 2), complex(np.nan, np.nan)], 0), + + ([complex(0, 0), complex(0, 2), complex(0, 1)], 1), + ([complex(1, 0), complex(0, 2), complex(0, 1)], 0), + ([complex(1, 0), complex(0, 2), complex(1, 1)], 2), + + ([np.datetime64('1923-04-14T12:43:12'), + 
np.datetime64('1994-06-21T14:43:15'), + np.datetime64('2001-10-15T04:10:32'), + np.datetime64('1995-11-25T16:02:16'), + np.datetime64('2005-01-04T03:14:12'), + np.datetime64('2041-12-03T14:05:03')], 5), + ([np.datetime64('1935-09-14T04:40:11'), + np.datetime64('1949-10-12T12:32:11'), + np.datetime64('2010-01-03T05:14:12'), + np.datetime64('2015-11-20T12:20:59'), + np.datetime64('1932-09-23T10:10:13'), + np.datetime64('2014-10-10T03:50:30')], 3), + # Assorted tests with NaTs + ([np.datetime64('NaT'), + np.datetime64('NaT'), + np.datetime64('2010-01-03T05:14:12'), + np.datetime64('NaT'), + np.datetime64('2015-09-23T10:10:13'), + np.datetime64('1932-10-10T03:50:30')], 4), + ([np.datetime64('2059-03-14T12:43:12'), + np.datetime64('1996-09-21T14:43:15'), + np.datetime64('NaT'), + np.datetime64('2022-12-25T16:02:16'), + np.datetime64('1963-10-04T03:14:12'), + np.datetime64('2013-05-08T18:15:23')], 0), + ([np.timedelta64(2, 's'), + np.timedelta64(1, 's'), + np.timedelta64('NaT', 's'), + np.timedelta64(3, 's')], 3), + ([np.timedelta64('NaT', 's')] * 3, 0), + + ([timedelta(days=5, seconds=14), timedelta(days=2, seconds=35), + timedelta(days=-1, seconds=23)], 0), + ([timedelta(days=1, seconds=43), timedelta(days=10, seconds=5), + timedelta(days=5, seconds=14)], 1), + ([timedelta(days=10, seconds=24), timedelta(days=10, seconds=5), + timedelta(days=10, seconds=43)], 2), + + ([False, False, False, False, True], 4), + ([False, False, False, True, False], 3), + ([True, False, False, False, False], 0), + ([True, False, True, False, False], 0), + ] + + def test_all(self): + a = np.random.normal(0, 1, (4, 5, 6, 7, 8)) + for i in range(a.ndim): + amax = a.max(i) + aargmax = a.argmax(i) + axes = list(range(a.ndim)) + axes.remove(i) + assert_(np.all(amax == aargmax.choose(*a.transpose(i,*axes)))) + + def test_combinations(self): + for arr, pos in self.nan_arr: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + "invalid value encountered in reduce") + max_val = np.max(arr) + + assert_equal(np.argmax(arr), pos, err_msg="%r" % arr) + assert_equal(arr[np.argmax(arr)], max_val, err_msg="%r" % arr) + + def test_output_shape(self): + # see also gh-616 + a = np.ones((10, 5)) + # Check some simple shape mismatches + out = np.ones(11, dtype=np.int_) + assert_raises(ValueError, a.argmax, -1, out) + + out = np.ones((2, 5), dtype=np.int_) + assert_raises(ValueError, a.argmax, -1, out) + + # these could be relaxed possibly (used to allow even the previous) + out = np.ones((1, 10), dtype=np.int_) + assert_raises(ValueError, a.argmax, -1, out) + + out = np.ones(10, dtype=np.int_) + a.argmax(-1, out=out) + assert_equal(out, a.argmax(-1)) + + def test_argmax_unicode(self): + d = np.zeros(6031, dtype='<U9') + d[5942] = "as" + assert_equal(d.argmax(), 5942) + + def test_np_vs_ndarray(self): + # make sure both ndarray.argmax and numpy.argmax support out/axis args + a = np.random.normal(size=(2,3)) + + # check positional args + out1 = np.zeros(2, dtype=int) + out2 = np.zeros(2, dtype=int) + assert_equal(a.argmax(1, out1), np.argmax(a, 1, out2)) + assert_equal(out1, out2) + + # check keyword args + out1 = np.zeros(3, dtype=int) + out2 = np.zeros(3, dtype=int) + assert_equal(a.argmax(out=out1, axis=0), np.argmax(a, out=out2, axis=0)) + assert_equal(out1, out2) + + def test_object_argmax_with_NULLs(self): + # See gh-6032 + a = np.empty(4, dtype='O') + ctypes.memset(a.ctypes.data, 0, a.nbytes) + assert_equal(a.argmax(), 0) + a[3] = 10 + assert_equal(a.argmax(), 3) + a[1] = 30 + assert_equal(a.argmax(), 1) + + +class 
TestArgmin(object): + + nan_arr = [ + ([0, 1, 2, 3, np.nan], 4), + ([0, 1, 2, np.nan, 3], 3), + ([np.nan, 0, 1, 2, 3], 0), + ([np.nan, 0, np.nan, 2, 3], 0), + ([0, 1, 2, 3, complex(0, np.nan)], 4), + ([0, 1, 2, 3, complex(np.nan, 0)], 4), + ([0, 1, 2, complex(np.nan, 0), 3], 3), + ([0, 1, 2, complex(0, np.nan), 3], 3), + ([complex(0, np.nan), 0, 1, 2, 3], 0), + ([complex(np.nan, np.nan), 0, 1, 2, 3], 0), + ([complex(np.nan, 0), complex(np.nan, 2), complex(np.nan, 1)], 0), + ([complex(np.nan, np.nan), complex(np.nan, 2), complex(np.nan, 1)], 0), + ([complex(np.nan, 0), complex(np.nan, 2), complex(np.nan, np.nan)], 0), + + ([complex(0, 0), complex(0, 2), complex(0, 1)], 0), + ([complex(1, 0), complex(0, 2), complex(0, 1)], 2), + ([complex(1, 0), complex(0, 2), complex(1, 1)], 1), + + ([np.datetime64('1923-04-14T12:43:12'), + np.datetime64('1994-06-21T14:43:15'), + np.datetime64('2001-10-15T04:10:32'), + np.datetime64('1995-11-25T16:02:16'), + np.datetime64('2005-01-04T03:14:12'), + np.datetime64('2041-12-03T14:05:03')], 0), + ([np.datetime64('1935-09-14T04:40:11'), + np.datetime64('1949-10-12T12:32:11'), + np.datetime64('2010-01-03T05:14:12'), + np.datetime64('2014-11-20T12:20:59'), + np.datetime64('2015-09-23T10:10:13'), + np.datetime64('1932-10-10T03:50:30')], 5), + # Assorted tests with NaTs + ([np.datetime64('NaT'), + np.datetime64('NaT'), + np.datetime64('2010-01-03T05:14:12'), + np.datetime64('NaT'), + np.datetime64('2015-09-23T10:10:13'), + np.datetime64('1932-10-10T03:50:30')], 5), + ([np.datetime64('2059-03-14T12:43:12'), + np.datetime64('1996-09-21T14:43:15'), + np.datetime64('NaT'), + np.datetime64('2022-12-25T16:02:16'), + np.datetime64('1963-10-04T03:14:12'), + np.datetime64('2013-05-08T18:15:23')], 4), + ([np.timedelta64(2, 's'), + np.timedelta64(1, 's'), + np.timedelta64('NaT', 's'), + np.timedelta64(3, 's')], 1), + ([np.timedelta64('NaT', 's')] * 3, 0), + + ([timedelta(days=5, seconds=14), timedelta(days=2, seconds=35), + timedelta(days=-1, seconds=23)], 2), + ([timedelta(days=1, seconds=43), timedelta(days=10, seconds=5), + timedelta(days=5, seconds=14)], 0), + ([timedelta(days=10, seconds=24), timedelta(days=10, seconds=5), + timedelta(days=10, seconds=43)], 1), + + ([True, True, True, True, False], 4), + ([True, True, True, False, True], 3), + ([False, True, True, True, True], 0), + ([False, True, False, True, True], 0), + ] + + def test_all(self): + a = np.random.normal(0, 1, (4, 5, 6, 7, 8)) + for i in range(a.ndim): + amin = a.min(i) + aargmin = a.argmin(i) + axes = list(range(a.ndim)) + axes.remove(i) + assert_(np.all(amin == aargmin.choose(*a.transpose(i,*axes)))) + + def test_combinations(self): + for arr, pos in self.nan_arr: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + "invalid value encountered in reduce") + min_val = np.min(arr) + + assert_equal(np.argmin(arr), pos, err_msg="%r" % arr) + assert_equal(arr[np.argmin(arr)], min_val, err_msg="%r" % arr) + + def test_minimum_signed_integers(self): + + a = np.array([1, -2**7, -2**7 + 1], dtype=np.int8) + assert_equal(np.argmin(a), 1) + + a = np.array([1, -2**15, -2**15 + 1], dtype=np.int16) + assert_equal(np.argmin(a), 1) + + a = np.array([1, -2**31, -2**31 + 1], dtype=np.int32) + assert_equal(np.argmin(a), 1) + + a = np.array([1, -2**63, -2**63 + 1], dtype=np.int64) + assert_equal(np.argmin(a), 1) + + def test_output_shape(self): + # see also gh-616 + a = np.ones((10, 5)) + # Check some simple shape mismatches + out = np.ones(11, dtype=np.int_) + assert_raises(ValueError, a.argmin, -1, out) + + 
out = np.ones((2, 5), dtype=np.int_) + assert_raises(ValueError, a.argmin, -1, out) + + # these could be relaxed possibly (used to allow even the previous) + out = np.ones((1, 10), dtype=np.int_) + assert_raises(ValueError, a.argmin, -1, out) + + out = np.ones(10, dtype=np.int_) + a.argmin(-1, out=out) + assert_equal(out, a.argmin(-1)) + + def test_argmin_unicode(self): + d = np.ones(6031, dtype='<U9') + d[6001] = "0" + assert_equal(d.argmin(), 6001) + + def test_np_vs_ndarray(self): + # make sure both ndarray.argmin and numpy.argmin support out/axis args + a = np.random.normal(size=(2, 3)) + + # check positional args + out1 = np.zeros(2, dtype=int) + out2 = np.ones(2, dtype=int) + assert_equal(a.argmin(1, out1), np.argmin(a, 1, out2)) + assert_equal(out1, out2) + + # check keyword args + out1 = np.zeros(3, dtype=int) + out2 = np.ones(3, dtype=int) + assert_equal(a.argmin(out=out1, axis=0), np.argmin(a, out=out2, axis=0)) + assert_equal(out1, out2) + + def test_object_argmin_with_NULLs(self): + # See gh-6032 + a = np.empty(4, dtype='O') + ctypes.memset(a.ctypes.data, 0, a.nbytes) + assert_equal(a.argmin(), 0) + a[3] = 30 + assert_equal(a.argmin(), 3) + a[1] = 10 + assert_equal(a.argmin(), 1) + + +class TestMinMax(object): + + def test_scalar(self): + assert_raises(np.AxisError, np.amax, 1, 1) + assert_raises(np.AxisError, np.amin, 1, 1) + + assert_equal(np.amax(1, axis=0), 1) + assert_equal(np.amin(1, axis=0), 1) + assert_equal(np.amax(1, axis=None), 1) + assert_equal(np.amin(1, axis=None), 1) + + def test_axis(self): + assert_raises(np.AxisError, np.amax, [1, 2, 3], 1000) + assert_equal(np.amax([[1, 2, 3]], axis=1), 3) + + def test_datetime(self): + # NaTs are ignored + for dtype in ('m8[s]', 'm8[Y]'): + a = np.arange(10).astype(dtype) + a[3] = 'NaT' + assert_equal(np.amin(a), a[0]) + assert_equal(np.amax(a), a[9]) + a[0] = 'NaT' + assert_equal(np.amin(a), a[1]) + assert_equal(np.amax(a), a[9]) + a.fill('NaT') + assert_equal(np.amin(a), a[0]) + assert_equal(np.amax(a), a[0]) + + +class TestNewaxis(object): + def test_basic(self): + sk = np.array([0, -0.1, 0.1]) + res = 250*sk[:, np.newaxis] + assert_almost_equal(res.ravel(), 250*sk) + + +class TestClip(object): + def _check_range(self, x, cmin, cmax): + assert_(np.all(x >= cmin)) + assert_(np.all(x <= cmax)) + + def _clip_type(self, type_group, array_max, + clip_min, clip_max, inplace=False, + expected_min=None, expected_max=None): + if expected_min is None: + expected_min = clip_min + if expected_max is None: + expected_max = clip_max + + for T in np.sctypes[type_group]: + if sys.byteorder == 'little': + byte_orders = ['=', '>'] + else: + byte_orders = ['<', '='] + + for byteorder in byte_orders: + dtype = np.dtype(T).newbyteorder(byteorder) + + x = (np.random.random(1000) * array_max).astype(dtype) + if inplace: + x.clip(clip_min, clip_max, x) + else: + x = x.clip(clip_min, clip_max) + byteorder = '=' + + if x.dtype.byteorder == '|': + byteorder = '|' + assert_equal(x.dtype.byteorder, byteorder) + self._check_range(x, expected_min, expected_max) + return x + + def test_basic(self): + for inplace in [False, True]: + self._clip_type( + 'float', 1024, -12.8, 100.2, inplace=inplace) + self._clip_type( + 'float', 1024, 0, 0, inplace=inplace) + + self._clip_type( + 'int', 1024, -120, 100.5, inplace=inplace) + self._clip_type( + 'int', 1024, 0, 0, inplace=inplace) + + self._clip_type( + 'uint', 1024, 0, 0, inplace=inplace) + self._clip_type( + 'uint', 1024, -120, 100, inplace=inplace, expected_min=0) + + def test_record_array(self): + rec = 
np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)], + dtype=[('x', '<f8'), ('y', '<f8'), ('z', '<f8')]) + y = rec['x'].clip(-0.3, 0.5) + self._check_range(y, -0.3, 0.5) + + def test_max_or_min(self): + val = np.array([0, 1, 2, 3, 4, 5, 6, 7]) + x = val.clip(3) + assert_(np.all(x >= 3)) + x = val.clip(min=3) + assert_(np.all(x >= 3)) + x = val.clip(max=4) + assert_(np.all(x <= 4)) + + def test_nan(self): + input_arr = np.array([-2., np.nan, 0.5, 3., 0.25, np.nan]) + result = input_arr.clip(-1, 1) + expected = np.array([-1., np.nan, 0.5, 1., 0.25, np.nan]) + assert_array_equal(result, expected) + + +class TestCompress(object): + def test_axis(self): + tgt = [[5, 6, 7, 8, 9]] + arr = np.arange(10).reshape(2, 5) + out = np.compress([0, 1], arr, axis=0) + assert_equal(out, tgt) + + tgt = [[1, 3], [6, 8]] + out = np.compress([0, 1, 0, 1, 0], arr, axis=1) + assert_equal(out, tgt) + + def test_truncate(self): + tgt = [[1], [6]] + arr = np.arange(10).reshape(2, 5) + out = np.compress([0, 1], arr, axis=1) + assert_equal(out, tgt) + + def test_flatten(self): + arr = np.arange(10).reshape(2, 5) + out = np.compress([0, 1], arr) + assert_equal(out, 1) + + +class TestPutmask(object): + def tst_basic(self, x, T, mask, val): + np.putmask(x, mask, val) + assert_equal(x[mask], T(val)) + assert_equal(x.dtype, T) + + def test_ip_types(self): + unchecked_types = [bytes, unicode, np.void, object] + + x = np.random.random(1000)*100 + mask = x < 40 + + for val in [-100, 0, 15]: + for types in np.sctypes.values(): + for T in types: + if T not in unchecked_types: + self.tst_basic(x.copy().astype(T), T, mask, val) + + def test_mask_size(self): + assert_raises(ValueError, np.putmask, np.array([1, 2, 3]), [True], 5) + + @pytest.mark.parametrize('dtype', ('>i4', '<i4')) + def test_byteorder(self, dtype): + x = np.array([1, 2, 3], dtype) + np.putmask(x, [True, False, True], -1) + assert_array_equal(x, [-1, 2, -1]) + + def test_record_array(self): + # Note mixed byteorder. 
+ rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)], + dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')]) + np.putmask(rec['x'], [True, False], 10) + assert_array_equal(rec['x'], [10, 5]) + assert_array_equal(rec['y'], [2, 4]) + assert_array_equal(rec['z'], [3, 3]) + np.putmask(rec['y'], [True, False], 11) + assert_array_equal(rec['x'], [10, 5]) + assert_array_equal(rec['y'], [11, 4]) + assert_array_equal(rec['z'], [3, 3]) + + +class TestTake(object): + def tst_basic(self, x): + ind = list(range(x.shape[0])) + assert_array_equal(x.take(ind, axis=0), x) + + def test_ip_types(self): + unchecked_types = [bytes, unicode, np.void, object] + + x = np.random.random(24)*100 + x.shape = 2, 3, 4 + for types in np.sctypes.values(): + for T in types: + if T not in unchecked_types: + self.tst_basic(x.copy().astype(T)) + + def test_raise(self): + x = np.random.random(24)*100 + x.shape = 2, 3, 4 + assert_raises(IndexError, x.take, [0, 1, 2], axis=0) + assert_raises(IndexError, x.take, [-3], axis=0) + assert_array_equal(x.take([-1], axis=0)[0], x[1]) + + def test_clip(self): + x = np.random.random(24)*100 + x.shape = 2, 3, 4 + assert_array_equal(x.take([-1], axis=0, mode='clip')[0], x[0]) + assert_array_equal(x.take([2], axis=0, mode='clip')[0], x[1]) + + def test_wrap(self): + x = np.random.random(24)*100 + x.shape = 2, 3, 4 + assert_array_equal(x.take([-1], axis=0, mode='wrap')[0], x[1]) + assert_array_equal(x.take([2], axis=0, mode='wrap')[0], x[0]) + assert_array_equal(x.take([3], axis=0, mode='wrap')[0], x[1]) + + @pytest.mark.parametrize('dtype', ('>i4', '<i4')) + def test_byteorder(self, dtype): + x = np.array([1, 2, 3], dtype) + assert_array_equal(x.take([0, 2, 1]), [1, 3, 2]) + + def test_record_array(self): + # Note mixed byteorder. + rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)], + dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')]) + rec1 = rec.take([1]) + assert_(rec1['x'] == 5.0 and rec1['y'] == 4.0) + + +class TestLexsort(object): + def test_basic(self): + a = [1, 2, 1, 3, 1, 5] + b = [0, 4, 5, 6, 2, 3] + idx = np.lexsort((b, a)) + expected_idx = np.array([0, 4, 2, 1, 3, 5]) + assert_array_equal(idx, expected_idx) + + x = np.vstack((b, a)) + idx = np.lexsort(x) + assert_array_equal(idx, expected_idx) + + assert_array_equal(x[1][idx], np.sort(x[1])) + + def test_datetime(self): + a = np.array([0,0,0], dtype='datetime64[D]') + b = np.array([2,1,0], dtype='datetime64[D]') + idx = np.lexsort((b, a)) + expected_idx = np.array([2, 1, 0]) + assert_array_equal(idx, expected_idx) + + a = np.array([0,0,0], dtype='timedelta64[D]') + b = np.array([2,1,0], dtype='timedelta64[D]') + idx = np.lexsort((b, a)) + expected_idx = np.array([2, 1, 0]) + assert_array_equal(idx, expected_idx) + + def test_object(self): # gh-6312 + a = np.random.choice(10, 1000) + b = np.random.choice(['abc', 'xy', 'wz', 'efghi', 'qwst', 'x'], 1000) + + for u in a, b: + left = np.lexsort((u.astype('O'),)) + right = np.argsort(u, kind='mergesort') + assert_array_equal(left, right) + + for u, v in (a, b), (b, a): + idx = np.lexsort((u, v)) + assert_array_equal(idx, np.lexsort((u.astype('O'), v))) + assert_array_equal(idx, np.lexsort((u, v.astype('O')))) + u, v = np.array(u, dtype='object'), np.array(v, dtype='object') + assert_array_equal(idx, np.lexsort((u, v))) + + def test_invalid_axis(self): # gh-7528 + x = np.linspace(0., 1., 42*3).reshape(42, 3) + assert_raises(np.AxisError, np.lexsort, x, axis=2) + +class TestIO(object): + """Test tofile, fromfile, tobytes, and fromstring""" + + def setup(self): + shape = (2, 4, 3) + rand = 
np.random.random + self.x = rand(shape) + rand(shape).astype(complex)*1j + self.x[0,:, 1] = [np.nan, np.inf, -np.inf, np.nan] + self.dtype = self.x.dtype + self.tempdir = tempfile.mkdtemp() + self.filename = tempfile.mktemp(dir=self.tempdir) + + def teardown(self): + shutil.rmtree(self.tempdir) + + def test_nofile(self): + # this should probably be supported as a file + # but for now test for proper errors + b = io.BytesIO() + assert_raises(IOError, np.fromfile, b, np.uint8, 80) + d = np.ones(7) + assert_raises(IOError, lambda x: x.tofile(b), d) + + def test_bool_fromstring(self): + v = np.array([True, False, True, False], dtype=np.bool_) + y = np.fromstring('1 0 -2.3 0.0', sep=' ', dtype=np.bool_) + assert_array_equal(v, y) + + def test_uint64_fromstring(self): + d = np.fromstring("9923372036854775807 104783749223640", + dtype=np.uint64, sep=' ') + e = np.array([9923372036854775807, 104783749223640], dtype=np.uint64) + assert_array_equal(d, e) + + def test_int64_fromstring(self): + d = np.fromstring("-25041670086757 104783749223640", + dtype=np.int64, sep=' ') + e = np.array([-25041670086757, 104783749223640], dtype=np.int64) + assert_array_equal(d, e) + + def test_empty_files_binary(self): + f = open(self.filename, 'w') + f.close() + y = np.fromfile(self.filename) + assert_(y.size == 0, "Array not empty") + + def test_empty_files_text(self): + f = open(self.filename, 'w') + f.close() + y = np.fromfile(self.filename, sep=" ") + assert_(y.size == 0, "Array not empty") + + def test_roundtrip_file(self): + f = open(self.filename, 'wb') + self.x.tofile(f) + f.close() + # NB. doesn't work with flush+seek, due to use of C stdio + f = open(self.filename, 'rb') + y = np.fromfile(f, dtype=self.dtype) + f.close() + assert_array_equal(y, self.x.flat) + + def test_roundtrip_filename(self): + self.x.tofile(self.filename) + y = np.fromfile(self.filename, dtype=self.dtype) + assert_array_equal(y, self.x.flat) + + def test_roundtrip_binary_str(self): + s = self.x.tobytes() + y = np.frombuffer(s, dtype=self.dtype) + assert_array_equal(y, self.x.flat) + + s = self.x.tobytes('F') + y = np.frombuffer(s, dtype=self.dtype) + assert_array_equal(y, self.x.flatten('F')) + + def test_roundtrip_str(self): + x = self.x.real.ravel() + s = "@".join(map(str, x)) + y = np.fromstring(s, sep="@") + # NB. 
str imbues less precision + nan_mask = ~np.isfinite(x) + assert_array_equal(x[nan_mask], y[nan_mask]) + assert_array_almost_equal(x[~nan_mask], y[~nan_mask], decimal=5) + + def test_roundtrip_repr(self): + x = self.x.real.ravel() + s = "@".join(map(repr, x)) + y = np.fromstring(s, sep="@") + assert_array_equal(x, y) + + def test_unseekable_fromfile(self): + # gh-6246 + self.x.tofile(self.filename) + + def fail(*args, **kwargs): + raise IOError('Can not tell or seek') + + with io.open(self.filename, 'rb', buffering=0) as f: + f.seek = fail + f.tell = fail + assert_raises(IOError, np.fromfile, f, dtype=self.dtype) + + def test_io_open_unbuffered_fromfile(self): + # gh-6632 + self.x.tofile(self.filename) + with io.open(self.filename, 'rb', buffering=0) as f: + y = np.fromfile(f, dtype=self.dtype) + assert_array_equal(y, self.x.flat) + + def test_largish_file(self): + # check the fallocate path on files > 16MB + d = np.zeros(4 * 1024 ** 2) + d.tofile(self.filename) + assert_equal(os.path.getsize(self.filename), d.nbytes) + assert_array_equal(d, np.fromfile(self.filename)) + # check offset + with open(self.filename, "r+b") as f: + f.seek(d.nbytes) + d.tofile(f) + assert_equal(os.path.getsize(self.filename), d.nbytes * 2) + # check append mode (gh-8329) + open(self.filename, "w").close() # delete file contents + with open(self.filename, "ab") as f: + d.tofile(f) + assert_array_equal(d, np.fromfile(self.filename)) + with open(self.filename, "ab") as f: + d.tofile(f) + assert_equal(os.path.getsize(self.filename), d.nbytes * 2) + + def test_io_open_buffered_fromfile(self): + # gh-6632 + self.x.tofile(self.filename) + with io.open(self.filename, 'rb', buffering=-1) as f: + y = np.fromfile(f, dtype=self.dtype) + assert_array_equal(y, self.x.flat) + + def test_file_position_after_fromfile(self): + # gh-4118 + sizes = [io.DEFAULT_BUFFER_SIZE//8, + io.DEFAULT_BUFFER_SIZE, + io.DEFAULT_BUFFER_SIZE*8] + + for size in sizes: + f = open(self.filename, 'wb') + f.seek(size-1) + f.write(b'\0') + f.close() + + for mode in ['rb', 'r+b']: + err_msg = "%d %s" % (size, mode) + + f = open(self.filename, mode) + f.read(2) + np.fromfile(f, dtype=np.float64, count=1) + pos = f.tell() + f.close() + assert_equal(pos, 10, err_msg=err_msg) + + def test_file_position_after_tofile(self): + # gh-4118 + sizes = [io.DEFAULT_BUFFER_SIZE//8, + io.DEFAULT_BUFFER_SIZE, + io.DEFAULT_BUFFER_SIZE*8] + + for size in sizes: + err_msg = "%d" % (size,) + + f = open(self.filename, 'wb') + f.seek(size-1) + f.write(b'\0') + f.seek(10) + f.write(b'12') + np.array([0], dtype=np.float64).tofile(f) + pos = f.tell() + f.close() + assert_equal(pos, 10 + 2 + 8, err_msg=err_msg) + + f = open(self.filename, 'r+b') + f.read(2) + f.seek(0, 1) # seek between read&write required by ANSI C + np.array([0], dtype=np.float64).tofile(f) + pos = f.tell() + f.close() + assert_equal(pos, 10, err_msg=err_msg) + + def test_load_object_array_fromfile(self): + # gh-12300 + with open(self.filename, 'w') as f: + # Ensure we have a file with consistent contents + pass + + with open(self.filename, 'rb') as f: + assert_raises_regex(ValueError, "Cannot read into object array", + np.fromfile, f, dtype=object) + + assert_raises_regex(ValueError, "Cannot read into object array", + np.fromfile, self.filename, dtype=object) + + def _check_from(self, s, value, **kw): + if 'sep' not in kw: + y = np.frombuffer(s, **kw) + else: + y = np.fromstring(s, **kw) + assert_array_equal(y, value) + + f = open(self.filename, 'wb') + f.write(s) + f.close() + y = np.fromfile(self.filename, 
**kw) + assert_array_equal(y, value) + + def test_nan(self): + self._check_from( + b"nan +nan -nan NaN nan(foo) +NaN(BAR) -NAN(q_u_u_x_)", + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + sep=' ') + + def test_inf(self): + self._check_from( + b"inf +inf -inf infinity -Infinity iNfInItY -inF", + [np.inf, np.inf, -np.inf, np.inf, -np.inf, np.inf, -np.inf], + sep=' ') + + def test_numbers(self): + self._check_from(b"1.234 -1.234 .3 .3e55 -123133.1231e+133", + [1.234, -1.234, .3, .3e55, -123133.1231e+133], sep=' ') + + def test_binary(self): + self._check_from(b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@', + np.array([1, 2, 3, 4]), + dtype='<f4') + + @pytest.mark.slow # takes > 1 minute on mechanical hard drive + def test_big_binary(self): + """Test workarounds for 32-bit limited fwrite, fseek, and ftell + calls in windows. These normally would hang doing something like this. + See http://projects.scipy.org/numpy/ticket/1660""" + if sys.platform != 'win32': + return + try: + # before workarounds, only up to 2**32-1 worked + fourgbplus = 2**32 + 2**16 + testbytes = np.arange(8, dtype=np.int8) + n = len(testbytes) + flike = tempfile.NamedTemporaryFile() + f = flike.file + np.tile(testbytes, fourgbplus // testbytes.nbytes).tofile(f) + flike.seek(0) + a = np.fromfile(f, dtype=np.int8) + flike.close() + assert_(len(a) == fourgbplus) + # check only start and end for speed: + assert_((a[:n] == testbytes).all()) + assert_((a[-n:] == testbytes).all()) + except (MemoryError, ValueError): + pass + + def test_string(self): + self._check_from(b'1,2,3,4', [1., 2., 3., 4.], sep=',') + + def test_counted_string(self): + self._check_from(b'1,2,3,4', [1., 2., 3., 4.], count=4, sep=',') + self._check_from(b'1,2,3,4', [1., 2., 3.], count=3, sep=',') + self._check_from(b'1,2,3,4', [1., 2., 3., 4.], count=-1, sep=',') + + def test_string_with_ws(self): + self._check_from(b'1 2 3 4 ', [1, 2, 3, 4], dtype=int, sep=' ') + + def test_counted_string_with_ws(self): + self._check_from(b'1 2 3 4 ', [1, 2, 3], count=3, dtype=int, + sep=' ') + + def test_ascii(self): + self._check_from(b'1 , 2 , 3 , 4', [1., 2., 3., 4.], sep=',') + self._check_from(b'1,2,3,4', [1., 2., 3., 4.], dtype=float, sep=',') + + def test_malformed(self): + self._check_from(b'1.234 1,234', [1.234, 1.], sep=' ') + + def test_long_sep(self): + self._check_from(b'1_x_3_x_4_x_5', [1, 3, 4, 5], sep='_x_') + + def test_dtype(self): + v = np.array([1, 2, 3, 4], dtype=np.int_) + self._check_from(b'1,2,3,4', v, sep=',', dtype=np.int_) + + def test_dtype_bool(self): + # can't use _check_from because fromstring can't handle True/False + v = np.array([True, False, True, False], dtype=np.bool_) + s = b'1,0,-2.3,0' + f = open(self.filename, 'wb') + f.write(s) + f.close() + y = np.fromfile(self.filename, sep=',', dtype=np.bool_) + assert_(y.dtype == '?') + assert_array_equal(y, v) + + def test_tofile_sep(self): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + f = open(self.filename, 'w') + x.tofile(f, sep=',') + f.close() + f = open(self.filename, 'r') + s = f.read() + f.close() + #assert_equal(s, '1.51,2.0,3.51,4.0') + y = np.array([float(p) for p in s.split(',')]) + assert_array_equal(x,y) + + def test_tofile_format(self): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + f = open(self.filename, 'w') + x.tofile(f, sep=',', format='%.2f') + f.close() + f = open(self.filename, 'r') + s = f.read() + f.close() + assert_equal(s, '1.51,2.00,3.51,4.00') + + def test_locale(self): + with CommaDecimalPointLocale(): + self.test_numbers() + 
self.test_nan() + self.test_inf() + self.test_counted_string() + self.test_ascii() + self.test_malformed() + self.test_tofile_sep() + self.test_tofile_format() + + +class TestFromBuffer(object): + @pytest.mark.parametrize('byteorder', ['<', '>']) + @pytest.mark.parametrize('dtype', [float, int, complex]) + def test_basic(self, byteorder, dtype): + dt = np.dtype(dtype).newbyteorder(byteorder) + x = (np.random.random((4, 7)) * 5).astype(dt) + buf = x.tobytes() + assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat) + + def test_empty(self): + assert_array_equal(np.frombuffer(b''), np.array([])) + + +class TestFlat(object): + def setup(self): + a0 = np.arange(20.0) + a = a0.reshape(4, 5) + a0.shape = (4, 5) + a.flags.writeable = False + self.a = a + self.b = a[::2, ::2] + self.a0 = a0 + self.b0 = a0[::2, ::2] + + def test_contiguous(self): + testpassed = False + try: + self.a.flat[12] = 100.0 + except ValueError: + testpassed = True + assert_(testpassed) + assert_(self.a.flat[12] == 12.0) + + def test_discontiguous(self): + testpassed = False + try: + self.b.flat[4] = 100.0 + except ValueError: + testpassed = True + assert_(testpassed) + assert_(self.b.flat[4] == 12.0) + + def test___array__(self): + c = self.a.flat.__array__() + d = self.b.flat.__array__() + e = self.a0.flat.__array__() + f = self.b0.flat.__array__() + + assert_(c.flags.writeable is False) + assert_(d.flags.writeable is False) + # for 1.14 all are set to non-writeable on the way to replacing the + # UPDATEIFCOPY array returned for non-contiguous arrays. + assert_(e.flags.writeable is True) + assert_(f.flags.writeable is False) + with assert_warns(DeprecationWarning): + assert_(c.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + assert_(d.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + assert_(e.flags.updateifcopy is False) + with assert_warns(DeprecationWarning): + # UPDATEIFCOPY is removed. + assert_(f.flags.updateifcopy is False) + assert_(c.flags.writebackifcopy is False) + assert_(d.flags.writebackifcopy is False) + assert_(e.flags.writebackifcopy is False) + assert_(f.flags.writebackifcopy is False) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_refcount(self): + # includes regression test for reference count error gh-13165 + inds = [np.intp(0), np.array([True]*self.a.size), np.array([0]), None] + indtype = np.dtype(np.intp) + rc_indtype = sys.getrefcount(indtype) + for ind in inds: + rc_ind = sys.getrefcount(ind) + for _ in range(100): + try: + self.a.flat[ind] + except IndexError: + pass + assert_(abs(sys.getrefcount(ind) - rc_ind) < 50) + assert_(abs(sys.getrefcount(indtype) - rc_indtype) < 50) + + +class TestResize(object): + def test_basic(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + if IS_PYPY: + x.resize((5, 5), refcheck=False) + else: + x.resize((5, 5)) + assert_array_equal(x.flat[:9], + np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]).flat) + assert_array_equal(x[9:].flat, 0) + + def test_check_reference(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y = x + assert_raises(ValueError, x.resize, (5, 1)) + del y # avoid pyflakes unused variable warning. 
+ + def test_int_shape(self): + x = np.eye(3) + if IS_PYPY: + x.resize(3, refcheck=False) + else: + x.resize(3) + assert_array_equal(x, np.eye(3)[0,:]) + + def test_none_shape(self): + x = np.eye(3) + x.resize(None) + assert_array_equal(x, np.eye(3)) + x.resize() + assert_array_equal(x, np.eye(3)) + + def test_0d_shape(self): + # to it multiple times to test it does not break alloc cache gh-9216 + for i in range(10): + x = np.empty((1,)) + x.resize(()) + assert_equal(x.shape, ()) + assert_equal(x.size, 1) + x = np.empty(()) + x.resize((1,)) + assert_equal(x.shape, (1,)) + assert_equal(x.size, 1) + + def test_invalid_arguments(self): + assert_raises(TypeError, np.eye(3).resize, 'hi') + assert_raises(ValueError, np.eye(3).resize, -1) + assert_raises(TypeError, np.eye(3).resize, order=1) + assert_raises(TypeError, np.eye(3).resize, refcheck='hi') + + def test_freeform_shape(self): + x = np.eye(3) + if IS_PYPY: + x.resize(3, 2, 1, refcheck=False) + else: + x.resize(3, 2, 1) + assert_(x.shape == (3, 2, 1)) + + def test_zeros_appended(self): + x = np.eye(3) + if IS_PYPY: + x.resize(2, 3, 3, refcheck=False) + else: + x.resize(2, 3, 3) + assert_array_equal(x[0], np.eye(3)) + assert_array_equal(x[1], np.zeros((3, 3))) + + def test_obj_obj(self): + # check memory is initialized on resize, gh-4857 + a = np.ones(10, dtype=[('k', object, 2)]) + if IS_PYPY: + a.resize(15, refcheck=False) + else: + a.resize(15,) + assert_equal(a.shape, (15,)) + assert_array_equal(a['k'][-5:], 0) + assert_array_equal(a['k'][:-5], 1) + + def test_empty_view(self): + # check that sizes containing a zero don't trigger a reallocate for + # already empty arrays + x = np.zeros((10, 0), int) + x_view = x[...] + x_view.resize((0, 10)) + x_view.resize((0, 100)) + + def test_check_weakref(self): + x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + xref = weakref.ref(x) + assert_raises(ValueError, x.resize, (5, 1)) + del xref # avoid pyflakes unused variable warning. 
+ + +class TestRecord(object): + def test_field_rename(self): + dt = np.dtype([('f', float), ('i', int)]) + dt.names = ['p', 'q'] + assert_equal(dt.names, ['p', 'q']) + + def test_multiple_field_name_occurrence(self): + def test_dtype_init(): + np.dtype([("A", "f8"), ("B", "f8"), ("A", "f8")]) + + # Error raised when multiple fields have the same name + assert_raises(ValueError, test_dtype_init) + + @pytest.mark.skipif(sys.version_info[0] < 3, reason="Not Python 3") + def test_bytes_fields(self): + # Bytes are not allowed in field names and not recognized in titles + # on Py3 + assert_raises(TypeError, np.dtype, [(b'a', int)]) + assert_raises(TypeError, np.dtype, [(('b', b'a'), int)]) + + dt = np.dtype([((b'a', 'b'), int)]) + assert_raises(TypeError, dt.__getitem__, b'a') + + x = np.array([(1,), (2,), (3,)], dtype=dt) + assert_raises(IndexError, x.__getitem__, b'a') + + y = x[0] + assert_raises(IndexError, y.__getitem__, b'a') + + @pytest.mark.skipif(sys.version_info[0] < 3, reason="Not Python 3") + def test_multiple_field_name_unicode(self): + def test_dtype_unicode(): + np.dtype([("\u20B9", "f8"), ("B", "f8"), ("\u20B9", "f8")]) + + # Error raised when multiple fields have the same name(unicode included) + assert_raises(ValueError, test_dtype_unicode) + + @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Not Python 2") + def test_unicode_field_titles(self): + # Unicode field titles are added to field dict on Py2 + title = u'b' + dt = np.dtype([((title, 'a'), int)]) + dt[title] + dt['a'] + x = np.array([(1,), (2,), (3,)], dtype=dt) + x[title] + x['a'] + y = x[0] + y[title] + y['a'] + + @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Not Python 2") + def test_unicode_field_names(self): + # Unicode field names are converted to ascii on Python 2: + encodable_name = u'b' + assert_equal(np.dtype([(encodable_name, int)]).names[0], b'b') + assert_equal(np.dtype([(('a', encodable_name), int)]).names[0], b'b') + + # But raises UnicodeEncodeError if it can't be encoded: + nonencodable_name = u'\uc3bc' + assert_raises(UnicodeEncodeError, np.dtype, [(nonencodable_name, int)]) + assert_raises(UnicodeEncodeError, np.dtype, [(('a', nonencodable_name), int)]) + + def test_fromarrays_unicode(self): + # A single name string provided to fromarrays() is allowed to be unicode + # on both Python 2 and 3: + x = np.core.records.fromarrays([[0], [1]], names=u'a,b', formats=u'i4,i4') + assert_equal(x['a'][0], 0) + assert_equal(x['b'][0], 1) + + def test_unicode_order(self): + # Test that we can sort with order as a unicode field name in both Python 2 and + # 3: + name = u'b' + x = np.array([1, 3, 2], dtype=[(name, int)]) + x.sort(order=name) + assert_equal(x[u'b'], np.array([1, 2, 3])) + + def test_field_names(self): + # Test unicode and 8-bit / byte strings can be used + a = np.zeros((1,), dtype=[('f1', 'i4'), + ('f2', 'i4'), + ('f3', [('sf1', 'i4')])]) + is_py3 = sys.version_info[0] >= 3 + if is_py3: + funcs = (str,) + # byte string indexing fails gracefully + assert_raises(IndexError, a.__setitem__, b'f1', 1) + assert_raises(IndexError, a.__getitem__, b'f1') + assert_raises(IndexError, a['f1'].__setitem__, b'sf1', 1) + assert_raises(IndexError, a['f1'].__getitem__, b'sf1') + else: + funcs = (str, unicode) + for func in funcs: + b = a.copy() + fn1 = func('f1') + b[fn1] = 1 + assert_equal(b[fn1], 1) + fnn = func('not at all') + assert_raises(ValueError, b.__setitem__, fnn, 1) + assert_raises(ValueError, b.__getitem__, fnn) + b[0][fn1] = 2 + assert_equal(b[fn1], 2) + # Subfield + 
assert_raises(ValueError, b[0].__setitem__, fnn, 1) + assert_raises(ValueError, b[0].__getitem__, fnn) + # Subfield + fn3 = func('f3') + sfn1 = func('sf1') + b[fn3][sfn1] = 1 + assert_equal(b[fn3][sfn1], 1) + assert_raises(ValueError, b[fn3].__setitem__, fnn, 1) + assert_raises(ValueError, b[fn3].__getitem__, fnn) + # multiple subfields + fn2 = func('f2') + b[fn2] = 3 + + assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3)) + assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2)) + assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,))) + + # non-ascii unicode field indexing is well behaved + if not is_py3: + pytest.skip('non ascii unicode field indexing skipped; ' + 'raises segfault on python 2.x') + else: + assert_raises(ValueError, a.__setitem__, u'\u03e0', 1) + assert_raises(ValueError, a.__getitem__, u'\u03e0') + + def test_record_hash(self): + a = np.array([(1, 2), (1, 2)], dtype='i1,i2') + a.flags.writeable = False + b = np.array([(1, 2), (3, 4)], dtype=[('num1', 'i1'), ('num2', 'i2')]) + b.flags.writeable = False + c = np.array([(1, 2), (3, 4)], dtype='i1,i2') + c.flags.writeable = False + assert_(hash(a[0]) == hash(a[1])) + assert_(hash(a[0]) == hash(b[0])) + assert_(hash(a[0]) != hash(b[1])) + assert_(hash(c[0]) == hash(a[0]) and c[0] == a[0]) + + def test_record_no_hash(self): + a = np.array([(1, 2), (1, 2)], dtype='i1,i2') + assert_raises(TypeError, hash, a[0]) + + def test_empty_structure_creation(self): + # make sure these do not raise errors (gh-5631) + np.array([()], dtype={'names': [], 'formats': [], + 'offsets': [], 'itemsize': 12}) + np.array([(), (), (), (), ()], dtype={'names': [], 'formats': [], + 'offsets': [], 'itemsize': 12}) + + def test_multifield_indexing_view(self): + a = np.ones(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u4')]) + v = a[['a', 'c']] + assert_(v.base is a) + assert_(v.dtype == np.dtype({'names': ['a', 'c'], + 'formats': ['i4', 'u4'], + 'offsets': [0, 8]})) + v[:] = (4,5) + assert_equal(a[0].item(), (4, 1, 5)) + +class TestView(object): + def test_basic(self): + x = np.array([(1, 2, 3, 4), (5, 6, 7, 8)], + dtype=[('r', np.int8), ('g', np.int8), + ('b', np.int8), ('a', np.int8)]) + # We must be specific about the endianness here: + y = x.view(dtype='<i4') + # ... and again without the keyword. + z = x.view('<i4') + assert_array_equal(y, z) + assert_array_equal(y, [67305985, 134678021]) + + +def _mean(a, **args): + return a.mean(**args) + + +def _var(a, **args): + return a.var(**args) + + +def _std(a, **args): + return a.std(**args) + + +class TestStats(object): + + funcs = [_mean, _var, _std] + + def setup(self): + np.random.seed(range(3)) + self.rmat = np.random.random((4, 5)) + self.cmat = self.rmat + 1j * self.rmat + self.omat = np.array([Decimal(repr(r)) for r in self.rmat.flat]) + self.omat = self.omat.reshape(4, 5) + + def test_python_type(self): + for x in (np.float16(1.), 1, 1., 1+0j): + assert_equal(np.mean([x]), 1.) + assert_equal(np.std([x]), 0.) + assert_equal(np.var([x]), 0.) 
+ + def test_keepdims(self): + mat = np.eye(3) + for f in self.funcs: + for axis in [0, 1]: + res = f(mat, axis=axis, keepdims=True) + assert_(res.ndim == mat.ndim) + assert_(res.shape[axis] == 1) + for axis in [None]: + res = f(mat, axis=axis, keepdims=True) + assert_(res.shape == (1, 1)) + + def test_out(self): + mat = np.eye(3) + for f in self.funcs: + out = np.zeros(3) + tgt = f(mat, axis=1) + res = f(mat, axis=1, out=out) + assert_almost_equal(res, out) + assert_almost_equal(res, tgt) + out = np.empty(2) + assert_raises(ValueError, f, mat, axis=1, out=out) + out = np.empty((2, 2)) + assert_raises(ValueError, f, mat, axis=1, out=out) + + def test_dtype_from_input(self): + + icodes = np.typecodes['AllInteger'] + fcodes = np.typecodes['AllFloat'] + + # object type + for f in self.funcs: + mat = np.array([[Decimal(1)]*3]*3) + tgt = mat.dtype.type + res = f(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + res = type(f(mat, axis=None)) + assert_(res is Decimal) + + # integer types + for f in self.funcs: + for c in icodes: + mat = np.eye(3, dtype=c) + tgt = np.float64 + res = f(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + res = f(mat, axis=None).dtype.type + assert_(res is tgt) + + # mean for float types + for f in [_mean]: + for c in fcodes: + mat = np.eye(3, dtype=c) + tgt = mat.dtype.type + res = f(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + res = f(mat, axis=None).dtype.type + assert_(res is tgt) + + # var, std for float types + for f in [_var, _std]: + for c in fcodes: + mat = np.eye(3, dtype=c) + # deal with complex types + tgt = mat.real.dtype.type + res = f(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + res = f(mat, axis=None).dtype.type + assert_(res is tgt) + + def test_dtype_from_dtype(self): + mat = np.eye(3) + + # stats for integer types + # FIXME: + # this needs definition as there are lots places along the line + # where type casting may take place. 
+ + # for f in self.funcs: + # for c in np.typecodes['AllInteger']: + # tgt = np.dtype(c).type + # res = f(mat, axis=1, dtype=c).dtype.type + # assert_(res is tgt) + # # scalar case + # res = f(mat, axis=None, dtype=c).dtype.type + # assert_(res is tgt) + + # stats for float types + for f in self.funcs: + for c in np.typecodes['AllFloat']: + tgt = np.dtype(c).type + res = f(mat, axis=1, dtype=c).dtype.type + assert_(res is tgt) + # scalar case + res = f(mat, axis=None, dtype=c).dtype.type + assert_(res is tgt) + + def test_ddof(self): + for f in [_var]: + for ddof in range(3): + dim = self.rmat.shape[1] + tgt = f(self.rmat, axis=1) * dim + res = f(self.rmat, axis=1, ddof=ddof) * (dim - ddof) + for f in [_std]: + for ddof in range(3): + dim = self.rmat.shape[1] + tgt = f(self.rmat, axis=1) * np.sqrt(dim) + res = f(self.rmat, axis=1, ddof=ddof) * np.sqrt(dim - ddof) + assert_almost_equal(res, tgt) + assert_almost_equal(res, tgt) + + def test_ddof_too_big(self): + dim = self.rmat.shape[1] + for f in [_var, _std]: + for ddof in range(dim, dim + 2): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(self.rmat, axis=1, ddof=ddof) + assert_(not (res < 0).any()) + assert_(len(w) > 0) + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_empty(self): + A = np.zeros((0, 3)) + for f in self.funcs: + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(A, axis=axis)).all()) + assert_(len(w) > 0) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(f(A, axis=axis), np.zeros([])) + + def test_mean_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1]: + tgt = mat.sum(axis=axis) + res = _mean(mat, axis=axis) * mat.shape[axis] + assert_almost_equal(res, tgt) + for axis in [None]: + tgt = mat.sum(axis=axis) + res = _mean(mat, axis=axis) * np.prod(mat.shape) + assert_almost_equal(res, tgt) + + def test_mean_float16(self): + # This fail if the sum inside mean is done in float16 instead + # of float32. 
+ assert_(_mean(np.ones(100000, dtype='float16')) == 1) + + def test_var_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1, None]: + msqr = _mean(mat * mat.conj(), axis=axis) + mean = _mean(mat, axis=axis) + tgt = msqr - mean * mean.conjugate() + res = _var(mat, axis=axis) + assert_almost_equal(res, tgt) + + def test_std_values(self): + for mat in [self.rmat, self.cmat, self.omat]: + for axis in [0, 1, None]: + tgt = np.sqrt(_var(mat, axis=axis)) + res = _std(mat, axis=axis) + assert_almost_equal(res, tgt) + + def test_subclass(self): + class TestArray(np.ndarray): + def __new__(cls, data, info): + result = np.array(data) + result = result.view(cls) + result.info = info + return result + + def __array_finalize__(self, obj): + self.info = getattr(obj, "info", '') + + dat = TestArray([[1, 2, 3, 4], [5, 6, 7, 8]], 'jubba') + res = dat.mean(1) + assert_(res.info == dat.info) + res = dat.std(1) + assert_(res.info == dat.info) + res = dat.var(1) + assert_(res.info == dat.info) + +class TestVdot(object): + def test_basic(self): + dt_numeric = np.typecodes['AllFloat'] + np.typecodes['AllInteger'] + dt_complex = np.typecodes['Complex'] + + # test real + a = np.eye(3) + for dt in dt_numeric + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test complex + a = np.eye(3) * 1j + for dt in dt_complex + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test boolean + b = np.eye(3, dtype=bool) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), True) + + def test_vdot_array_order(self): + a = np.array([[1, 2], [3, 4]], order='C') + b = np.array([[1, 2], [3, 4]], order='F') + res = np.vdot(a, a) + + # integer arrays are exact + assert_equal(np.vdot(a, b), res) + assert_equal(np.vdot(b, a), res) + assert_equal(np.vdot(b, b), res) + + def test_vdot_uncontiguous(self): + for size in [2, 1000]: + # Different sizes match different branches in vdot. 
+ a = np.zeros((size, 2, 2)) + b = np.zeros((size, 2, 2)) + a[:, 0, 0] = np.arange(size) + b[:, 0, 0] = np.arange(size) + 1 + # Make a and b uncontiguous: + a = a[..., 0] + b = b[..., 0] + + assert_equal(np.vdot(a, b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a, b.copy()), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a.copy(), b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a.copy('F'), b), + np.vdot(a.flatten(), b.flatten())) + assert_equal(np.vdot(a, b.copy('F')), + np.vdot(a.flatten(), b.flatten())) + + +class TestDot(object): + def setup(self): + np.random.seed(128) + self.A = np.random.rand(4, 2) + self.b1 = np.random.rand(2, 1) + self.b2 = np.random.rand(2) + self.b3 = np.random.rand(1, 2) + self.b4 = np.random.rand(4) + self.N = 7 + + def test_dotmatmat(self): + A = self.A + res = np.dot(A.transpose(), A) + tgt = np.array([[1.45046013, 0.86323640], + [0.86323640, 0.84934569]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotmatvec(self): + A, b1 = self.A, self.b1 + res = np.dot(A, b1) + tgt = np.array([[0.32114320], [0.04889721], + [0.15696029], [0.33612621]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotmatvec2(self): + A, b2 = self.A, self.b2 + res = np.dot(A, b2) + tgt = np.array([0.29677940, 0.04518649, 0.14468333, 0.31039293]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat(self): + A, b4 = self.A, self.b4 + res = np.dot(b4, A) + tgt = np.array([1.23495091, 1.12222648]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat2(self): + b3, A = self.b3, self.A + res = np.dot(b3, A.transpose()) + tgt = np.array([[0.58793804, 0.08957460, 0.30605758, 0.62716383]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecmat3(self): + A, b4 = self.A, self.b4 + res = np.dot(A.transpose(), b4) + tgt = np.array([1.23495091, 1.12222648]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecvecouter(self): + b1, b3 = self.b1, self.b3 + res = np.dot(b1, b3) + tgt = np.array([[0.20128610, 0.08400440], [0.07190947, 0.03001058]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecvecinner(self): + b1, b3 = self.b1, self.b3 + res = np.dot(b3, b1) + tgt = np.array([[ 0.23129668]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotcolumnvect1(self): + b1 = np.ones((3, 1)) + b2 = [5.3] + res = np.dot(b1, b2) + tgt = np.array([5.3, 5.3, 5.3]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotcolumnvect2(self): + b1 = np.ones((3, 1)).transpose() + b2 = [6.2] + res = np.dot(b2, b1) + tgt = np.array([6.2, 6.2, 6.2]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecscalar(self): + np.random.seed(100) + b1 = np.random.rand(1, 1) + b2 = np.random.rand(1, 4) + res = np.dot(b1, b2) + tgt = np.array([[0.15126730, 0.23068496, 0.45905553, 0.00256425]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_dotvecscalar2(self): + np.random.seed(100) + b1 = np.random.rand(4, 1) + b2 = np.random.rand(1, 1) + res = np.dot(b1, b2) + tgt = np.array([[0.00256425],[0.00131359],[0.00200324],[ 0.00398638]]) + assert_almost_equal(res, tgt, decimal=self.N) + + def test_all(self): + dims = [(), (1,), (1, 1)] + dout = [(), (1,), (1, 1), (1,), (), (1,), (1, 1), (1,), (1, 1)] + for dim, (dim1, dim2) in zip(dout, itertools.product(dims, dims)): + b1 = np.zeros(dim1) + b2 = np.zeros(dim2) + res = np.dot(b1, b2) + tgt = np.zeros(dim) + assert_(res.shape == tgt.shape) + assert_almost_equal(res, tgt, decimal=self.N) + 
+ def test_vecobject(self): + class Vec(object): + def __init__(self, sequence=None): + if sequence is None: + sequence = [] + self.array = np.array(sequence) + + def __add__(self, other): + out = Vec() + out.array = self.array + other.array + return out + + def __sub__(self, other): + out = Vec() + out.array = self.array - other.array + return out + + def __mul__(self, other): # with scalar + out = Vec(self.array.copy()) + out.array *= other + return out + + def __rmul__(self, other): + return self*other + + U_non_cont = np.transpose([[1., 1.], [1., 2.]]) + U_cont = np.ascontiguousarray(U_non_cont) + x = np.array([Vec([1., 0.]), Vec([0., 1.])]) + zeros = np.array([Vec([0., 0.]), Vec([0., 0.])]) + zeros_test = np.dot(U_cont, x) - np.dot(U_non_cont, x) + assert_equal(zeros[0].array, zeros_test[0].array) + assert_equal(zeros[1].array, zeros_test[1].array) + + def test_dot_2args(self): + from numpy.core.multiarray import dot + + a = np.array([[1, 2], [3, 4]], dtype=float) + b = np.array([[1, 0], [1, 1]], dtype=float) + c = np.array([[3, 2], [7, 4]], dtype=float) + + d = dot(a, b) + assert_allclose(c, d) + + def test_dot_3args(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 32)) + for i in range(12): + dot(f, v, r) + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(r), 2) + r2 = dot(f, v, out=None) + assert_array_equal(r2, r) + assert_(r is dot(f, v, out=r)) + + v = v[:, 0].copy() # v.shape == (16,) + r = r[:, 0].copy() # r.shape == (1024,) + r2 = dot(f, v) + assert_(r is dot(f, v, r)) + assert_array_equal(r2, r) + + def test_dot_3args_errors(self): + from numpy.core.multiarray import dot + + np.random.seed(22) + f = np.random.random_sample((1024, 16)) + v = np.random.random_sample((16, 32)) + + r = np.empty((1024, 31)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32,)) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((32, 1024)) + assert_raises(ValueError, dot, f, v, r) + assert_raises(ValueError, dot, f, v, r.T) + + r = np.empty((1024, 64)) + assert_raises(ValueError, dot, f, v, r[:, ::2]) + assert_raises(ValueError, dot, f, v, r[:, :32]) + + r = np.empty((1024, 32), dtype=np.float32) + assert_raises(ValueError, dot, f, v, r) + + r = np.empty((1024, 32), dtype=int) + assert_raises(ValueError, dot, f, v, r) + + def test_dot_array_order(self): + a = np.array([[1, 2], [3, 4]], order='C') + b = np.array([[1, 2], [3, 4]], order='F') + res = np.dot(a, a) + + # integer arrays are exact + assert_equal(np.dot(a, b), res) + assert_equal(np.dot(b, a), res) + assert_equal(np.dot(b, b), res) + + def test_accelerate_framework_sgemv_fix(self): + + def aligned_array(shape, align, dtype, order='C'): + d = dtype(0) + N = np.prod(shape) + tmp = np.zeros(N * d.nbytes + align, dtype=np.uint8) + address = tmp.__array_interface__["data"][0] + for offset in range(align): + if (address + offset) % align == 0: + break + tmp = tmp[offset:offset+N*d.nbytes].view(dtype=dtype) + return tmp.reshape(shape, order=order) + + def as_aligned(arr, align, dtype, order='C'): + aligned = aligned_array(arr.shape, align, dtype, order) + aligned[:] = arr[:] + return aligned + + def assert_dot_close(A, X, desired): + assert_allclose(np.dot(A, X), desired, rtol=1e-5, atol=1e-7) + + m = aligned_array(100, 15, np.float32) + s = aligned_array((100, 100), 15, np.float32) + np.dot(s, m) # this will always segfault if the bug 
is present + + testdata = itertools.product((15,32), (10000,), (200,89), ('C','F')) + for align, m, n, a_order in testdata: + # Calculation in double precision + A_d = np.random.rand(m, n) + X_d = np.random.rand(n) + desired = np.dot(A_d, X_d) + # Calculation with aligned single precision + A_f = as_aligned(A_d, align, np.float32, order=a_order) + X_f = as_aligned(X_d, align, np.float32) + assert_dot_close(A_f, X_f, desired) + # Strided A rows + A_d_2 = A_d[::2] + desired = np.dot(A_d_2, X_d) + A_f_2 = A_f[::2] + assert_dot_close(A_f_2, X_f, desired) + # Strided A columns, strided X vector + A_d_22 = A_d_2[:, ::2] + X_d_2 = X_d[::2] + desired = np.dot(A_d_22, X_d_2) + A_f_22 = A_f_2[:, ::2] + X_f_2 = X_f[::2] + assert_dot_close(A_f_22, X_f_2, desired) + # Check the strides are as expected + if a_order == 'F': + assert_equal(A_f_22.strides, (8, 8 * m)) + else: + assert_equal(A_f_22.strides, (8 * n, 8)) + assert_equal(X_f_2.strides, (8,)) + # Strides in A rows + cols only + X_f_2c = as_aligned(X_f_2, align, np.float32) + assert_dot_close(A_f_22, X_f_2c, desired) + # Strides just in A cols + A_d_12 = A_d[:, ::2] + desired = np.dot(A_d_12, X_d_2) + A_f_12 = A_f[:, ::2] + assert_dot_close(A_f_12, X_f_2c, desired) + # Strides in A cols and X + assert_dot_close(A_f_12, X_f_2, desired) + + +class MatmulCommon(object): + """Common tests for '@' operator and numpy.matmul. + + """ + # Should work with these types. Will want to add + # "O" at some point + types = "?bhilqBHILQefdgFDG" + + def test_exceptions(self): + dims = [ + ((1,), (2,)), # mismatched vector vector + ((2, 1,), (2,)), # mismatched matrix vector + ((2,), (1, 2)), # mismatched vector matrix + ((1, 2), (3, 1)), # mismatched matrix matrix + ((1,), ()), # vector scalar + ((), (1)), # scalar vector + ((1, 1), ()), # matrix scalar + ((), (1, 1)), # scalar matrix + ((2, 2, 1), (3, 1, 2)), # cannot broadcast + ] + + for dt, (dm1, dm2) in itertools.product(self.types, dims): + a = np.ones(dm1, dtype=dt) + b = np.ones(dm2, dtype=dt) + assert_raises(ValueError, self.matmul, a, b) + + def test_shapes(self): + dims = [ + ((1, 1), (2, 1, 1)), # broadcast first argument + ((2, 1, 1), (1, 1)), # broadcast second argument + ((2, 1, 1), (2, 1, 1)), # matrix stack sizes match + ] + + for dt, (dm1, dm2) in itertools.product(self.types, dims): + a = np.ones(dm1, dtype=dt) + b = np.ones(dm2, dtype=dt) + res = self.matmul(a, b) + assert_(res.shape == (2, 1, 1)) + + # vector vector returns scalars. 
+ for dt in self.types: + a = np.ones((2,), dtype=dt) + b = np.ones((2,), dtype=dt) + c = self.matmul(a, b) + assert_(np.array(c).shape == ()) + + def test_result_types(self): + mat = np.ones((1,1)) + vec = np.ones((1,)) + for dt in self.types: + m = mat.astype(dt) + v = vec.astype(dt) + for arg in [(m, v), (v, m), (m, m)]: + res = self.matmul(*arg) + assert_(res.dtype == dt) + + # vector vector returns scalars + res = self.matmul(v, v) + assert_(type(res) is np.dtype(dt).type) + + def test_scalar_output(self): + vec1 = np.array([2]) + vec2 = np.array([3, 4]).reshape(1, -1) + tgt = np.array([6, 8]) + for dt in self.types[1:]: + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) + assert_equal(res, tgt) + res = self.matmul(v2.T, v1) + assert_equal(res, tgt) + + # boolean type + vec = np.array([True, True], dtype='?').reshape(1, -1) + res = self.matmul(vec[:, 0], vec) + assert_equal(res, True) + + def test_vector_vector_values(self): + vec1 = np.array([1, 2]) + vec2 = np.array([3, 4]).reshape(-1, 1) + tgt1 = np.array([11]) + tgt2 = np.array([[3, 6], [4, 8]]) + for dt in self.types[1:]: + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) + assert_equal(res, tgt1) + # no broadcast, we must make v1 into a 2d ndarray + res = self.matmul(v2, v1.reshape(1, -1)) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, True], dtype='?') + res = self.matmul(vec, vec) + assert_equal(res, True) + + def test_vector_matrix_values(self): + vec = np.array([1, 2]) + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([7, 10]) + tgt2 = np.stack([tgt1]*2, axis=0) + for dt in self.types[1:]: + v = vec.astype(dt) + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + res = self.matmul(v, m1) + assert_equal(res, tgt1) + res = self.matmul(v, m2) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, False]) + mat1 = np.array([[True, False], [False, True]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([True, False]) + tgt2 = np.stack([tgt1]*2, axis=0) + + res = self.matmul(vec, mat1) + assert_equal(res, tgt1) + res = self.matmul(vec, mat2) + assert_equal(res, tgt2) + + def test_matrix_vector_values(self): + vec = np.array([1, 2]) + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([5, 11]) + tgt2 = np.stack([tgt1]*2, axis=0) + for dt in self.types[1:]: + v = vec.astype(dt) + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + res = self.matmul(m1, v) + assert_equal(res, tgt1) + res = self.matmul(m2, v) + assert_equal(res, tgt2) + + # boolean type + vec = np.array([True, False]) + mat1 = np.array([[True, False], [False, True]]) + mat2 = np.stack([mat1]*2, axis=0) + tgt1 = np.array([True, False]) + tgt2 = np.stack([tgt1]*2, axis=0) + + res = self.matmul(vec, mat1) + assert_equal(res, tgt1) + res = self.matmul(vec, mat2) + assert_equal(res, tgt2) + + def test_matrix_matrix_values(self): + mat1 = np.array([[1, 2], [3, 4]]) + mat2 = np.array([[1, 0], [1, 1]]) + mat12 = np.stack([mat1, mat2], axis=0) + mat21 = np.stack([mat2, mat1], axis=0) + tgt11 = np.array([[7, 10], [15, 22]]) + tgt12 = np.array([[3, 2], [7, 4]]) + tgt21 = np.array([[1, 2], [4, 6]]) + tgt12_21 = np.stack([tgt12, tgt21], axis=0) + tgt11_12 = np.stack((tgt11, tgt12), axis=0) + tgt11_21 = np.stack((tgt11, tgt21), axis=0) + for dt in self.types[1:]: + m1 = mat1.astype(dt) + m2 = mat2.astype(dt) + m12 = mat12.astype(dt) + m21 = mat21.astype(dt) + + # matrix @ matrix + res = self.matmul(m1, m2) + assert_equal(res, 
tgt12) + res = self.matmul(m2, m1) + assert_equal(res, tgt21) + + # stacked @ matrix + res = self.matmul(m12, m1) + assert_equal(res, tgt11_21) + + # matrix @ stacked + res = self.matmul(m1, m12) + assert_equal(res, tgt11_12) + + # stacked @ stacked + res = self.matmul(m12, m21) + assert_equal(res, tgt12_21) + + # boolean type + m1 = np.array([[1, 1], [0, 0]], dtype=np.bool_) + m2 = np.array([[1, 0], [1, 1]], dtype=np.bool_) + m12 = np.stack([m1, m2], axis=0) + m21 = np.stack([m2, m1], axis=0) + tgt11 = m1 + tgt12 = m1 + tgt21 = np.array([[1, 1], [1, 1]], dtype=np.bool_) + tgt12_21 = np.stack([tgt12, tgt21], axis=0) + tgt11_12 = np.stack((tgt11, tgt12), axis=0) + tgt11_21 = np.stack((tgt11, tgt21), axis=0) + + # matrix @ matrix + res = self.matmul(m1, m2) + assert_equal(res, tgt12) + res = self.matmul(m2, m1) + assert_equal(res, tgt21) + + # stacked @ matrix + res = self.matmul(m12, m1) + assert_equal(res, tgt11_21) + + # matrix @ stacked + res = self.matmul(m1, m12) + assert_equal(res, tgt11_12) + + # stacked @ stacked + res = self.matmul(m12, m21) + assert_equal(res, tgt12_21) + + +class TestMatmul(MatmulCommon): + matmul = np.matmul + + def test_out_arg(self): + a = np.ones((5, 2), dtype=float) + b = np.array([[1, 3], [5, 7]], dtype=float) + tgt = np.dot(a, b) + + # test as positional argument + msg = "out positional argument" + out = np.zeros((5, 2), dtype=float) + self.matmul(a, b, out) + assert_array_equal(out, tgt, err_msg=msg) + + # test as keyword argument + msg = "out keyword argument" + out = np.zeros((5, 2), dtype=float) + self.matmul(a, b, out=out) + assert_array_equal(out, tgt, err_msg=msg) + + # test out with not allowed type cast (safe casting) + msg = "Cannot cast ufunc matmul output" + out = np.zeros((5, 2), dtype=np.int32) + assert_raises_regex(TypeError, msg, self.matmul, a, b, out=out) + + # test out with type upcast to complex + out = np.zeros((5, 2), dtype=np.complex128) + c = self.matmul(a, b, out=out) + assert_(c is out) + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning, '') + c = c.astype(tgt.dtype) + assert_array_equal(c, tgt) + + def test_out_contiguous(self): + a = np.ones((5, 2), dtype=float) + b = np.array([[1, 3], [5, 7]], dtype=float) + v = np.array([1, 3], dtype=float) + tgt = np.dot(a, b) + tgt_mv = np.dot(a, v) + + # test out non-contiguous + out = np.ones((5, 2, 2), dtype=float) + c = self.matmul(a, b, out=out[..., 0]) + assert c.base is out + assert_array_equal(c, tgt) + c = self.matmul(a, v, out=out[:, 0, 0]) + assert_array_equal(c, tgt_mv) + c = self.matmul(v, a.T, out=out[:, 0, 0]) + assert_array_equal(c, tgt_mv) + + # test out contiguous in only last dim + out = np.ones((10, 2), dtype=float) + c = self.matmul(a, b, out=out[::2, :]) + assert_array_equal(c, tgt) + + # test transposes of out, args + out = np.ones((5, 2), dtype=float) + c = self.matmul(b.T, a.T, out=out.T) + assert_array_equal(out, tgt) + + m1 = np.arange(15.).reshape(5, 3) + m2 = np.arange(21.).reshape(3, 7) + m3 = np.arange(30.).reshape(5, 6)[:, ::2] # non-contiguous + vc = np.arange(10.) + vr = np.arange(6.) 
+ m0 = np.zeros((3, 0)) + @pytest.mark.parametrize('args', ( + # matrix-matrix + (m1, m2), (m2.T, m1.T), (m2.T.copy(), m1.T), (m2.T, m1.T.copy()), + # matrix-matrix-transpose, contiguous and non + (m1, m1.T), (m1.T, m1), (m1, m3.T), (m3, m1.T), + (m3, m3.T), (m3.T, m3), + # matrix-matrix non-contiguous + (m3, m2), (m2.T, m3.T), (m2.T.copy(), m3.T), + # vector-matrix, matrix-vector, contiguous + (m1, vr[:3]), (vc[:5], m1), (m1.T, vc[:5]), (vr[:3], m1.T), + # vector-matrix, matrix-vector, vector non-contiguous + (m1, vr[::2]), (vc[::2], m1), (m1.T, vc[::2]), (vr[::2], m1.T), + # vector-matrix, matrix-vector, matrix non-contiguous + (m3, vr[:3]), (vc[:5], m3), (m3.T, vc[:5]), (vr[:3], m3.T), + # vector-matrix, matrix-vector, both non-contiguous + (m3, vr[::2]), (vc[::2], m3), (m3.T, vc[::2]), (vr[::2], m3.T), + # size == 0 + (m0, m0.T), (m0.T, m0), (m1, m0), (m0.T, m1.T), + )) + def test_dot_equivalent(self, args): + r1 = np.matmul(*args) + r2 = np.dot(*args) + assert_equal(r1, r2) + + r3 = np.matmul(args[0].copy(), args[1].copy()) + assert_equal(r1, r3) + + def test_matmul_object(self): + import fractions + + f = np.vectorize(fractions.Fraction) + def random_ints(): + return np.random.randint(1, 1000, size=(10, 3, 3)) + M1 = f(random_ints(), random_ints()) + M2 = f(random_ints(), random_ints()) + + M3 = self.matmul(M1, M2) + + [N1, N2, N3] = [a.astype(float) for a in [M1, M2, M3]] + + assert_allclose(N3, self.matmul(N1, N2)) + + def test_matmul_object_type_scalar(self): + from fractions import Fraction as F + v = np.array([F(2,3), F(5,7)]) + res = self.matmul(v, v) + assert_(type(res) is F) + + def test_matmul_empty(self): + a = np.empty((3, 0), dtype=object) + b = np.empty((0, 3), dtype=object) + c = np.zeros((3, 3)) + assert_array_equal(np.matmul(a, b), c) + + def test_matmul_exception_multiply(self): + # test that matmul fails if `__mul__` is missing + class add_not_multiply(): + def __add__(self, other): + return self + a = np.full((3,3), add_not_multiply()) + with assert_raises(TypeError): + b = np.matmul(a, a) + + def test_matmul_exception_add(self): + # test that matmul fails if `__add__` is missing + class multiply_not_add(): + def __mul__(self, other): + return self + a = np.full((3,3), multiply_not_add()) + with assert_raises(TypeError): + b = np.matmul(a, a) + + def test_matmul_bool(self): + # gh-14439 + a = np.array([[1, 0],[1, 1]], dtype=bool) + assert np.max(a.view(np.uint8)) == 1 + b = np.matmul(a, a) + # matmul with boolean output should always be 0, 1 + assert np.max(b.view(np.uint8)) == 1 + + np.random.seed(42) + d = np.random.randint(2, size=4*5, dtype=np.int8) + d = d.reshape(4, 5) > 0 + out1 = np.matmul(d, d.reshape(5, 4)) + out2 = np.dot(d, d.reshape(5, 4)) + assert_equal(out1, out2) + + c = np.matmul(np.zeros((2, 0), dtype=bool), np.zeros(0, dtype=bool)) + assert not np.any(c) + + +if sys.version_info[:2] >= (3, 5): + class TestMatmulOperator(MatmulCommon): + import operator + matmul = operator.matmul + + def test_array_priority_override(self): + + class A(object): + __array_priority__ = 1000 + + def __matmul__(self, other): + return "A" + + def __rmatmul__(self, other): + return "A" + + a = A() + b = np.ones(2) + assert_equal(self.matmul(a, b), "A") + assert_equal(self.matmul(b, a), "A") + + def test_matmul_raises(self): + assert_raises(TypeError, self.matmul, np.int8(5), np.int8(5)) + assert_raises(TypeError, self.matmul, np.void(b'abc'), np.void(b'abc')) + assert_raises(ValueError, self.matmul, np.arange(10), np.void(b'abc')) + + def test_matmul_inplace(): + # It 
would be nice to support in-place matmul eventually, but for now + # we don't have a working implementation, so better just to error out + # and nudge people to writing "a = a @ b". + a = np.eye(3) + b = np.eye(3) + assert_raises(TypeError, a.__imatmul__, b) + import operator + assert_raises(TypeError, operator.imatmul, a, b) + # we avoid writing the token `exec` so as not to crash python 2's + # parser + exec_ = getattr(builtins, "exec") + assert_raises(TypeError, exec_, "a @= b", globals(), locals()) + + def test_matmul_axes(): + a = np.arange(3*4*5).reshape(3, 4, 5) + c = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (1, 2)]) + assert c.shape == (3, 4, 4) + d = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (0, 1)]) + assert d.shape == (4, 4, 3) + e = np.swapaxes(d, 0, 2) + assert_array_equal(e, c) + f = np.matmul(a, np.arange(3), axes=[(1, 0), (0), (0)]) + assert f.shape == (4, 5) + + +class TestInner(object): + + def test_inner_type_mismatch(self): + c = 1. + A = np.array((1,1), dtype='i,i') + + assert_raises(TypeError, np.inner, c, A) + assert_raises(TypeError, np.inner, A, c) + + def test_inner_scalar_and_vector(self): + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + sca = np.array(3, dtype=dt)[()] + vec = np.array([1, 2], dtype=dt) + desired = np.array([3, 6], dtype=dt) + assert_equal(np.inner(vec, sca), desired) + assert_equal(np.inner(sca, vec), desired) + + def test_vecself(self): + # Ticket 844. + # Inner product of a vector with itself segfaults or give + # meaningless result + a = np.zeros(shape=(1, 80), dtype=np.float64) + p = np.inner(a, a) + assert_almost_equal(p, 0, decimal=14) + + def test_inner_product_with_various_contiguities(self): + # github issue 6532 + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + # check an inner product involving a matrix transpose + A = np.array([[1, 2], [3, 4]], dtype=dt) + B = np.array([[1, 3], [2, 4]], dtype=dt) + C = np.array([1, 1], dtype=dt) + desired = np.array([4, 6], dtype=dt) + assert_equal(np.inner(A.T, C), desired) + assert_equal(np.inner(C, A.T), desired) + assert_equal(np.inner(B, C), desired) + assert_equal(np.inner(C, B), desired) + # check a matrix product + desired = np.array([[7, 10], [15, 22]], dtype=dt) + assert_equal(np.inner(A, B), desired) + # check the syrk vs. 
gemm paths + desired = np.array([[5, 11], [11, 25]], dtype=dt) + assert_equal(np.inner(A, A), desired) + assert_equal(np.inner(A, A.copy()), desired) + # check an inner product involving an aliased and reversed view + a = np.arange(5).astype(dt) + b = a[::-1] + desired = np.array(10, dtype=dt).item() + assert_equal(np.inner(b, a), desired) + + def test_3d_tensor(self): + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + a = np.arange(24).reshape(2,3,4).astype(dt) + b = np.arange(24, 48).reshape(2,3,4).astype(dt) + desired = np.array( + [[[[ 158, 182, 206], + [ 230, 254, 278]], + + [[ 566, 654, 742], + [ 830, 918, 1006]], + + [[ 974, 1126, 1278], + [1430, 1582, 1734]]], + + [[[1382, 1598, 1814], + [2030, 2246, 2462]], + + [[1790, 2070, 2350], + [2630, 2910, 3190]], + + [[2198, 2542, 2886], + [3230, 3574, 3918]]]], + dtype=dt + ) + assert_equal(np.inner(a, b), desired) + assert_equal(np.inner(b, a).transpose(2,3,0,1), desired) + + +class TestAlen(object): + def test_basic(self): + m = np.array([1, 2, 3]) + assert_equal(np.alen(m), 3) + + m = np.array([[1, 2, 3], [4, 5, 7]]) + assert_equal(np.alen(m), 2) + + m = [1, 2, 3] + assert_equal(np.alen(m), 3) + + m = [[1, 2, 3], [4, 5, 7]] + assert_equal(np.alen(m), 2) + + def test_singleton(self): + assert_equal(np.alen(5), 1) + + +class TestChoose(object): + def setup(self): + self.x = 2*np.ones((3,), dtype=int) + self.y = 3*np.ones((3,), dtype=int) + self.x2 = 2*np.ones((2, 3), dtype=int) + self.y2 = 3*np.ones((2, 3), dtype=int) + self.ind = [0, 0, 1] + + def test_basic(self): + A = np.choose(self.ind, (self.x, self.y)) + assert_equal(A, [2, 2, 3]) + + def test_broadcast1(self): + A = np.choose(self.ind, (self.x2, self.y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + def test_broadcast2(self): + A = np.choose(self.ind, (self.x, self.y2)) + assert_equal(A, [[2, 2, 3], [2, 2, 3]]) + + +class TestRepeat(object): + def setup(self): + self.m = np.array([1, 2, 3, 4, 5, 6]) + self.m_rect = self.m.reshape((2, 3)) + + def test_basic(self): + A = np.repeat(self.m, [1, 3, 2, 1, 1, 2]) + assert_equal(A, [1, 2, 2, 2, 3, + 3, 4, 5, 6, 6]) + + def test_broadcast1(self): + A = np.repeat(self.m, 2) + assert_equal(A, [1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6]) + + def test_axis_spec(self): + A = np.repeat(self.m_rect, [2, 1], axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6]]) + + A = np.repeat(self.m_rect, [1, 3, 2], axis=1) + assert_equal(A, [[1, 2, 2, 2, 3, 3], + [4, 5, 5, 5, 6, 6]]) + + def test_broadcast2(self): + A = np.repeat(self.m_rect, 2, axis=0) + assert_equal(A, [[1, 2, 3], + [1, 2, 3], + [4, 5, 6], + [4, 5, 6]]) + + A = np.repeat(self.m_rect, 2, axis=1) + assert_equal(A, [[1, 1, 2, 2, 3, 3], + [4, 4, 5, 5, 6, 6]]) + + +# TODO: test for multidimensional +NEIGH_MODE = {'zero': 0, 'one': 1, 'constant': 2, 'circular': 3, 'mirror': 4} + + +@pytest.mark.parametrize('dt', [float, Decimal], ids=['float', 'object']) +class TestNeighborhoodIter(object): + # Simple, 2d tests + def test_simple2d(self, dt): + # Test zero and one padding for simple data type + x = np.array([[0, 1], [2, 3]], dtype=dt) + r = [np.array([[0, 0, 0], [0, 0, 1]], dtype=dt), + np.array([[0, 0, 0], [0, 1, 0]], dtype=dt), + np.array([[0, 0, 1], [0, 2, 3]], dtype=dt), + np.array([[0, 1, 0], [2, 3, 0]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [np.array([[1, 1, 1], [1, 0, 1]], dtype=dt), + np.array([[1, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[1, 0, 1], [1, 
2, 3]], dtype=dt), + np.array([[0, 1, 1], [2, 3, 1]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['one']) + assert_array_equal(l, r) + + r = [np.array([[4, 4, 4], [4, 0, 1]], dtype=dt), + np.array([[4, 4, 4], [0, 1, 4]], dtype=dt), + np.array([[4, 0, 1], [4, 2, 3]], dtype=dt), + np.array([[0, 1, 4], [2, 3, 4]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], 4, NEIGH_MODE['constant']) + assert_array_equal(l, r) + + def test_mirror2d(self, dt): + x = np.array([[0, 1], [2, 3]], dtype=dt) + r = [np.array([[0, 0, 1], [0, 0, 1]], dtype=dt), + np.array([[0, 1, 1], [0, 1, 1]], dtype=dt), + np.array([[0, 0, 1], [2, 2, 3]], dtype=dt), + np.array([[0, 1, 1], [2, 3, 3]], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 0, -1, 1], x[0], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Simple, 1d tests + def test_simple(self, dt): + # Test padding with constant values + x = np.linspace(1, 5, 5).astype(dt) + r = [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [[1, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 1]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[0], NEIGH_MODE['one']) + assert_array_equal(l, r) + + r = [[x[4], 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, x[4]]] + l = _multiarray_tests.test_neighborhood_iterator( + x, [-1, 1], x[4], NEIGH_MODE['constant']) + assert_array_equal(l, r) + + # Test mirror modes + def test_mirror(self, dt): + x = np.linspace(1, 5, 5).astype(dt) + r = np.array([[2, 1, 1, 2, 3], [1, 1, 2, 3, 4], [1, 2, 3, 4, 5], + [2, 3, 4, 5, 5], [3, 4, 5, 5, 4]], dtype=dt) + l = _multiarray_tests.test_neighborhood_iterator( + x, [-2, 2], x[1], NEIGH_MODE['mirror']) + assert_([i.dtype == dt for i in l]) + assert_array_equal(l, r) + + # Circular mode + def test_circular(self, dt): + x = np.linspace(1, 5, 5).astype(dt) + r = np.array([[4, 5, 1, 2, 3], [5, 1, 2, 3, 4], [1, 2, 3, 4, 5], + [2, 3, 4, 5, 1], [3, 4, 5, 1, 2]], dtype=dt) + l = _multiarray_tests.test_neighborhood_iterator( + x, [-2, 2], x[0], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + +# Test stacking neighborhood iterators +class TestStackedNeighborhoodIter(object): + # Simple, 1d test: stacking 2 constant-padded neigh iterators + def test_simple_const(self): + dt = np.float64 + # Test zero and one padding for simple data type + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0], dtype=dt), + np.array([0], dtype=dt), + np.array([1], dtype=dt), + np.array([2], dtype=dt), + np.array([3], dtype=dt), + np.array([0], dtype=dt), + np.array([0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-2, 4], NEIGH_MODE['zero'], [0, 0], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + r = [np.array([1, 0, 1], dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-1, 1], NEIGH_MODE['one']) + assert_array_equal(l, r) + + # 2nd simple, 1d test: stacking 2 neigh iterators, mixing const padding and + # mirror padding + def test_simple_mirror(self): + dt = np.float64 + # Stacking zero on top of mirror + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 1], dtype=dt), + np.array([1, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + 
np.array([2, 3, 3], dtype=dt), + np.array([3, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['mirror'], [-1, 1], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 0], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 2nd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [0, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 3rd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 0, 0, 1, 2], dtype=dt), + np.array([0, 0, 1, 2, 3], dtype=dt), + np.array([0, 1, 2, 3, 0], dtype=dt), + np.array([1, 2, 3, 0, 0], dtype=dt), + np.array([2, 3, 0, 0, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # 3rd simple, 1d test: stacking 2 neigh iterators, mixing const padding and + # circular padding + def test_simple_circular(self): + dt = np.float64 + # Stacking zero on top of mirror + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 3, 1], dtype=dt), + np.array([3, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 1], dtype=dt), + np.array([3, 1, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['circular'], [-1, 1], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt), + np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 0], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 2nd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([0, 1, 2], dtype=dt), + np.array([1, 2, 3], dtype=dt), + np.array([2, 3, 0], dtype=dt), + np.array([3, 0, 0], dtype=dt), + np.array([0, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [0, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero: 3rd + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([3, 0, 0, 1, 2], dtype=dt), + np.array([0, 0, 1, 2, 3], dtype=dt), + np.array([0, 1, 2, 3, 0], dtype=dt), + np.array([1, 2, 3, 0, 0], dtype=dt), + np.array([2, 3, 0, 0, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [-1, 3], NEIGH_MODE['zero'], [-2, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + + # 4th simple, 1d test: stacking 2 neigh iterators, but with lower iterator + # being strictly within the array + def test_simple_strict_within(self): + dt = np.float64 + # Stacking zero on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 0], dtype=dt)] + l = 
_multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['zero']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 3], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['mirror']) + assert_array_equal(l, r) + + # Stacking mirror on top of zero, first neighborhood strictly inside the + # array + x = np.array([1, 2, 3], dtype=dt) + r = [np.array([1, 2, 3, 1], dtype=dt)] + l = _multiarray_tests.test_neighborhood_iterator_oob( + x, [1, 1], NEIGH_MODE['zero'], [-1, 2], NEIGH_MODE['circular']) + assert_array_equal(l, r) + +class TestWarnings(object): + + def test_complex_warning(self): + x = np.array([1, 2]) + y = np.array([1-2j, 1+2j]) + + with warnings.catch_warnings(): + warnings.simplefilter("error", np.ComplexWarning) + assert_raises(np.ComplexWarning, x.__setitem__, slice(None), y) + assert_equal(x, [1, 2]) + + +class TestMinScalarType(object): + + def test_usigned_shortshort(self): + dt = np.min_scalar_type(2**8-1) + wanted = np.dtype('uint8') + assert_equal(wanted, dt) + + def test_usigned_short(self): + dt = np.min_scalar_type(2**16-1) + wanted = np.dtype('uint16') + assert_equal(wanted, dt) + + def test_usigned_int(self): + dt = np.min_scalar_type(2**32-1) + wanted = np.dtype('uint32') + assert_equal(wanted, dt) + + def test_usigned_longlong(self): + dt = np.min_scalar_type(2**63-1) + wanted = np.dtype('uint64') + assert_equal(wanted, dt) + + def test_object(self): + dt = np.min_scalar_type(2**64) + wanted = np.dtype('O') + assert_equal(wanted, dt) + + +from numpy.core._internal import _dtype_from_pep3118 + + +class TestPEP3118Dtype(object): + def _check(self, spec, wanted): + dt = np.dtype(wanted) + actual = _dtype_from_pep3118(spec) + assert_equal(actual, dt, + err_msg="spec %r != dtype %r" % (spec, wanted)) + + def test_native_padding(self): + align = np.dtype('i').alignment + for j in range(8): + if j == 0: + s = 'bi' + else: + s = 'b%dxi' % j + self._check('@'+s, {'f0': ('i1', 0), + 'f1': ('i', align*(1 + j//align))}) + self._check('='+s, {'f0': ('i1', 0), + 'f1': ('i', 1+j)}) + + def test_native_padding_2(self): + # Native padding should work also for structs and sub-arrays + self._check('x3T{xi}', {'f0': (({'f0': ('i', 4)}, (3,)), 4)}) + self._check('^x3T{xi}', {'f0': (({'f0': ('i', 1)}, (3,)), 1)}) + + def test_trailing_padding(self): + # Trailing padding should be included, *and*, the item size + # should match the alignment if in aligned mode + align = np.dtype('i').alignment + size = np.dtype('i').itemsize + + def aligned(n): + return align*(1 + (n-1)//align) + + base = dict(formats=['i'], names=['f0']) + + self._check('ix', dict(itemsize=aligned(size + 1), **base)) + self._check('ixx', dict(itemsize=aligned(size + 2), **base)) + self._check('ixxx', dict(itemsize=aligned(size + 3), **base)) + self._check('ixxxx', dict(itemsize=aligned(size + 4), **base)) + self._check('i7x', dict(itemsize=aligned(size + 7), **base)) + + self._check('^ix', dict(itemsize=size + 1, **base)) + self._check('^ixx', dict(itemsize=size + 2, **base)) + self._check('^ixxx', dict(itemsize=size + 3, **base)) + self._check('^ixxxx', dict(itemsize=size + 4, **base)) + self._check('^i7x', dict(itemsize=size + 7, **base)) + + def test_native_padding_3(self): + dt = np.dtype( + [('a', 'b'), ('b', 'i'), + ('sub', np.dtype('b,i')), ('c', 'i')], + align=True) + 
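+ # Rough reading of the format string checked on the next line, assuming a
+ # 4-byte int with 4-byte alignment (typical platforms): 'b:a:' is the int8
+ # field 'a', 'xxx' pads up to the aligned int 'b', the nested 'T{b:f0:=i:f1:}'
+ # is the unaligned 'b,i' sub-struct, and another 'xxx' pad precedes 'c'.
+ # Illustrative example under the same assumption:
+ # >>> from numpy.core._internal import _dtype_from_pep3118
+ # >>> _dtype_from_pep3118('T{b:a:xxxi:b:}').itemsize
+ # 8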
self._check("T{b:a:xxxi:b:T{b:f0:=i:f1:}:sub:xxxi:c:}", dt) + + dt = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b'), ('d', 'b'), + ('e', 'b'), ('sub', np.dtype('b,i', align=True))]) + self._check("T{b:a:=i:b:b:c:b:d:b:e:T{b:f0:xxxi:f1:}:sub:}", dt) + + def test_padding_with_array_inside_struct(self): + dt = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b', (3,)), + ('d', 'i')], + align=True) + self._check("T{b:a:xxxi:b:3b:c:xi:d:}", dt) + + def test_byteorder_inside_struct(self): + # The byte order after @T{=i} should be '=', not '@'. + # Check this by noting the absence of native alignment. + self._check('@T{^i}xi', {'f0': ({'f0': ('i', 0)}, 0), + 'f1': ('i', 5)}) + + def test_intra_padding(self): + # Natively aligned sub-arrays may require some internal padding + align = np.dtype('i').alignment + size = np.dtype('i').itemsize + + def aligned(n): + return (align*(1 + (n-1)//align)) + + self._check('(3)T{ix}', (dict( + names=['f0'], + formats=['i'], + offsets=[0], + itemsize=aligned(size + 1) + ), (3,))) + + def test_char_vs_string(self): + dt = np.dtype('c') + self._check('c', dt) + + dt = np.dtype([('f0', 'S1', (4,)), ('f1', 'S4')]) + self._check('4c4s', dt) + + def test_field_order(self): + # gh-9053 - previously, we relied on dictionary key order + self._check("(0)I:a:f:b:", [('a', 'I', (0,)), ('b', 'f')]) + self._check("(0)I:b:f:a:", [('b', 'I', (0,)), ('a', 'f')]) + + def test_unnamed_fields(self): + self._check('ii', [('f0', 'i'), ('f1', 'i')]) + self._check('ii:f0:', [('f1', 'i'), ('f0', 'i')]) + + self._check('i', 'i') + self._check('i:f0:', [('f0', 'i')]) + + +class TestNewBufferProtocol(object): + """ Test PEP3118 buffers """ + + def _check_roundtrip(self, obj): + obj = np.asarray(obj) + x = memoryview(obj) + y = np.asarray(x) + y2 = np.array(x) + assert_(not y.flags.owndata) + assert_(y2.flags.owndata) + + assert_equal(y.dtype, obj.dtype) + assert_equal(y.shape, obj.shape) + assert_array_equal(obj, y) + + assert_equal(y2.dtype, obj.dtype) + assert_equal(y2.shape, obj.shape) + assert_array_equal(obj, y2) + + def test_roundtrip(self): + x = np.array([1, 2, 3, 4, 5], dtype='i4') + self._check_roundtrip(x) + + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + self._check_roundtrip(x) + + x = np.zeros((3, 3, 3), dtype=np.float32)[:, 0,:] + self._check_roundtrip(x) + + dt = [('a', 'b'), + ('b', 'h'), + ('c', 'i'), + ('d', 'l'), + ('dx', 'q'), + ('e', 'B'), + ('f', 'H'), + ('g', 'I'), + ('h', 'L'), + ('hx', 'Q'), + ('i', np.single), + ('j', np.double), + ('k', np.longdouble), + ('ix', np.csingle), + ('jx', np.cdouble), + ('kx', np.clongdouble), + ('l', 'S4'), + ('m', 'U4'), + ('n', 'V3'), + ('o', '?'), + ('p', np.half), + ] + x = np.array( + [(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + b'aaaa', 'bbbb', b'xxx', True, 1.0)], + dtype=dt) + self._check_roundtrip(x) + + x = np.array(([[1, 2], [3, 4]],), dtype=[('a', (int, (2, 2)))]) + self._check_roundtrip(x) + + x = np.array([1, 2, 3], dtype='>i2') + self._check_roundtrip(x) + + x = np.array([1, 2, 3], dtype='<i2') + self._check_roundtrip(x) + + x = np.array([1, 2, 3], dtype='>i4') + self._check_roundtrip(x) + + x = np.array([1, 2, 3], dtype='<i4') + self._check_roundtrip(x) + + # check long long can be represented as non-native + x = np.array([1, 2, 3], dtype='>q') + self._check_roundtrip(x) + + # Native-only data types can be passed through the buffer interface + # only in native byte order + if sys.byteorder == 'little': + x = np.array([1, 2, 3], dtype='>g') + assert_raises(ValueError, self._check_roundtrip, x) + x = 
np.array([1, 2, 3], dtype='<g') + self._check_roundtrip(x) + else: + x = np.array([1, 2, 3], dtype='>g') + self._check_roundtrip(x) + x = np.array([1, 2, 3], dtype='<g') + assert_raises(ValueError, self._check_roundtrip, x) + + def test_roundtrip_half(self): + half_list = [ + 1.0, + -2.0, + 6.5504 * 10**4, # (max half precision) + 2**-14, # ~= 6.10352 * 10**-5 (minimum positive normal) + 2**-24, # ~= 5.96046 * 10**-8 (minimum strictly positive subnormal) + 0.0, + -0.0, + float('+inf'), + float('-inf'), + 0.333251953125, # ~= 1/3 + ] + + x = np.array(half_list, dtype='>e') + self._check_roundtrip(x) + x = np.array(half_list, dtype='<e') + self._check_roundtrip(x) + + def test_roundtrip_single_types(self): + for typ in np.typeDict.values(): + dtype = np.dtype(typ) + + if dtype.char in 'Mm': + # datetimes cannot be used in buffers + continue + if dtype.char == 'V': + # skip void + continue + + x = np.zeros(4, dtype=dtype) + self._check_roundtrip(x) + + if dtype.char not in 'qQgG': + dt = dtype.newbyteorder('<') + x = np.zeros(4, dtype=dt) + self._check_roundtrip(x) + + dt = dtype.newbyteorder('>') + x = np.zeros(4, dtype=dt) + self._check_roundtrip(x) + + def test_roundtrip_scalar(self): + # Issue #4015. + self._check_roundtrip(0) + + def test_invalid_buffer_format(self): + # datetime64 cannot be used fully in a buffer yet + # Should be fixed in the next Numpy major release + dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')]) + a = np.empty(3, dt) + assert_raises((ValueError, BufferError), memoryview, a) + assert_raises((ValueError, BufferError), memoryview, np.array((3), 'M8[D]')) + + def test_export_simple_1d(self): + x = np.array([1, 2, 3, 4, 5], dtype='i') + y = memoryview(x) + assert_equal(y.format, 'i') + assert_equal(y.shape, (5,)) + assert_equal(y.ndim, 1) + assert_equal(y.strides, (4,)) + assert_equal(y.suboffsets, EMPTY) + assert_equal(y.itemsize, 4) + + def test_export_simple_nd(self): + x = np.array([[1, 2], [3, 4]], dtype=np.float64) + y = memoryview(x) + assert_equal(y.format, 'd') + assert_equal(y.shape, (2, 2)) + assert_equal(y.ndim, 2) + assert_equal(y.strides, (16, 8)) + assert_equal(y.suboffsets, EMPTY) + assert_equal(y.itemsize, 8) + + def test_export_discontiguous(self): + x = np.zeros((3, 3, 3), dtype=np.float32)[:, 0,:] + y = memoryview(x) + assert_equal(y.format, 'f') + assert_equal(y.shape, (3, 3)) + assert_equal(y.ndim, 2) + assert_equal(y.strides, (36, 4)) + assert_equal(y.suboffsets, EMPTY) + assert_equal(y.itemsize, 4) + + def test_export_record(self): + dt = [('a', 'b'), + ('b', 'h'), + ('c', 'i'), + ('d', 'l'), + ('dx', 'q'), + ('e', 'B'), + ('f', 'H'), + ('g', 'I'), + ('h', 'L'), + ('hx', 'Q'), + ('i', np.single), + ('j', np.double), + ('k', np.longdouble), + ('ix', np.csingle), + ('jx', np.cdouble), + ('kx', np.clongdouble), + ('l', 'S4'), + ('m', 'U4'), + ('n', 'V3'), + ('o', '?'), + ('p', np.half), + ] + x = np.array( + [(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + b'aaaa', 'bbbb', b' ', True, 1.0)], + dtype=dt) + y = memoryview(x) + assert_equal(y.shape, (1,)) + assert_equal(y.ndim, 1) + assert_equal(y.suboffsets, EMPTY) + + sz = sum([np.dtype(b).itemsize for a, b in dt]) + if np.dtype('l').itemsize == 4: + assert_equal(y.format, 'T{b:a:=h:b:i:c:l:d:q:dx:B:e:@H:f:=I:g:L:h:Q:hx:f:i:d:j:^g:k:=Zf:ix:Zd:jx:^Zg:kx:4s:l:=4w:m:3x:n:?:o:@e:p:}') + else: + assert_equal(y.format, 'T{b:a:=h:b:i:c:q:d:q:dx:B:e:@H:f:=I:g:Q:h:Q:hx:f:i:d:j:^g:k:=Zf:ix:Zd:jx:^Zg:kx:4s:l:=4w:m:3x:n:?:o:@e:p:}') + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides + if not 
(np.ones(1).strides[0] == np.iinfo(np.intp).max): + assert_equal(y.strides, (sz,)) + assert_equal(y.itemsize, sz) + + def test_export_subarray(self): + x = np.array(([[1, 2], [3, 4]],), dtype=[('a', ('i', (2, 2)))]) + y = memoryview(x) + assert_equal(y.format, 'T{(2,2)i:a:}') + assert_equal(y.shape, EMPTY) + assert_equal(y.ndim, 0) + assert_equal(y.strides, EMPTY) + assert_equal(y.suboffsets, EMPTY) + assert_equal(y.itemsize, 16) + + def test_export_endian(self): + x = np.array([1, 2, 3], dtype='>i') + y = memoryview(x) + if sys.byteorder == 'little': + assert_equal(y.format, '>i') + else: + assert_equal(y.format, 'i') + + x = np.array([1, 2, 3], dtype='<i') + y = memoryview(x) + if sys.byteorder == 'little': + assert_equal(y.format, 'i') + else: + assert_equal(y.format, '<i') + + def test_export_flags(self): + # Check SIMPLE flag, see also gh-3613 (exception should be BufferError) + assert_raises(ValueError, + _multiarray_tests.get_buffer_info, + np.arange(5)[::2], ('SIMPLE',)) + + def test_padding(self): + for j in range(8): + x = np.array([(1,), (2,)], dtype={'f0': (int, j)}) + self._check_roundtrip(x) + + def test_reference_leak(self): + if HAS_REFCOUNT: + count_1 = sys.getrefcount(np.core._internal) + a = np.zeros(4) + b = memoryview(a) + c = np.asarray(b) + if HAS_REFCOUNT: + count_2 = sys.getrefcount(np.core._internal) + assert_equal(count_1, count_2) + del c # avoid pyflakes unused variable warning. + + def test_padded_struct_array(self): + dt1 = np.dtype( + [('a', 'b'), ('b', 'i'), ('sub', np.dtype('b,i')), ('c', 'i')], + align=True) + x1 = np.arange(dt1.itemsize, dtype=np.int8).view(dt1) + self._check_roundtrip(x1) + + dt2 = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b', (3,)), ('d', 'i')], + align=True) + x2 = np.arange(dt2.itemsize, dtype=np.int8).view(dt2) + self._check_roundtrip(x2) + + dt3 = np.dtype( + [('a', 'b'), ('b', 'i'), ('c', 'b'), ('d', 'b'), + ('e', 'b'), ('sub', np.dtype('b,i', align=True))]) + x3 = np.arange(dt3.itemsize, dtype=np.int8).view(dt3) + self._check_roundtrip(x3) + + def test_relaxed_strides(self): + # Test that relaxed strides are converted to non-relaxed + c = np.ones((1, 10, 10), dtype='i8') + + # Check for NPY_RELAXED_STRIDES_CHECKING: + if np.ones((10, 1), order="C").flags.f_contiguous: + c.strides = (-1, 80, 8) + + assert_(memoryview(c).strides == (800, 80, 8)) + + # Writing C-contiguous data to a BytesIO buffer should work + fd = io.BytesIO() + fd.write(c.data) + + fortran = c.T + assert_(memoryview(fortran).strides == (8, 80, 800)) + + arr = np.ones((1, 10)) + if arr.flags.f_contiguous: + shape, strides = _multiarray_tests.get_buffer_info( + arr, ['F_CONTIGUOUS']) + assert_(strides[0] == 8) + arr = np.ones((10, 1), order='F') + shape, strides = _multiarray_tests.get_buffer_info( + arr, ['C_CONTIGUOUS']) + assert_(strides[-1] == 8) + + def test_out_of_order_fields(self): + dt = np.dtype(dict( + formats=['<i4', '<i4'], + names=['one', 'two'], + offsets=[4, 0], + itemsize=8 + )) + + # overlapping fields cannot be represented by PEP3118 + arr = np.empty(1, dt) + with assert_raises(ValueError): + memoryview(arr) + + def test_max_dims(self): + a = np.empty((1,) * 32) + self._check_roundtrip(a) + + @pytest.mark.skipif(sys.version_info < (2, 7, 7), reason="See gh-11115") + def test_error_too_many_dims(self): + def make_ctype(shape, scalar_type): + t = scalar_type + for dim in shape[::-1]: + t = dim * t + return t + + # construct a memoryview with 33 dimensions + c_u8_33d = make_ctype((1,)*33, ctypes.c_uint8) + m = memoryview(c_u8_33d()) + 
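+ # The ctypes object above exposes a 33-dimensional buffer; NumPy's dimension
+ # limit is 32 at this version, so converting it with np.array is expected to
+ # raise the RuntimeError asserted below.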
assert_equal(m.ndim, 33) + + assert_raises_regex( + RuntimeError, "ndim", + np.array, m) + + def test_error_pointer_type(self): + # gh-6741 + m = memoryview(ctypes.pointer(ctypes.c_uint8())) + assert_('&' in m.format) + + assert_raises_regex( + ValueError, "format string", + np.array, m) + + def test_error_message_unsupported(self): + # wchar has no corresponding numpy type - if this changes in future, we + # need a better way to construct an invalid memoryview format. + t = ctypes.c_wchar * 4 + with assert_raises(ValueError) as cm: + np.array(t()) + + exc = cm.exception + if sys.version_info.major > 2: + with assert_raises_regex( + NotImplementedError, + r"Unrepresentable .* 'u' \(UCS-2 strings\)" + ): + raise exc.__cause__ + + def test_ctypes_integer_via_memoryview(self): + # gh-11150, due to bpo-10746 + for c_integer in {ctypes.c_int, ctypes.c_long, ctypes.c_longlong}: + value = c_integer(42) + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', r'.*\bctypes\b', RuntimeWarning) + np.asarray(value) + + def test_ctypes_struct_via_memoryview(self): + # gh-10528 + class foo(ctypes.Structure): + _fields_ = [('a', ctypes.c_uint8), ('b', ctypes.c_uint32)] + f = foo(a=1, b=2) + + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', r'.*\bctypes\b', RuntimeWarning) + arr = np.asarray(f) + + assert_equal(arr['a'], 1) + assert_equal(arr['b'], 2) + f.a = 3 + assert_equal(arr['a'], 3) + + +class TestArrayAttributeDeletion(object): + + def test_multiarray_writable_attributes_deletion(self): + # ticket #2046, should not seqfault, raise AttributeError + a = np.ones(2) + attr = ['shape', 'strides', 'data', 'dtype', 'real', 'imag', 'flat'] + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "Assigning the 'data' attribute") + for s in attr: + assert_raises(AttributeError, delattr, a, s) + + def test_multiarray_not_writable_attributes_deletion(self): + a = np.ones(2) + attr = ["ndim", "flags", "itemsize", "size", "nbytes", "base", + "ctypes", "T", "__array_interface__", "__array_struct__", + "__array_priority__", "__array_finalize__"] + for s in attr: + assert_raises(AttributeError, delattr, a, s) + + def test_multiarray_flags_writable_attribute_deletion(self): + a = np.ones(2).flags + attr = ['writebackifcopy', 'updateifcopy', 'aligned', 'writeable'] + for s in attr: + assert_raises(AttributeError, delattr, a, s) + + def test_multiarray_flags_not_writable_attribute_deletion(self): + a = np.ones(2).flags + attr = ["contiguous", "c_contiguous", "f_contiguous", "fortran", + "owndata", "fnc", "forc", "behaved", "carray", "farray", + "num"] + for s in attr: + assert_raises(AttributeError, delattr, a, s) + + +class TestArrayInterface(): + class Foo(object): + def __init__(self, value): + self.value = value + self.iface = {'typestr': 'f8'} + + def __float__(self): + return float(self.value) + + @property + def __array_interface__(self): + return self.iface + + + f = Foo(0.5) + + @pytest.mark.parametrize('val, iface, expected', [ + (f, {}, 0.5), + ([f], {}, [0.5]), + ([f, f], {}, [0.5, 0.5]), + (f, {'shape': ()}, 0.5), + (f, {'shape': None}, TypeError), + (f, {'shape': (1, 1)}, [[0.5]]), + (f, {'shape': (2,)}, ValueError), + (f, {'strides': ()}, 0.5), + (f, {'strides': (2,)}, ValueError), + (f, {'strides': 16}, TypeError), + ]) + def test_scalar_interface(self, val, iface, expected): + # Test scalar coercion within the array interface + self.f.iface = {'typestr': 'f8'} + self.f.iface.update(iface) + if HAS_REFCOUNT: + pre_cnt = 
sys.getrefcount(np.dtype('f8')) + if isinstance(expected, type): + assert_raises(expected, np.array, val) + else: + result = np.array(val) + assert_equal(np.array(val), expected) + assert result.dtype == 'f8' + del result + if HAS_REFCOUNT: + post_cnt = sys.getrefcount(np.dtype('f8')) + assert_equal(pre_cnt, post_cnt) + +def test_interface_no_shape(): + class ArrayLike(object): + array = np.array(1) + __array_interface__ = array.__array_interface__ + assert_equal(np.array(ArrayLike()), 1) + + +def test_array_interface_itemsize(): + # See gh-6361 + my_dtype = np.dtype({'names': ['A', 'B'], 'formats': ['f4', 'f4'], + 'offsets': [0, 8], 'itemsize': 16}) + a = np.ones(10, dtype=my_dtype) + descr_t = np.dtype(a.__array_interface__['descr']) + typestr_t = np.dtype(a.__array_interface__['typestr']) + assert_equal(descr_t.itemsize, typestr_t.itemsize) + + +def test_array_interface_empty_shape(): + # See gh-7994 + arr = np.array([1, 2, 3]) + interface1 = dict(arr.__array_interface__) + interface1['shape'] = () + + class DummyArray1(object): + __array_interface__ = interface1 + + # NOTE: Because Py2 str/Py3 bytes supports the buffer interface, setting + # the interface data to bytes would invoke the bug this tests for, that + # __array_interface__ with shape=() is not allowed if the data is an object + # exposing the buffer interface + interface2 = dict(interface1) + interface2['data'] = arr[0].tobytes() + + class DummyArray2(object): + __array_interface__ = interface2 + + arr1 = np.asarray(DummyArray1()) + arr2 = np.asarray(DummyArray2()) + arr3 = arr[:1].reshape(()) + assert_equal(arr1, arr2) + assert_equal(arr1, arr3) + +@pytest.mark.skipif(IS_PYPY, reason='PyDict_GetItemString(.., "data") segfaults') +def test_array_interface_offset(): + arr = np.array([1, 2, 3], dtype='int32') + interface = dict(arr.__array_interface__) + if sys.version_info[0] < 3: + interface['data'] = buffer(arr) + else: + interface['data'] = memoryview(arr) + interface['shape'] = (2,) + interface['offset'] = 4 + + + class DummyArray(object): + __array_interface__ = interface + + arr1 = np.asarray(DummyArray()) + assert_equal(arr1, arr[1:]) + +def test_flat_element_deletion(): + it = np.ones(3).flat + try: + del it[1] + del it[1:2] + except TypeError: + pass + except Exception: + raise AssertionError + + +def test_scalar_element_deletion(): + a = np.zeros(2, dtype=[('x', 'int'), ('y', 'int')]) + assert_raises(ValueError, a[0].__delitem__, 'x') + + +class TestMemEventHook(object): + def test_mem_seteventhook(self): + # The actual tests are within the C code in + # multiarray/_multiarray_tests.c.src + _multiarray_tests.test_pydatamem_seteventhook_start() + # force an allocation and free of a numpy array + # needs to be larger then limit of small memory cacher in ctors.c + a = np.zeros(1000) + del a + break_cycles() + _multiarray_tests.test_pydatamem_seteventhook_end() + +class TestMapIter(object): + def test_mapiter(self): + # The actual tests are within the C code in + # multiarray/_multiarray_tests.c.src + + a = np.arange(12).reshape((3, 4)).astype(float) + index = ([1, 1, 2, 0], + [0, 0, 2, 3]) + vals = [50, 50, 30, 16] + + _multiarray_tests.test_inplace_increment(a, index, vals) + assert_equal(a, [[0.00, 1., 2.0, 19.], + [104., 5., 6.0, 7.0], + [8.00, 9., 40., 11.]]) + + b = np.arange(6).astype(float) + index = (np.array([1, 2, 0]),) + vals = [50, 4, 100.1] + _multiarray_tests.test_inplace_increment(b, index, vals) + assert_equal(b, [100.1, 51., 6., 3., 4., 5.]) + + +class TestAsCArray(object): + def test_1darray(self): + 
array = np.arange(24, dtype=np.double) + from_c = _multiarray_tests.test_as_c_array(array, 3) + assert_equal(array[3], from_c) + + def test_2darray(self): + array = np.arange(24, dtype=np.double).reshape(3, 8) + from_c = _multiarray_tests.test_as_c_array(array, 2, 4) + assert_equal(array[2, 4], from_c) + + def test_3darray(self): + array = np.arange(24, dtype=np.double).reshape(2, 3, 4) + from_c = _multiarray_tests.test_as_c_array(array, 1, 2, 3) + assert_equal(array[1, 2, 3], from_c) + + +class TestConversion(object): + def test_array_scalar_relational_operation(self): + # All integer + for dt1 in np.typecodes['AllInteger']: + assert_(1 > np.array(0, dtype=dt1), "type %s failed" % (dt1,)) + assert_(not 1 < np.array(0, dtype=dt1), "type %s failed" % (dt1,)) + + for dt2 in np.typecodes['AllInteger']: + assert_(np.array(1, dtype=dt1) > np.array(0, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(0, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + # Unsigned integers + for dt1 in 'BHILQP': + assert_(-1 < np.array(1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(not -1 > np.array(1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(-1 != np.array(1, dtype=dt1), "type %s failed" % (dt1,)) + + # Unsigned vs signed + for dt2 in 'bhilqp': + assert_(np.array(1, dtype=dt1) > np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(1, dtype=dt1) != np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + # Signed integers and floats + for dt1 in 'bhlqp' + np.typecodes['Float']: + assert_(1 > np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(not 1 < np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + assert_(-1 == np.array(-1, dtype=dt1), "type %s failed" % (dt1,)) + + for dt2 in 'bhlqp' + np.typecodes['Float']: + assert_(np.array(1, dtype=dt1) > np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1) < np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(-1, dtype=dt1) == np.array(-1, dtype=dt2), + "type %s and %s failed" % (dt1, dt2)) + + def test_to_bool_scalar(self): + assert_equal(bool(np.array([False])), False) + assert_equal(bool(np.array([True])), True) + assert_equal(bool(np.array([[42]])), True) + assert_raises(ValueError, bool, np.array([1, 2])) + + class NotConvertible(object): + def __bool__(self): + raise NotImplementedError + __nonzero__ = __bool__ # python 2 + + assert_raises(NotImplementedError, bool, np.array(NotConvertible())) + assert_raises(NotImplementedError, bool, np.array([NotConvertible()])) + + self_containing = np.array([None]) + self_containing[0] = self_containing + try: + Error = RecursionError + except NameError: + Error = RuntimeError # python < 3.5 + assert_raises(Error, bool, self_containing) # previously stack overflow + self_containing[0] = None # resolve circular reference + + def test_to_int_scalar(self): + # gh-9972 means that these aren't always the same + int_funcs = (int, lambda x: x.__int__()) + for int_func in int_funcs: + assert_equal(int_func(np.array([1])), 1) + assert_equal(int_func(np.array([0])), 0) + assert_equal(int_func(np.array([[42]])), 42) + assert_raises(TypeError, int_func, np.array([1, 2])) + + # gh-9972 + assert_equal(4, int_func(np.array('4'))) + assert_equal(5, int_func(np.bytes_(b'5'))) + assert_equal(6, int_func(np.unicode_(u'6'))) + + class 
HasTrunc: + def __trunc__(self): + return 3 + assert_equal(3, int_func(np.array(HasTrunc()))) + assert_equal(3, int_func(np.array([HasTrunc()]))) + + class NotConvertible(object): + def __int__(self): + raise NotImplementedError + assert_raises(NotImplementedError, + int_func, np.array(NotConvertible())) + assert_raises(NotImplementedError, + int_func, np.array([NotConvertible()])) + + +class TestWhere(object): + def test_basic(self): + dts = [bool, np.int16, np.int32, np.int64, np.double, np.complex128, + np.longdouble, np.clongdouble] + for dt in dts: + c = np.ones(53, dtype=bool) + assert_equal(np.where( c, dt(0), dt(1)), dt(0)) + assert_equal(np.where(~c, dt(0), dt(1)), dt(1)) + assert_equal(np.where(True, dt(0), dt(1)), dt(0)) + assert_equal(np.where(False, dt(0), dt(1)), dt(1)) + d = np.ones_like(c).astype(dt) + e = np.zeros_like(d) + r = d.astype(dt) + c[7] = False + r[7] = e[7] + assert_equal(np.where(c, e, e), e) + assert_equal(np.where(c, d, e), r) + assert_equal(np.where(c, d, e[0]), r) + assert_equal(np.where(c, d[0], e), r) + assert_equal(np.where(c[::2], d[::2], e[::2]), r[::2]) + assert_equal(np.where(c[1::2], d[1::2], e[1::2]), r[1::2]) + assert_equal(np.where(c[::3], d[::3], e[::3]), r[::3]) + assert_equal(np.where(c[1::3], d[1::3], e[1::3]), r[1::3]) + assert_equal(np.where(c[::-2], d[::-2], e[::-2]), r[::-2]) + assert_equal(np.where(c[::-3], d[::-3], e[::-3]), r[::-3]) + assert_equal(np.where(c[1::-3], d[1::-3], e[1::-3]), r[1::-3]) + + def test_exotic(self): + # object + assert_array_equal(np.where(True, None, None), np.array(None)) + # zero sized + m = np.array([], dtype=bool).reshape(0, 3) + b = np.array([], dtype=np.float64).reshape(0, 3) + assert_array_equal(np.where(m, 0, b), np.array([]).reshape(0, 3)) + + # object cast + d = np.array([-1.34, -0.16, -0.54, -0.31, -0.08, -0.95, 0.000, 0.313, + 0.547, -0.18, 0.876, 0.236, 1.969, 0.310, 0.699, 1.013, + 1.267, 0.229, -1.39, 0.487]) + nan = float('NaN') + e = np.array(['5z', '0l', nan, 'Wz', nan, nan, 'Xq', 'cs', nan, nan, + 'QN', nan, nan, 'Fd', nan, nan, 'kp', nan, '36', 'i1'], + dtype=object) + m = np.array([0, 0, 1, 0, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 0, 0], dtype=bool) + + r = e[:] + r[np.where(m)] = d[np.where(m)] + assert_array_equal(np.where(m, d, e), r) + + r = e[:] + r[np.where(~m)] = d[np.where(~m)] + assert_array_equal(np.where(m, e, d), r) + + assert_array_equal(np.where(m, e, e), e) + + # minimal dtype result with NaN scalar (e.g required by pandas) + d = np.array([1., 2.], dtype=np.float32) + e = float('NaN') + assert_equal(np.where(True, d, e).dtype, np.float32) + e = float('Infinity') + assert_equal(np.where(True, d, e).dtype, np.float32) + e = float('-Infinity') + assert_equal(np.where(True, d, e).dtype, np.float32) + # also check upcast + e = float(1e150) + assert_equal(np.where(True, d, e).dtype, np.float64) + + def test_ndim(self): + c = [True, False] + a = np.zeros((2, 25)) + b = np.ones((2, 25)) + r = np.where(np.array(c)[:,np.newaxis], a, b) + assert_array_equal(r[0], a[0]) + assert_array_equal(r[1], b[0]) + + a = a.T + b = b.T + r = np.where(c, a, b) + assert_array_equal(r[:,0], a[:,0]) + assert_array_equal(r[:,1], b[:,0]) + + def test_dtype_mix(self): + c = np.array([False, True, False, False, False, False, True, False, + False, False, True, False]) + a = np.uint32(1) + b = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.], + dtype=np.float64) + r = np.array([5., 1., 3., 2., -1., -4., 1., -10., 10., 1., 1., 3.], + dtype=np.float64) + assert_equal(np.where(c, a, b), r) + 
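+ # Same expected values, now with a float32 scalar and an int64 array as the
+ # two branches; np.where should still produce r after the usual type
+ # promotion.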
+ a = a.astype(np.float32) + b = b.astype(np.int64) + assert_equal(np.where(c, a, b), r) + + # non bool mask + c = c.astype(int) + c[c != 0] = 34242324 + assert_equal(np.where(c, a, b), r) + # invert + tmpmask = c != 0 + c[c == 0] = 41247212 + c[tmpmask] = 0 + assert_equal(np.where(c, b, a), r) + + def test_foreign(self): + c = np.array([False, True, False, False, False, False, True, False, + False, False, True, False]) + r = np.array([5., 1., 3., 2., -1., -4., 1., -10., 10., 1., 1., 3.], + dtype=np.float64) + a = np.ones(1, dtype='>i4') + b = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.], + dtype=np.float64) + assert_equal(np.where(c, a, b), r) + + b = b.astype('>f8') + assert_equal(np.where(c, a, b), r) + + a = a.astype('<i4') + assert_equal(np.where(c, a, b), r) + + c = c.astype('>i4') + assert_equal(np.where(c, a, b), r) + + def test_error(self): + c = [True, True] + a = np.ones((4, 5)) + b = np.ones((5, 5)) + assert_raises(ValueError, np.where, c, a, a) + assert_raises(ValueError, np.where, c[0], a, b) + + def test_string(self): + # gh-4778 check strings are properly filled with nulls + a = np.array("abc") + b = np.array("x" * 753) + assert_equal(np.where(True, a, b), "abc") + assert_equal(np.where(False, b, a), "abc") + + # check native datatype sized strings + a = np.array("abcd") + b = np.array("x" * 8) + assert_equal(np.where(True, a, b), "abcd") + assert_equal(np.where(False, b, a), "abcd") + + def test_empty_result(self): + # pass empty where result through an assignment which reads the data of + # empty arrays, error detectable with valgrind, see gh-8922 + x = np.zeros((1, 1)) + ibad = np.vstack(np.where(x == 99.)) + assert_array_equal(ibad, + np.atleast_2d(np.array([[],[]], dtype=np.intp))) + + def test_largedim(self): + # invalid read regression gh-9304 + shape = [10, 2, 3, 4, 5, 6] + np.random.seed(2) + array = np.random.rand(*shape) + + for i in range(10): + benchmark = array.nonzero() + result = array.nonzero() + assert_array_equal(benchmark, result) + + +if not IS_PYPY: + # sys.getsizeof() is not valid on PyPy + class TestSizeOf(object): + + def test_empty_array(self): + x = np.array([]) + assert_(sys.getsizeof(x) > 0) + + def check_array(self, dtype): + elem_size = dtype(0).itemsize + + for length in [10, 50, 100, 500]: + x = np.arange(length, dtype=dtype) + assert_(sys.getsizeof(x) > length * elem_size) + + def test_array_int32(self): + self.check_array(np.int32) + + def test_array_int64(self): + self.check_array(np.int64) + + def test_array_float32(self): + self.check_array(np.float32) + + def test_array_float64(self): + self.check_array(np.float64) + + def test_view(self): + d = np.ones(100) + assert_(sys.getsizeof(d[...]) < sys.getsizeof(d)) + + def test_reshape(self): + d = np.ones(100) + assert_(sys.getsizeof(d) < sys.getsizeof(d.reshape(100, 1, 1).copy())) + + def test_resize(self): + d = np.ones(100) + old = sys.getsizeof(d) + d.resize(50) + assert_(old > sys.getsizeof(d)) + d.resize(150) + assert_(old < sys.getsizeof(d)) + + def test_error(self): + d = np.ones(100) + assert_raises(TypeError, d.__sizeof__, "a") + + +class TestHashing(object): + + def test_arrays_not_hashable(self): + x = np.ones(3) + assert_raises(TypeError, hash, x) + + def test_collections_hashable(self): + x = np.array([]) + assert_(not isinstance(x, collections_abc.Hashable)) + + +class TestArrayPriority(object): + # This will go away when __array_priority__ is settled, meanwhile + # it serves to check unintended changes. 
+ op = operator + binary_ops = [ + op.pow, op.add, op.sub, op.mul, op.floordiv, op.truediv, op.mod, + op.and_, op.or_, op.xor, op.lshift, op.rshift, op.mod, op.gt, + op.ge, op.lt, op.le, op.ne, op.eq + ] + + # See #7949. Don't use "/" operator With -3 switch, since python reports it + # as a DeprecationWarning + if sys.version_info[0] < 3 and not sys.py3kwarning: + binary_ops.append(op.div) + + class Foo(np.ndarray): + __array_priority__ = 100. + + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + class Bar(np.ndarray): + __array_priority__ = 101. + + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + class Other(object): + __array_priority__ = 1000. + + def _all(self, other): + return self.__class__() + + __add__ = __radd__ = _all + __sub__ = __rsub__ = _all + __mul__ = __rmul__ = _all + __pow__ = __rpow__ = _all + __div__ = __rdiv__ = _all + __mod__ = __rmod__ = _all + __truediv__ = __rtruediv__ = _all + __floordiv__ = __rfloordiv__ = _all + __and__ = __rand__ = _all + __xor__ = __rxor__ = _all + __or__ = __ror__ = _all + __lshift__ = __rlshift__ = _all + __rshift__ = __rrshift__ = _all + __eq__ = _all + __ne__ = _all + __gt__ = _all + __ge__ = _all + __lt__ = _all + __le__ = _all + + def test_ndarray_subclass(self): + a = np.array([1, 2]) + b = self.Bar([1, 2]) + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Bar), msg) + assert_(isinstance(f(b, a), self.Bar), msg) + + def test_ndarray_other(self): + a = np.array([1, 2]) + b = self.Other() + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Other), msg) + assert_(isinstance(f(b, a), self.Other), msg) + + def test_subclass_subclass(self): + a = self.Foo([1, 2]) + b = self.Bar([1, 2]) + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Bar), msg) + assert_(isinstance(f(b, a), self.Bar), msg) + + def test_subclass_other(self): + a = self.Foo([1, 2]) + b = self.Other() + for f in self.binary_ops: + msg = repr(f) + assert_(isinstance(f(a, b), self.Other), msg) + assert_(isinstance(f(b, a), self.Other), msg) + + +class TestBytestringArrayNonzero(object): + + def test_empty_bstring_array_is_falsey(self): + assert_(not np.array([''], dtype=str)) + + def test_whitespace_bstring_array_is_falsey(self): + a = np.array(['spam'], dtype=str) + a[0] = ' \0\0' + assert_(not a) + + def test_all_null_bstring_array_is_falsey(self): + a = np.array(['spam'], dtype=str) + a[0] = '\0\0\0\0' + assert_(not a) + + def test_null_inside_bstring_array_is_truthy(self): + a = np.array(['spam'], dtype=str) + a[0] = ' \0 \0' + assert_(a) + + +class TestUnicodeArrayNonzero(object): + + def test_empty_ustring_array_is_falsey(self): + assert_(not np.array([''], dtype=np.unicode)) + + def test_whitespace_ustring_array_is_falsey(self): + a = np.array(['eggs'], dtype=np.unicode) + a[0] = ' \0\0' + assert_(not a) + + def test_all_null_ustring_array_is_falsey(self): + a = np.array(['eggs'], dtype=np.unicode) + a[0] = '\0\0\0\0' + assert_(not a) + + def test_null_inside_ustring_array_is_truthy(self): + a = np.array(['eggs'], dtype=np.unicode) + a[0] = ' \0 \0' + assert_(a) + + +class TestFormat(object): + + def test_0d(self): + a = np.array(np.pi) + assert_equal('{:0.3g}'.format(a), '3.14') + assert_equal('{:0.3g}'.format(a[()]), '3.14') + + def test_1d_no_format(self): + a = np.array([np.pi]) + assert_equal('{}'.format(a), str(a)) + + def test_1d_format(self): + # until gh-5543, ensure that the behaviour matches what it used to 
be + a = np.array([np.pi]) + if sys.version_info[:2] >= (3, 4): + assert_raises(TypeError, '{:30}'.format, a) + else: + with suppress_warnings() as sup: + sup.filter(PendingDeprecationWarning) + res = '{:30}'.format(a) + dst = object.__format__(a, '30') + assert_equal(res, dst) + +from numpy.testing import IS_PYPY + +class TestCTypes(object): + + def test_ctypes_is_available(self): + test_arr = np.array([[1, 2, 3], [4, 5, 6]]) + + assert_equal(ctypes, test_arr.ctypes._ctypes) + assert_equal(tuple(test_arr.ctypes.shape), (2, 3)) + + def test_ctypes_is_not_available(self): + from numpy.core import _internal + _internal.ctypes = None + try: + test_arr = np.array([[1, 2, 3], [4, 5, 6]]) + + assert_(isinstance(test_arr.ctypes._ctypes, + _internal._missing_ctypes)) + assert_equal(tuple(test_arr.ctypes.shape), (2, 3)) + finally: + _internal.ctypes = ctypes + + def _make_readonly(x): + x.flags.writeable = False + return x + + @pytest.mark.parametrize('arr', [ + np.array([1, 2, 3]), + np.array([['one', 'two'], ['three', 'four']]), + np.array((1, 2), dtype='i4,i4'), + np.zeros((2,), dtype= + np.dtype(dict( + formats=['<i4', '<i4'], + names=['a', 'b'], + offsets=[0, 2], + itemsize=6 + )) + ), + np.array([None], dtype=object), + np.array([]), + np.empty((0, 0)), + _make_readonly(np.array([1, 2, 3])), + ], ids=[ + '1d', + '2d', + 'structured', + 'overlapping', + 'object', + 'empty', + 'empty-2d', + 'readonly' + ]) + def test_ctypes_data_as_holds_reference(self, arr): + # gh-9647 + # create a copy to ensure that pytest does not mess with the refcounts + arr = arr.copy() + + arr_ref = weakref.ref(arr) + + ctypes_ptr = arr.ctypes.data_as(ctypes.c_void_p) + + # `ctypes_ptr` should hold onto `arr` + del arr + break_cycles() + assert_(arr_ref() is not None, "ctypes pointer did not hold onto a reference") + + # but when the `ctypes_ptr` object dies, so should `arr` + del ctypes_ptr + if IS_PYPY: + # Pypy does not recycle arr objects immediately. Trigger gc to + # release arr. Cpython uses refcounts. An explicit call to gc + # should not be needed here. 
+ break_cycles() + assert_(arr_ref() is None, "unknowable whether ctypes pointer holds a reference") + + def test_ctypes_as_parameter_holds_reference(self): + arr = np.array([None]).copy() + + arr_ref = weakref.ref(arr) + + ctypes_ptr = arr.ctypes._as_parameter_ + + # `ctypes_ptr` should hold onto `arr` + del arr + break_cycles() + assert_(arr_ref() is not None, "ctypes pointer did not hold onto a reference") + + # but when the `ctypes_ptr` object dies, so should `arr` + del ctypes_ptr + if IS_PYPY: + break_cycles() + assert_(arr_ref() is None, "unknowable whether ctypes pointer holds a reference") + + +class TestWritebackIfCopy(object): + # all these tests use the WRITEBACKIFCOPY mechanism + def test_argmax_with_out(self): + mat = np.eye(5) + out = np.empty(5, dtype='i2') + res = np.argmax(mat, 0, out=out) + assert_equal(res, range(5)) + + def test_argmin_with_out(self): + mat = -np.eye(5) + out = np.empty(5, dtype='i2') + res = np.argmin(mat, 0, out=out) + assert_equal(res, range(5)) + + def test_clip_with_out(self): + mat = np.eye(5) + out = np.eye(5, dtype='i2') + res = np.clip(mat, a_min=-10, a_max=0, out=out) + assert_(res is out) + assert_equal(np.sum(out), 0) + + def test_insert_noncontiguous(self): + a = np.arange(6).reshape(2,3).T # force non-c-contiguous + # uses arr_insert + np.place(a, a>2, [44, 55]) + assert_equal(a, np.array([[0, 44], [1, 55], [2, 44]])) + # hit one of the failing paths + assert_raises(ValueError, np.place, a, a>20, []) + + def test_put_noncontiguous(self): + a = np.arange(6).reshape(2,3).T # force non-c-contiguous + np.put(a, [0, 2], [44, 55]) + assert_equal(a, np.array([[44, 3], [55, 4], [2, 5]])) + + def test_putmask_noncontiguous(self): + a = np.arange(6).reshape(2,3).T # force non-c-contiguous + # uses arr_putmask + np.putmask(a, a>2, a**2) + assert_equal(a, np.array([[0, 9], [1, 16], [2, 25]])) + + def test_take_mode_raise(self): + a = np.arange(6, dtype='int') + out = np.empty(2, dtype='int') + np.take(a, [0, 2], out=out, mode='raise') + assert_equal(out, np.array([0, 2])) + + def test_choose_mod_raise(self): + a = np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]]) + out = np.empty((3,3), dtype='int') + choices = [-10, 10] + np.choose(a, choices, out=out, mode='raise') + assert_equal(out, np.array([[ 10, -10, 10], + [-10, 10, -10], + [ 10, -10, 10]])) + + def test_flatiter__array__(self): + a = np.arange(9).reshape(3,3) + b = a.T.flat + c = b.__array__() + # triggers the WRITEBACKIFCOPY resolution, assuming refcount semantics + del c + + def test_dot_out(self): + # if HAVE_CBLAS, will use WRITEBACKIFCOPY + a = np.arange(9, dtype=float).reshape(3,3) + b = np.dot(a, a, out=a) + assert_equal(b, np.array([[15, 18, 21], [42, 54, 66], [69, 90, 111]])) + + def test_view_assign(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_resolve + + arr = np.arange(9).reshape(3, 3).T + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_resolve(arr_wb) + # arr changes after resolve, even though we assigned to arr_wb + assert_equal(arr, -100) + # after resolve, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + # assigning to arr_wb does not get transferred to arr + arr_wb[...] 
= 100 + assert_equal(arr, -100) + + def test_dealloc_warning(self): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + arr = np.arange(9).reshape(3, 3) + v = arr.T + _multiarray_tests.npy_abuse_writebackifcopy(v) + assert len(sup.log) == 1 + + def test_view_discard_refcount(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_discard + + arr = np.arange(9).reshape(3, 3).T + orig = arr.copy() + if HAS_REFCOUNT: + arr_cnt = sys.getrefcount(arr) + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_discard(arr_wb) + # arr remains unchanged after discard + assert_equal(arr, orig) + # after discard, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + if HAS_REFCOUNT: + assert_equal(arr_cnt, sys.getrefcount(arr)) + # assigning to arr_wb does not get transferred to arr + arr_wb[...] = 100 + assert_equal(arr, orig) + + +class TestArange(object): + def test_infinite(self): + assert_raises_regex( + ValueError, "size exceeded", + np.arange, 0, np.inf + ) + + def test_nan_step(self): + assert_raises_regex( + ValueError, "cannot compute length", + np.arange, 0, 1, np.nan + ) + + def test_zero_step(self): + assert_raises(ZeroDivisionError, np.arange, 0, 10, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 10.0, 0.0) + + # empty range + assert_raises(ZeroDivisionError, np.arange, 0, 0, 0) + assert_raises(ZeroDivisionError, np.arange, 0.0, 0.0, 0.0) + + +class TestArrayFinalize(object): + """ Tests __array_finalize__ """ + + def test_receives_base(self): + # gh-11237 + class SavesBase(np.ndarray): + def __array_finalize__(self, obj): + self.saved_base = self.base + + a = np.array(1).view(SavesBase) + assert_(a.saved_base is a.base) + + def test_lifetime_on_error(self): + # gh-11237 + class RaisesInFinalize(np.ndarray): + def __array_finalize__(self, obj): + # crash, but keep this object alive + raise Exception(self) + + # a plain object can't be weakref'd + class Dummy(object): pass + + # get a weak reference to an object within an array + obj_arr = np.array(Dummy()) + obj_ref = weakref.ref(obj_arr[()]) + + # get an array that crashed in __array_finalize__ + with assert_raises(Exception) as e: + obj_arr.view(RaisesInFinalize) + if sys.version_info.major == 2: + # prevent an extra reference being kept + sys.exc_clear() + + obj_subarray = e.exception.args[0] + del e + assert_(isinstance(obj_subarray, RaisesInFinalize)) + + # reference should still be held by obj_arr + break_cycles() + assert_(obj_ref() is not None, "object should not already be dead") + + del obj_arr + break_cycles() + assert_(obj_ref() is not None, "obj_arr should not hold the last reference") + + del obj_subarray + break_cycles() + assert_(obj_ref() is None, "no references should remain") + + +def test_orderconverter_with_nonASCII_unicode_ordering(): + # gh-7475 + a = np.arange(5) + assert_raises(ValueError, a.flatten, order=u'\xe2') + + +def test_equal_override(): + # gh-9153: ndarray.__eq__ uses special logic for structured arrays, which + # did not respect overrides with __array_priority__ or __array_ufunc__. + # The PR fixed this for __array_priority__ and __array_ufunc__ = None. 
+ class MyAlwaysEqual(object): + def __eq__(self, other): + return "eq" + + def __ne__(self, other): + return "ne" + + class MyAlwaysEqualOld(MyAlwaysEqual): + __array_priority__ = 10000 + + class MyAlwaysEqualNew(MyAlwaysEqual): + __array_ufunc__ = None + + array = np.array([(0, 1), (2, 3)], dtype='i4,i4') + for my_always_equal_cls in MyAlwaysEqualOld, MyAlwaysEqualNew: + my_always_equal = my_always_equal_cls() + assert_equal(my_always_equal == array, 'eq') + assert_equal(array == my_always_equal, 'eq') + assert_equal(my_always_equal != array, 'ne') + assert_equal(array != my_always_equal, 'ne') + + +def test_npymath_complex(): + # Smoketest npymath functions + from numpy.core._multiarray_tests import ( + npy_cabs, npy_carg) + + funcs = {npy_cabs: np.absolute, + npy_carg: np.angle} + vals = (1, np.inf, -np.inf, np.nan) + types = (np.complex64, np.complex128, np.clongdouble) + + for fun, npfun in funcs.items(): + for x, y in itertools.product(vals, vals): + for t in types: + z = t(complex(x, y)) + got = fun(z) + expected = npfun(z) + assert_allclose(got, expected) + + +def test_npymath_real(): + # Smoketest npymath functions + from numpy.core._multiarray_tests import ( + npy_log10, npy_cosh, npy_sinh, npy_tan, npy_tanh) + + funcs = {npy_log10: np.log10, + npy_cosh: np.cosh, + npy_sinh: np.sinh, + npy_tan: np.tan, + npy_tanh: np.tanh} + vals = (1, np.inf, -np.inf, np.nan) + types = (np.float32, np.float64, np.longdouble) + + with np.errstate(all='ignore'): + for fun, npfun in funcs.items(): + for x, t in itertools.product(vals, types): + z = t(x) + got = fun(z) + expected = npfun(z) + assert_allclose(got, expected) + +def test_uintalignment_and_alignment(): + # alignment code needs to satisfy these requrements: + # 1. numpy structs match C struct layout + # 2. ufuncs/casting is safe wrt to aligned access + # 3. copy code is safe wrt to "uint alidned" access + # + # Complex types are the main problem, whose alignment may not be the same + # as their "uint alignment". + # + # This test might only fail on certain platforms, where uint64 alignment is + # not equal to complex64 alignment. The second 2 tests will only fail + # for DEBUG=1. + + d1 = np.dtype('u1,c8', align=True) + d2 = np.dtype('u4,c8', align=True) + d3 = np.dtype({'names': ['a', 'b'], 'formats': ['u1', d1]}, align=True) + + assert_equal(np.zeros(1, dtype=d1)['f1'].flags['ALIGNED'], True) + assert_equal(np.zeros(1, dtype=d2)['f1'].flags['ALIGNED'], True) + assert_equal(np.zeros(1, dtype='u1,c8')['f1'].flags['ALIGNED'], False) + + # check that C struct matches numpy struct size + s = _multiarray_tests.get_struct_alignments() + for d, (alignment, size) in zip([d1,d2,d3], s): + assert_equal(d.alignment, alignment) + assert_equal(d.itemsize, size) + + # check that ufuncs don't complain in debug mode + # (this is probably OK if the aligned flag is true above) + src = np.zeros((2,2), dtype=d1)['f1'] # 4-byte aligned, often + np.exp(src) # assert fails? + + # check that copy code doesn't complain in debug mode + dst = np.zeros((2,2), dtype='c8') + dst[:,1] = src[:,1] # assert in lowlevel_strided_loops fails? + +class TestAlignment(object): + # adapted from scipy._lib.tests.test__util.test__aligned_zeros + # Checks that unusual memory alignments don't trip up numpy. 
+ # In particular, check RELAXED_STRIDES don't trip alignment assertions in + # NDEBUG mode for size-0 arrays (gh-12503) + + def check(self, shape, dtype, order, align): + err_msg = repr((shape, dtype, order, align)) + x = _aligned_zeros(shape, dtype, order, align=align) + if align is None: + align = np.dtype(dtype).alignment + assert_equal(x.__array_interface__['data'][0] % align, 0) + if hasattr(shape, '__len__'): + assert_equal(x.shape, shape, err_msg) + else: + assert_equal(x.shape, (shape,), err_msg) + assert_equal(x.dtype, dtype) + if order == "C": + assert_(x.flags.c_contiguous, err_msg) + elif order == "F": + if x.size > 0: + assert_(x.flags.f_contiguous, err_msg) + elif order is None: + assert_(x.flags.c_contiguous, err_msg) + else: + raise ValueError() + + def test_various_alignments(self): + for align in [1, 2, 3, 4, 8, 12, 16, 32, 64, None]: + for n in [0, 1, 3, 11]: + for order in ["C", "F", None]: + for dtype in list(np.typecodes["All"]) + ['i4,i4,i4']: + if dtype == 'O': + # object dtype can't be misaligned + continue + for shape in [n, (1, 2, 3, n)]: + self.check(shape, np.dtype(dtype), order, align) + + def test_strided_loop_alignments(self): + # particularly test that complex64 and float128 use right alignment + # code-paths, since these are particularly problematic. It is useful to + # turn on USE_DEBUG for this test, so lowlevel-loop asserts are run. + for align in [1, 2, 4, 8, 12, 16, None]: + xf64 = _aligned_zeros(3, np.float64) + + xc64 = _aligned_zeros(3, np.complex64, align=align) + xf128 = _aligned_zeros(3, np.longdouble, align=align) + + # test casting, both to and from misaligned + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning, "Casting complex values") + xc64.astype('f8') + xf64.astype(np.complex64) + test = xc64 + xf64 + + xf128.astype('f8') + xf64.astype(np.longdouble) + test = xf128 + xf64 + + test = xf128 + xc64 + + # test copy, both to and from misaligned + # contig copy + xf64[:] = xf64.copy() + xc64[:] = xc64.copy() + xf128[:] = xf128.copy() + # strided copy + xf64[::2] = xf64[::2].copy() + xc64[::2] = xc64[::2].copy() + xf128[::2] = xf128[::2].copy() + +def test_getfield(): + a = np.arange(32, dtype='uint16') + if sys.byteorder == 'little': + i = 0 + j = 1 + else: + i = 1 + j = 0 + b = a.getfield('int8', i) + assert_equal(b, a) + b = a.getfield('int8', j) + assert_equal(b, 0) + pytest.raises(ValueError, a.getfield, 'uint8', -1) + pytest.raises(ValueError, a.getfield, 'uint8', 16) + pytest.raises(ValueError, a.getfield, 'uint64', 0) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_nditer.py b/contrib/python/numpy/py2/numpy/core/tests/test_nditer.py new file mode 100644 index 0000000000..3a24ce55ec --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_nditer.py @@ -0,0 +1,2861 @@ +from __future__ import division, absolute_import, print_function + +import sys +import pytest + +import numpy as np +import numpy.core._multiarray_tests as _multiarray_tests +from numpy import array, arange, nditer, all +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, + HAS_REFCOUNT, suppress_warnings + ) + + +def iter_multi_index(i): + ret = [] + while not i.finished: + ret.append(i.multi_index) + i.iternext() + return ret + +def iter_indices(i): + ret = [] + while not i.finished: + ret.append(i.index) + i.iternext() + return ret + +def iter_iterindices(i): + ret = [] + while not i.finished: + ret.append(i.iterindex) + i.iternext() + return ret + +@pytest.mark.skipif(not HAS_REFCOUNT, 
reason="Python lacks refcounts") +def test_iter_refcount(): + # Make sure the iterator doesn't leak + + # Basic + a = arange(6) + dt = np.dtype('f4').newbyteorder() + rc_a = sys.getrefcount(a) + rc_dt = sys.getrefcount(dt) + with nditer(a, [], + [['readwrite', 'updateifcopy']], + casting='unsafe', + op_dtypes=[dt]) as it: + assert_(not it.iterationneedsapi) + assert_(sys.getrefcount(a) > rc_a) + assert_(sys.getrefcount(dt) > rc_dt) + # del 'it' + it = None + assert_equal(sys.getrefcount(a), rc_a) + assert_equal(sys.getrefcount(dt), rc_dt) + + # With a copy + a = arange(6, dtype='f4') + dt = np.dtype('f4') + rc_a = sys.getrefcount(a) + rc_dt = sys.getrefcount(dt) + it = nditer(a, [], + [['readwrite']], + op_dtypes=[dt]) + rc2_a = sys.getrefcount(a) + rc2_dt = sys.getrefcount(dt) + it2 = it.copy() + assert_(sys.getrefcount(a) > rc2_a) + assert_(sys.getrefcount(dt) > rc2_dt) + it = None + assert_equal(sys.getrefcount(a), rc2_a) + assert_equal(sys.getrefcount(dt), rc2_dt) + it2 = None + assert_equal(sys.getrefcount(a), rc_a) + assert_equal(sys.getrefcount(dt), rc_dt) + + del it2 # avoid pyflakes unused variable warning + +def test_iter_best_order(): + # The iterator should always find the iteration order + # with increasing memory addresses + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, [], [['readonly']]) + assert_equal([x for x in i], a) + # Fortran-order + i = nditer(aview.T, [], [['readonly']]) + assert_equal([x for x in i], a) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), [], [['readonly']]) + assert_equal([x for x in i], a) + +def test_iter_c_order(): + # Test forcing C order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='C') + assert_equal([x for x in i], aview.ravel(order='C')) + # Fortran-order + i = nditer(aview.T, order='C') + assert_equal([x for x in i], aview.T.ravel(order='C')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='C') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='C')) + +def test_iter_f_order(): + # Test forcing F order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='F') + assert_equal([x for x in i], aview.ravel(order='F')) + # Fortran-order + i = nditer(aview.T, order='F') + 
assert_equal([x for x in i], aview.T.ravel(order='F')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='F') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='F')) + +def test_iter_c_or_f_order(): + # Test forcing any contiguous (C or F) order + + # Test the ordering for 1-D to 5-D shapes + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + a = arange(np.prod(shape)) + # Test each combination of positive and negative strides + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, order='A') + assert_equal([x for x in i], aview.ravel(order='A')) + # Fortran-order + i = nditer(aview.T, order='A') + assert_equal([x for x in i], aview.T.ravel(order='A')) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), order='A') + assert_equal([x for x in i], + aview.swapaxes(0, 1).ravel(order='A')) + +def test_iter_best_order_multi_index_1d(): + # The multi-indices should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0,), (1,), (2,), (3,)]) + # 1D reversed order + i = nditer(a[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(3,), (2,), (1,), (0,)]) + +def test_iter_best_order_multi_index_2d(): + # The multi-indices should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 0), (1, 1), (1, 2), (0, 0), (0, 1), (0, 2)]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 2), (0, 1), (0, 0), (1, 2), (1, 1), (1, 0)]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 2), (1, 1), (1, 0), (0, 2), (0, 1), (0, 0)]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 0), (0, 0), (1, 1), (0, 1), (1, 2), (0, 2)]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(0, 2), (1, 2), (0, 1), (1, 1), (0, 0), (1, 0)]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), [(1, 2), (0, 2), (1, 1), (0, 1), (1, 0), (0, 0)]) + +def test_iter_best_order_multi_index_3d(): + # The multi-indices should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (0, 2, 0), (0, 2, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1), (1, 2, 0), (1, 2, 1)]) + # 3D Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 0), 
(1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 2, 0), (1, 2, 0), + (0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1), (1, 2, 1)]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1), (1, 2, 0), (1, 2, 1), + (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (0, 2, 0), (0, 2, 1)]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 2, 0), (0, 2, 1), (0, 1, 0), (0, 1, 1), (0, 0, 0), (0, 0, 1), + (1, 2, 0), (1, 2, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 1), (0, 0, 0), (0, 1, 1), (0, 1, 0), (0, 2, 1), (0, 2, 0), + (1, 0, 1), (1, 0, 0), (1, 1, 1), (1, 1, 0), (1, 2, 1), (1, 2, 0)]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(1, 0, 0), (0, 0, 0), (1, 1, 0), (0, 1, 0), (1, 2, 0), (0, 2, 0), + (1, 0, 1), (0, 0, 1), (1, 1, 1), (0, 1, 1), (1, 2, 1), (0, 2, 1)]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 2, 0), (1, 2, 0), (0, 1, 0), (1, 1, 0), (0, 0, 0), (1, 0, 0), + (0, 2, 1), (1, 2, 1), (0, 1, 1), (1, 1, 1), (0, 0, 1), (1, 0, 1)]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['multi_index'], [['readonly']]) + assert_equal(iter_multi_index(i), + [(0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1), (1, 2, 1), + (0, 0, 0), (1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 2, 0), (1, 2, 0)]) + +def test_iter_best_order_c_index_1d(): + # The C index should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3]) + # 1D reversed order + i = nditer(a[::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 2, 1, 0]) + +def test_iter_best_order_c_index_2d(): + # The C index should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3, 4, 5]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 3, 1, 4, 2, 5]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 4, 5, 0, 1, 2]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [2, 1, 0, 5, 4, 3]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 4, 3, 2, 1, 0]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 0, 4, 1, 5, 2]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [2, 5, 1, 4, 0, 3]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 2, 4, 1, 3, 0]) + +def test_iter_best_order_c_index_3d(): + # The C index should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + # 3D 
Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 6, 2, 8, 4, 10, 1, 7, 3, 9, 5, 11]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 5, 2, 3, 0, 1, 10, 11, 8, 9, 6, 7]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 0, 8, 2, 10, 4, 7, 1, 9, 3, 11, 5]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 10, 2, 8, 0, 6, 5, 11, 3, 9, 1, 7]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['c_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 7, 3, 9, 5, 11, 0, 6, 2, 8, 4, 10]) + +def test_iter_best_order_f_index_1d(): + # The Fortran index should be correct with any reordering + + a = arange(4) + # 1D order + i = nditer(a, ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3]) + # 1D reversed order + i = nditer(a[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [3, 2, 1, 0]) + +def test_iter_best_order_f_index_2d(): + # The Fortran index should be correct with any reordering + + a = arange(6) + # 2D C-order + i = nditer(a.reshape(2, 3), ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 2, 4, 1, 3, 5]) + # 2D Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F'), + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [0, 1, 2, 3, 4, 5]) + # 2D reversed C-order + i = nditer(a.reshape(2, 3)[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [1, 3, 5, 0, 2, 4]) + i = nditer(a.reshape(2, 3)[:, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [4, 2, 0, 5, 3, 1]) + i = nditer(a.reshape(2, 3)[::-1, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 3, 1, 4, 2, 0]) + # 2D reversed Fortran-order + i = nditer(a.reshape(2, 3).copy(order='F')[::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [1, 0, 3, 2, 5, 4]) + i = nditer(a.reshape(2, 3).copy(order='F')[:, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [4, 5, 2, 3, 0, 1]) + i = nditer(a.reshape(2, 3).copy(order='F')[::-1, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), [5, 4, 3, 2, 1, 0]) + +def test_iter_best_order_f_index_3d(): + # The Fortran index should be correct with any reordering + + a = arange(12) + # 3D C-order + i = nditer(a.reshape(2, 3, 2), ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 6, 2, 8, 4, 10, 1, 7, 3, 9, 5, 11]) + # 3D Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F'), + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + # 3D reversed C-order + i = nditer(a.reshape(2, 3, 2)[::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 7, 3, 9, 5, 11, 0, 6, 2, 8, 4, 10]) + i = nditer(a.reshape(2, 3, 2)[:, ::-1], ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 10, 2, 8, 0, 6, 5, 11, 3, 9, 1, 7]) + i = nditer(a.reshape(2, 3, 2)[:,:, ::-1], ['f_index'], [['readonly']]) + 
assert_equal(iter_indices(i), + [6, 0, 8, 2, 10, 4, 7, 1, 9, 3, 11, 5]) + # 3D reversed Fortran-order + i = nditer(a.reshape(2, 3, 2).copy(order='F')[::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [4, 5, 2, 3, 0, 1, 10, 11, 8, 9, 6, 7]) + i = nditer(a.reshape(2, 3, 2).copy(order='F')[:,:, ::-1], + ['f_index'], [['readonly']]) + assert_equal(iter_indices(i), + [6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]) + +def test_iter_no_inner_full_coalesce(): + # Check no_inner iterators which coalesce into a single inner loop + + for shape in [(5,), (3, 4), (2, 3, 4), (2, 3, 4, 3), (2, 3, 2, 2, 3)]: + size = np.prod(shape) + a = arange(size) + # Test each combination of forward and backwards indexing + for dirs in range(2**len(shape)): + dirs_index = [slice(None)]*len(shape) + for bit in range(len(shape)): + if ((2**bit) & dirs): + dirs_index[bit] = slice(None, None, -1) + dirs_index = tuple(dirs_index) + + aview = a.reshape(shape)[dirs_index] + # C-order + i = nditer(aview, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + # Fortran-order + i = nditer(aview.T, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + # Other order + if len(shape) > 2: + i = nditer(aview.swapaxes(0, 1), + ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (size,)) + +def test_iter_no_inner_dim_coalescing(): + # Check no_inner iterators whose dimensions may not coalesce completely + + # Skipping the last element in a dimension prevents coalescing + # with the next-bigger dimension + a = arange(24).reshape(2, 3, 4)[:,:, :-1] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 2) + assert_equal(i[0].shape, (3,)) + a = arange(24).reshape(2, 3, 4)[:, :-1,:] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 2) + assert_equal(i[0].shape, (8,)) + a = arange(24).reshape(2, 3, 4)[:-1,:,:] + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (12,)) + + # Even with lots of 1-sized dimensions, should still coalesce + a = arange(24).reshape(1, 1, 2, 1, 1, 3, 1, 1, 4, 1, 1) + i = nditer(a, ['external_loop'], [['readonly']]) + assert_equal(i.ndim, 1) + assert_equal(i[0].shape, (24,)) + +def test_iter_dim_coalescing(): + # Check that the correct number of dimensions are coalesced + + # Tracking a multi-index disables coalescing + a = arange(24).reshape(2, 3, 4) + i = nditer(a, ['multi_index'], [['readonly']]) + assert_equal(i.ndim, 3) + + # A tracked index can allow coalescing if it's compatible with the array + a3d = arange(24).reshape(2, 3, 4) + i = nditer(a3d, ['c_index'], [['readonly']]) + assert_equal(i.ndim, 1) + i = nditer(a3d.swapaxes(0, 1), ['c_index'], [['readonly']]) + assert_equal(i.ndim, 3) + i = nditer(a3d.T, ['c_index'], [['readonly']]) + assert_equal(i.ndim, 3) + i = nditer(a3d.T, ['f_index'], [['readonly']]) + assert_equal(i.ndim, 1) + i = nditer(a3d.T.swapaxes(0, 1), ['f_index'], [['readonly']]) + assert_equal(i.ndim, 3) + + # When C or F order is forced, coalescing may still occur + a3d = arange(24).reshape(2, 3, 4) + i = nditer(a3d, order='C') + assert_equal(i.ndim, 1) + i = nditer(a3d.T, order='C') + assert_equal(i.ndim, 3) + i = nditer(a3d, order='F') + assert_equal(i.ndim, 3) + i = nditer(a3d.T, order='F') + 
assert_equal(i.ndim, 1) + i = nditer(a3d, order='A') + assert_equal(i.ndim, 1) + i = nditer(a3d.T, order='A') + assert_equal(i.ndim, 1) + +def test_iter_broadcasting(): + # Standard NumPy broadcasting rules + + # 1D with scalar + i = nditer([arange(6), np.int32(2)], ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (6,)) + + # 2D with scalar + i = nditer([arange(6).reshape(2, 3), np.int32(2)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + # 2D with 1D + i = nditer([arange(6).reshape(2, 3), arange(3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + i = nditer([arange(2).reshape(2, 1), arange(3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + # 2D with 2D + i = nditer([arange(2).reshape(2, 1), arange(3).reshape(1, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 6) + assert_equal(i.shape, (2, 3)) + + # 3D with scalar + i = nditer([np.int32(2), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 1D + i = nditer([arange(3), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(3), arange(8).reshape(4, 2, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 2D + i = nditer([arange(6).reshape(2, 3), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(2).reshape(2, 1), arange(24).reshape(4, 2, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(3).reshape(1, 3), arange(8).reshape(4, 2, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + # 3D with 3D + i = nditer([arange(2).reshape(1, 2, 1), arange(3).reshape(1, 1, 3), + arange(4).reshape(4, 1, 1)], + ['multi_index'], [['readonly']]*3) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(6).reshape(1, 2, 3), arange(4).reshape(4, 1, 1)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + i = nditer([arange(24).reshape(4, 2, 3), arange(12).reshape(4, 1, 3)], + ['multi_index'], [['readonly']]*2) + assert_equal(i.itersize, 24) + assert_equal(i.shape, (4, 2, 3)) + +def test_iter_itershape(): + # Check that allocated outputs work with a specified shape + a = np.arange(6, dtype='i2').reshape(2, 3) + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (2, 3, 4)) + assert_equal(i.operands[1].strides, (24, 8, 2)) + + i = nditer([a.T, None], [], [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (3, 2, 4)) + assert_equal(i.operands[1].strides, (8, 24, 2)) + + i = nditer([a.T, None], [], [['readonly'], ['writeonly', 'allocate']], + order='F', + op_axes=[[0, 1, None], None], + itershape=(-1, -1, 4)) + assert_equal(i.operands[1].shape, (3, 2, 4)) + assert_equal(i.operands[1].strides, (2, 6, 12)) + + # If we specify 1 in the itershape, it shouldn't allow broadcasting + # of that 
dimension to a bigger value + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_axes=[[0, 1, None], None], + itershape=(-1, 1, 4)) + # Test bug that for no op_axes but itershape, they are NULLed correctly + i = np.nditer([np.ones(2), None, None], itershape=(2,)) + +def test_iter_broadcasting_errors(): + # Check that errors are thrown for bad broadcasting shapes + + # 1D with 1D + assert_raises(ValueError, nditer, [arange(2), arange(3)], + [], [['readonly']]*2) + # 2D with 1D + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(2)], + [], [['readonly']]*2) + # 2D with 2D + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(9).reshape(3, 3)], + [], [['readonly']]*2) + assert_raises(ValueError, nditer, + [arange(6).reshape(2, 3), arange(4).reshape(2, 2)], + [], [['readonly']]*2) + # 3D with 3D + assert_raises(ValueError, nditer, + [arange(36).reshape(3, 3, 4), arange(24).reshape(2, 3, 4)], + [], [['readonly']]*2) + assert_raises(ValueError, nditer, + [arange(8).reshape(2, 4, 1), arange(24).reshape(2, 3, 4)], + [], [['readonly']]*2) + + # Verify that the error message mentions the right shapes + try: + nditer([arange(2).reshape(1, 2, 1), + arange(3).reshape(1, 3), + arange(6).reshape(2, 3)], + [], + [['readonly'], ['readonly'], ['writeonly', 'no_broadcast']]) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain the shape of the 3rd operand + assert_(msg.find('(2,3)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,3)' % msg) + # The message should contain the broadcast shape + assert_(msg.find('(1,2,3)') >= 0, + 'Message "%s" doesn\'t contain broadcast shape (1,2,3)' % msg) + + try: + nditer([arange(6).reshape(2, 3), arange(2)], + [], + [['readonly'], ['readonly']], + op_axes=[[0, 1], [0, np.newaxis]], + itershape=(4, 3)) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain "shape->remappedshape" for each operand + assert_(msg.find('(2,3)->(2,3)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,3)->(2,3)' % msg) + assert_(msg.find('(2,)->(2,newaxis)') >= 0, + ('Message "%s" doesn\'t contain remapped operand shape' + + '(2,)->(2,newaxis)') % msg) + # The message should contain the itershape parameter + assert_(msg.find('(4,3)') >= 0, + 'Message "%s" doesn\'t contain itershape parameter (4,3)' % msg) + + try: + nditer([np.zeros((2, 1, 1)), np.zeros((2,))], + [], + [['writeonly', 'no_broadcast'], ['readonly']]) + raise AssertionError('Should have raised a broadcast error') + except ValueError as e: + msg = str(e) + # The message should contain the shape of the bad operand + assert_(msg.find('(2,1,1)') >= 0, + 'Message "%s" doesn\'t contain operand shape (2,1,1)' % msg) + # The message should contain the broadcast shape + assert_(msg.find('(2,1,2)') >= 0, + 'Message "%s" doesn\'t contain the broadcast shape (2,1,2)' % msg) + +def test_iter_flags_errors(): + # Check that bad combinations of flags produce errors + + a = arange(6) + + # Not enough operands + assert_raises(ValueError, nditer, [], [], []) + # Too many operands + assert_raises(ValueError, nditer, [a]*100, [], [['readonly']]*100) + # Bad global flag + assert_raises(ValueError, nditer, [a], ['bad flag'], [['readonly']]) + # Bad op flag + assert_raises(ValueError, nditer, [a], [], [['readonly', 'bad flag']]) + # Bad order parameter + assert_raises(ValueError, nditer, [a], [], 
[['readonly']], order='G') + # Bad casting parameter + assert_raises(ValueError, nditer, [a], [], [['readonly']], casting='noon') + # op_flags must match ops + assert_raises(ValueError, nditer, [a]*3, [], [['readonly']]*2) + # Cannot track both a C and an F index + assert_raises(ValueError, nditer, a, + ['c_index', 'f_index'], [['readonly']]) + # Inner iteration and multi-indices/indices are incompatible + assert_raises(ValueError, nditer, a, + ['external_loop', 'multi_index'], [['readonly']]) + assert_raises(ValueError, nditer, a, + ['external_loop', 'c_index'], [['readonly']]) + assert_raises(ValueError, nditer, a, + ['external_loop', 'f_index'], [['readonly']]) + # Must specify exactly one of readwrite/readonly/writeonly per operand + assert_raises(ValueError, nditer, a, [], [[]]) + assert_raises(ValueError, nditer, a, [], [['readonly', 'writeonly']]) + assert_raises(ValueError, nditer, a, [], [['readonly', 'readwrite']]) + assert_raises(ValueError, nditer, a, [], [['writeonly', 'readwrite']]) + assert_raises(ValueError, nditer, a, + [], [['readonly', 'writeonly', 'readwrite']]) + # Python scalars are always readonly + assert_raises(TypeError, nditer, 1.5, [], [['writeonly']]) + assert_raises(TypeError, nditer, 1.5, [], [['readwrite']]) + # Array scalars are always readonly + assert_raises(TypeError, nditer, np.int32(1), [], [['writeonly']]) + assert_raises(TypeError, nditer, np.int32(1), [], [['readwrite']]) + # Check readonly array + a.flags.writeable = False + assert_raises(ValueError, nditer, a, [], [['writeonly']]) + assert_raises(ValueError, nditer, a, [], [['readwrite']]) + a.flags.writeable = True + # Multi-indices available only with the multi_index flag + i = nditer(arange(6), [], [['readonly']]) + assert_raises(ValueError, lambda i:i.multi_index, i) + # Index available only with an index flag + assert_raises(ValueError, lambda i:i.index, i) + # GotoCoords and GotoIndex incompatible with buffering or no_inner + + def assign_multi_index(i): + i.multi_index = (0,) + + def assign_index(i): + i.index = 0 + + def assign_iterindex(i): + i.iterindex = 0 + + def assign_iterrange(i): + i.iterrange = (0, 1) + i = nditer(arange(6), ['external_loop']) + assert_raises(ValueError, assign_multi_index, i) + assert_raises(ValueError, assign_index, i) + assert_raises(ValueError, assign_iterindex, i) + assert_raises(ValueError, assign_iterrange, i) + i = nditer(arange(6), ['buffered']) + assert_raises(ValueError, assign_multi_index, i) + assert_raises(ValueError, assign_index, i) + assert_raises(ValueError, assign_iterrange, i) + # Can't iterate if size is zero + assert_raises(ValueError, nditer, np.array([])) + +def test_iter_slice(): + a, b, c = np.arange(3), np.arange(3), np.arange(3.) + i = nditer([a, b, c], [], ['readwrite']) + with i: + i[0:2] = (3, 3) + assert_equal(a, [3, 1, 2]) + assert_equal(b, [3, 1, 2]) + assert_equal(c, [0, 1, 2]) + i[1] = 12 + assert_equal(i[0:2], [3, 12]) + +def test_iter_assign_mapping(): + a = np.arange(24, dtype='f8').reshape(2, 3, 4).T + it = np.nditer(a, [], [['readwrite', 'updateifcopy']], + casting='same_kind', op_dtypes=[np.dtype('f4')]) + with it: + it.operands[0][...] = 3 + it.operands[0][...] = 14 + assert_equal(a, 14) + it = np.nditer(a, [], [['readwrite', 'updateifcopy']], + casting='same_kind', op_dtypes=[np.dtype('f4')]) + with it: + x = it.operands[0][-1:1] + x[...] = 14 + it.operands[0][...] 
= -1234 + assert_equal(a, -1234) + # check for no warnings on dealloc + x = None + it = None + +def test_iter_nbo_align_contig(): + # Check that byte order, alignment, and contig changes work + + # Byte order change by requesting a specific dtype + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + assert_(a.dtype.byteorder != au.dtype.byteorder) + i = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', + op_dtypes=[np.dtype('f4')]) + with i: + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) + assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) + assert_equal(i.operands[0], a) + i.operands[0][:] = 2 + assert_equal(au, [2]*6) + del i # should not raise a warning + # Byte order change by requesting NBO + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + assert_(a.dtype.byteorder != au.dtype.byteorder) + with nditer(au, [], [['readwrite', 'updateifcopy', 'nbo']], + casting='equiv') as i: + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) + assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) + assert_equal(i.operands[0], a) + i.operands[0][:] = 12345 + i.operands[0][:] = 2 + assert_equal(au, [2]*6) + + # Unaligned input + a = np.zeros((6*4+1,), dtype='i1')[1:] + a.dtype = 'f4' + a[:] = np.arange(6, dtype='f4') + assert_(not a.flags.aligned) + # Without 'aligned', shouldn't copy + i = nditer(a, [], [['readonly']]) + assert_(not i.operands[0].flags.aligned) + assert_equal(i.operands[0], a) + # With 'aligned', should make a copy + with nditer(a, [], [['readwrite', 'updateifcopy', 'aligned']]) as i: + assert_(i.operands[0].flags.aligned) + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.operands[0], a) + i.operands[0][:] = 3 + assert_equal(a, [3]*6) + + # Discontiguous input + a = arange(12) + # If it is contiguous, shouldn't copy + i = nditer(a[:6], [], [['readonly']]) + assert_(i.operands[0].flags.contiguous) + assert_equal(i.operands[0], a[:6]) + # If it isn't contiguous, should buffer + i = nditer(a[::2], ['buffered', 'external_loop'], + [['readonly', 'contig']], + buffersize=10) + assert_(i[0].flags.contiguous) + assert_equal(i[0], a[::2]) + +def test_iter_array_cast(): + # Check that arrays are cast as requested + + # No cast 'f4' -> 'f4' + a = np.arange(6, dtype='f4').reshape(2, 3) + i = nditer(a, [], [['readwrite']], op_dtypes=[np.dtype('f4')]) + with i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f4')) + + # Byte-order cast '<f4' -> '>f4' + a = np.arange(6, dtype='<f4').reshape(2, 3) + with nditer(a, [], [['readwrite', 'updateifcopy']], + casting='equiv', + op_dtypes=[np.dtype('>f4')]) as i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('>f4')) + + # Safe case 'f4' -> 'f8' + a = np.arange(24, dtype='f4').reshape(2, 3, 4).swapaxes(1, 2) + i = nditer(a, [], [['readonly', 'copy']], + casting='safe', + op_dtypes=[np.dtype('f8')]) + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f8')) + # The memory layout of the temporary should match a (a is (48,4,16)) + # except negative strides get flipped to positive strides. 
+ assert_equal(i.operands[0].strides, (96, 8, 32)) + a = a[::-1,:, ::-1] + i = nditer(a, [], [['readonly', 'copy']], + casting='safe', + op_dtypes=[np.dtype('f8')]) + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f8')) + assert_equal(i.operands[0].strides, (96, 8, 32)) + + # Same-kind cast 'f8' -> 'f4' -> 'f8' + a = np.arange(24, dtype='f8').reshape(2, 3, 4).T + with nditer(a, [], + [['readwrite', 'updateifcopy']], + casting='same_kind', + op_dtypes=[np.dtype('f4')]) as i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f4')) + assert_equal(i.operands[0].strides, (4, 16, 48)) + # Check that WRITEBACKIFCOPY is activated at exit + i.operands[0][2, 1, 1] = -12.5 + assert_(a[2, 1, 1] != -12.5) + assert_equal(a[2, 1, 1], -12.5) + + a = np.arange(6, dtype='i4')[::-2] + with nditer(a, [], + [['writeonly', 'updateifcopy']], + casting='unsafe', + op_dtypes=[np.dtype('f4')]) as i: + assert_equal(i.operands[0].dtype, np.dtype('f4')) + # Even though the stride was negative in 'a', it + # becomes positive in the temporary + assert_equal(i.operands[0].strides, (4,)) + i.operands[0][:] = [1, 2, 3] + assert_equal(a, [1, 2, 3]) + +def test_iter_array_cast_errors(): + # Check that invalid casts are caught + + # Need to enable copying for casts to occur + assert_raises(TypeError, nditer, arange(2, dtype='f4'), [], + [['readonly']], op_dtypes=[np.dtype('f8')]) + # Also need to allow casting for casts to occur + assert_raises(TypeError, nditer, arange(2, dtype='f4'), [], + [['readonly', 'copy']], casting='no', + op_dtypes=[np.dtype('f8')]) + assert_raises(TypeError, nditer, arange(2, dtype='f4'), [], + [['readonly', 'copy']], casting='equiv', + op_dtypes=[np.dtype('f8')]) + assert_raises(TypeError, nditer, arange(2, dtype='f8'), [], + [['writeonly', 'updateifcopy']], + casting='no', + op_dtypes=[np.dtype('f4')]) + assert_raises(TypeError, nditer, arange(2, dtype='f8'), [], + [['writeonly', 'updateifcopy']], + casting='equiv', + op_dtypes=[np.dtype('f4')]) + # '<f4' -> '>f4' should not work with casting='no' + assert_raises(TypeError, nditer, arange(2, dtype='<f4'), [], + [['readonly', 'copy']], casting='no', + op_dtypes=[np.dtype('>f4')]) + # 'f4' -> 'f8' is a safe cast, but 'f8' -> 'f4' isn't + assert_raises(TypeError, nditer, arange(2, dtype='f4'), [], + [['readwrite', 'updateifcopy']], + casting='safe', + op_dtypes=[np.dtype('f8')]) + assert_raises(TypeError, nditer, arange(2, dtype='f8'), [], + [['readwrite', 'updateifcopy']], + casting='safe', + op_dtypes=[np.dtype('f4')]) + # 'f4' -> 'i4' is neither a safe nor a same-kind cast + assert_raises(TypeError, nditer, arange(2, dtype='f4'), [], + [['readonly', 'copy']], + casting='same_kind', + op_dtypes=[np.dtype('i4')]) + assert_raises(TypeError, nditer, arange(2, dtype='i4'), [], + [['writeonly', 'updateifcopy']], + casting='same_kind', + op_dtypes=[np.dtype('f4')]) + +def test_iter_scalar_cast(): + # Check that scalars are cast as requested + + # No cast 'f4' -> 'f4' + i = nditer(np.float32(2.5), [], [['readonly']], + op_dtypes=[np.dtype('f4')]) + assert_equal(i.dtypes[0], np.dtype('f4')) + assert_equal(i.value.dtype, np.dtype('f4')) + assert_equal(i.value, 2.5) + # Safe cast 'f4' -> 'f8' + i = nditer(np.float32(2.5), [], + [['readonly', 'copy']], + casting='safe', + op_dtypes=[np.dtype('f8')]) + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.value.dtype, np.dtype('f8')) + assert_equal(i.value, 2.5) + # Same-kind cast 'f8' -> 'f4' + i = nditer(np.float64(2.5), [], + [['readonly', 
'copy']], + casting='same_kind', + op_dtypes=[np.dtype('f4')]) + assert_equal(i.dtypes[0], np.dtype('f4')) + assert_equal(i.value.dtype, np.dtype('f4')) + assert_equal(i.value, 2.5) + # Unsafe cast 'f8' -> 'i4' + i = nditer(np.float64(3.0), [], + [['readonly', 'copy']], + casting='unsafe', + op_dtypes=[np.dtype('i4')]) + assert_equal(i.dtypes[0], np.dtype('i4')) + assert_equal(i.value.dtype, np.dtype('i4')) + assert_equal(i.value, 3) + # Readonly scalars may be cast even without setting COPY or BUFFERED + i = nditer(3, [], [['readonly']], op_dtypes=[np.dtype('f8')]) + assert_equal(i[0].dtype, np.dtype('f8')) + assert_equal(i[0], 3.) + +def test_iter_scalar_cast_errors(): + # Check that invalid casts are caught + + # Need to allow copying/buffering for write casts of scalars to occur + assert_raises(TypeError, nditer, np.float32(2), [], + [['readwrite']], op_dtypes=[np.dtype('f8')]) + assert_raises(TypeError, nditer, 2.5, [], + [['readwrite']], op_dtypes=[np.dtype('f4')]) + # 'f8' -> 'f4' isn't a safe cast if the value would overflow + assert_raises(TypeError, nditer, np.float64(1e60), [], + [['readonly']], + casting='safe', + op_dtypes=[np.dtype('f4')]) + # 'f4' -> 'i4' is neither a safe nor a same-kind cast + assert_raises(TypeError, nditer, np.float32(2), [], + [['readonly']], + casting='same_kind', + op_dtypes=[np.dtype('i4')]) + +def test_iter_object_arrays_basic(): + # Check that object arrays work + + obj = {'a':3,'b':'d'} + a = np.array([[1, 2, 3], None, obj, None], dtype='O') + if HAS_REFCOUNT: + rc = sys.getrefcount(obj) + + # Need to allow references for object arrays + assert_raises(TypeError, nditer, a) + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a, ['refs_ok'], ['readonly']) + vals = [x_[()] for x_ in i] + assert_equal(np.array(vals, dtype='O'), a) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a.reshape(2, 2).T, ['refs_ok', 'buffered'], + ['readonly'], order='C') + assert_(i.iterationneedsapi) + vals = [x_[()] for x_ in i] + assert_equal(np.array(vals, dtype='O'), a.reshape(2, 2).ravel(order='F')) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(obj), rc) + + i = nditer(a.reshape(2, 2).T, ['refs_ok', 'buffered'], + ['readwrite'], order='C') + with i: + for x in i: + x[...] = None + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_(sys.getrefcount(obj) == rc-1) + assert_equal(a, np.array([None]*4, dtype='O')) + +def test_iter_object_arrays_conversions(): + # Conversions to/from objects + a = np.arange(6, dtype='O') + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + with i: + for x in i: + x[...] += 1 + assert_equal(a, np.arange(6)+1) + + a = np.arange(6, dtype='i4') + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + with i: + for x in i: + x[...] += 1 + assert_equal(a, np.arange(6)+1) + + # Non-contiguous object array + a = np.zeros((6,), dtype=[('p', 'i1'), ('a', 'O')]) + a = a['a'] + a[:] = np.arange(6) + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + with i: + for x in i: + x[...] += 1 + assert_equal(a, np.arange(6)+1) + + #Non-contiguous value array + a = np.zeros((6,), dtype=[('p', 'i1'), ('a', 'i4')]) + a = a['a'] + a[:] = np.arange(6) + 98172488 + i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + with i: + ob = i[0][()] + if HAS_REFCOUNT: + rc = sys.getrefcount(ob) + for x in i: + x[...] 
+= 1 + if HAS_REFCOUNT: + assert_(sys.getrefcount(ob) == rc-1) + assert_equal(a, np.arange(6)+98172489) + +def test_iter_common_dtype(): + # Check that the iterator finds a common data type correctly + + i = nditer([array([3], dtype='f4'), array([0], dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.dtypes[1], np.dtype('f8')) + i = nditer([array([3], dtype='i4'), array([0], dtype='f4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('f8')) + assert_equal(i.dtypes[1], np.dtype('f8')) + i = nditer([array([3], dtype='f4'), array(0, dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='same_kind') + assert_equal(i.dtypes[0], np.dtype('f4')) + assert_equal(i.dtypes[1], np.dtype('f4')) + i = nditer([array([3], dtype='u4'), array(0, dtype='i4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('u4')) + assert_equal(i.dtypes[1], np.dtype('u4')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4')], + ['common_dtype'], + [['readonly', 'copy']]*2, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('i8')) + assert_equal(i.dtypes[1], np.dtype('i8')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4'), + array([2j], dtype='c8'), array([9], dtype='f8')], + ['common_dtype'], + [['readonly', 'copy']]*4, + casting='safe') + assert_equal(i.dtypes[0], np.dtype('c16')) + assert_equal(i.dtypes[1], np.dtype('c16')) + assert_equal(i.dtypes[2], np.dtype('c16')) + assert_equal(i.dtypes[3], np.dtype('c16')) + assert_equal(i.value, (3, -12, 2j, 9)) + + # When allocating outputs, other outputs aren't factored in + i = nditer([array([3], dtype='i4'), None, array([2j], dtype='c16')], [], + [['readonly', 'copy'], + ['writeonly', 'allocate'], + ['writeonly']], + casting='safe') + assert_equal(i.dtypes[0], np.dtype('i4')) + assert_equal(i.dtypes[1], np.dtype('i4')) + assert_equal(i.dtypes[2], np.dtype('c16')) + # But, if common data types are requested, they are + i = nditer([array([3], dtype='i4'), None, array([2j], dtype='c16')], + ['common_dtype'], + [['readonly', 'copy'], + ['writeonly', 'allocate'], + ['writeonly']], + casting='safe') + assert_equal(i.dtypes[0], np.dtype('c16')) + assert_equal(i.dtypes[1], np.dtype('c16')) + assert_equal(i.dtypes[2], np.dtype('c16')) + +def test_iter_copy_if_overlap(): + # Ensure the iterator makes copies on read/write overlap, if requested + + # Copy not needed, 1 op + for flag in ['readonly', 'writeonly', 'readwrite']: + a = arange(10) + i = nditer([a], ['copy_if_overlap'], [[flag]]) + with i: + assert_(i.operands[0] is a) + + # Copy needed, 2 ops, read-write overlap + x = arange(10) + a = x[1:] + b = x[:-1] + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(not np.shares_memory(*i.operands)) + + # Copy not needed with elementwise, 2 ops, exactly same arrays + x = arange(10) + a = x + b = x + i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_assume_elementwise'], + ['readwrite', 'overlap_assume_elementwise']]) + with i: + assert_(i.operands[0] is a and i.operands[1] is b) + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) + + # Copy not needed, 2 ops, no overlap + x = arange(10) + a = x[::2] + b = x[1::2] + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) + 
assert_(i.operands[0] is a and i.operands[1] is b) + + # Copy needed, 2 ops, read-write overlap + x = arange(4, dtype=np.int8) + a = x[3:] + b = x.view(np.int32)[:1] + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) as i: + assert_(not np.shares_memory(*i.operands)) + + # Copy needed, 3 ops, read-write overlap + for flag in ['writeonly', 'readwrite']: + x = np.ones([10, 10]) + a = x + b = x.T + c = x + with nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], [flag]]) as i: + a2, b2, c2 = i.operands + assert_(not np.shares_memory(a2, c2)) + assert_(not np.shares_memory(b2, c2)) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = x.T + c = x + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = np.ones([10, 10]) + c = x.T + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, write-only overlap + x = np.arange(7) + a = x[:3] + b = x[3:6] + c = x[4:7] + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['writeonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + +def test_iter_op_axes(): + # Check that custom axes work + + # Reverse the axes + a = arange(6).reshape(2, 3) + i = nditer([a, a.T], [], [['readonly']]*2, op_axes=[[0, 1], [1, 0]]) + assert_(all([x == y for (x, y) in i])) + a = arange(24).reshape(2, 3, 4) + i = nditer([a.T, a], [], [['readonly']]*2, op_axes=[[2, 1, 0], None]) + assert_(all([x == y for (x, y) in i])) + + # Broadcast 1D to any dimension + a = arange(1, 31).reshape(2, 3, 5) + b = arange(1, 3) + i = nditer([a, b], [], [['readonly']]*2, op_axes=[None, [0, -1, -1]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(2, 1, 1)).ravel()) + b = arange(1, 4) + i = nditer([a, b], [], [['readonly']]*2, op_axes=[None, [-1, 0, -1]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(1, 3, 1)).ravel()) + b = arange(1, 6) + i = nditer([a, b], [], [['readonly']]*2, + op_axes=[None, [np.newaxis, np.newaxis, 0]]) + assert_equal([x*y for (x, y) in i], (a*b.reshape(1, 1, 5)).ravel()) + + # Inner product-style broadcasting + a = arange(24).reshape(2, 3, 4) + b = arange(40).reshape(5, 2, 4) + i = nditer([a, b], ['multi_index'], [['readonly']]*2, + op_axes=[[0, 1, -1, -1], [-1, -1, 0, 1]]) + assert_equal(i.shape, (2, 3, 5, 2)) + + # Matrix product-style broadcasting + a = arange(12).reshape(3, 4) + b = arange(20).reshape(4, 5) + i = nditer([a, b], ['multi_index'], [['readonly']]*2, + op_axes=[[0, -1], [-1, 1]]) + assert_equal(i.shape, (3, 5)) + +def test_iter_op_axes_errors(): + # Check that custom axes throws errors for bad inputs + + # Wrong number of items in op_axes + a = arange(6).reshape(2, 3) + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[0], [1], [0]]) + # Out of bounds items in op_axes + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[2, 1], [0, 1]]) + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[0, 1], [2, -1]]) + # Duplicate items in op_axes + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[0, 0], [0, 1]]) + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + 
op_axes=[[0, 1], [1, 1]]) + + # Different sized arrays in op_axes + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[0, 1], [0, 1, 0]]) + + # Non-broadcastable dimensions in the result + assert_raises(ValueError, nditer, [a, a], [], [['readonly']]*2, + op_axes=[[0, 1], [1, 0]]) + +def test_iter_copy(): + # Check that copying the iterator works correctly + a = arange(24).reshape(2, 3, 4) + + # Simple iterator + i = nditer(a) + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + i.iterindex = 3 + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + # Buffered iterator + i = nditer(a, ['buffered', 'ranged'], order='F', buffersize=3) + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + i.iterindex = 3 + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + i.iterrange = (3, 9) + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + i.iterrange = (2, 18) + next(i) + next(i) + j = i.copy() + assert_equal([x[()] for x in i], [x[()] for x in j]) + + # Casting iterator + with nditer(a, ['buffered'], order='F', casting='unsafe', + op_dtypes='f8', buffersize=5) as i: + j = i.copy() + assert_equal([x[()] for x in j], a.ravel(order='F')) + + a = arange(24, dtype='<i4').reshape(2, 3, 4) + with nditer(a, ['buffered'], order='F', casting='unsafe', + op_dtypes='>f8', buffersize=5) as i: + j = i.copy() + assert_equal([x[()] for x in j], a.ravel(order='F')) + +def test_iter_allocate_output_simple(): + # Check that the iterator will properly allocate outputs + + # Simple case + a = arange(6) + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + +def test_iter_allocate_output_buffered_readwrite(): + # Allocated output with buffering + delay_bufalloc + + a = arange(6) + i = nditer([a, None], ['buffered', 'delay_bufalloc'], + [['readonly'], ['allocate', 'readwrite']]) + with i: + i.operands[1][:] = 1 + i.reset() + for x in i: + x[1][...] += x[0][...] 
+ assert_equal(i.operands[1], a+1) + +def test_iter_allocate_output_itorder(): + # The allocated output should match the iteration order + + # C-order input, best iteration order + a = arange(6, dtype='i4').reshape(2, 3) + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, a.strides) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + # F-order input, best iteration order + a = arange(24, dtype='i4').reshape(2, 3, 4).T + i = nditer([a, None], [], [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, a.strides) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + # Non-contiguous input, C iteration order + a = arange(24, dtype='i4').reshape(2, 3, 4).swapaxes(0, 1) + i = nditer([a, None], [], + [['readonly'], ['writeonly', 'allocate']], + order='C', + op_dtypes=[None, np.dtype('f4')]) + assert_equal(i.operands[1].shape, a.shape) + assert_equal(i.operands[1].strides, (32, 16, 4)) + assert_equal(i.operands[1].dtype, np.dtype('f4')) + +def test_iter_allocate_output_opaxes(): + # Specifying op_axes should work + + a = arange(24, dtype='i4').reshape(2, 3, 4) + i = nditer([None, a], [], [['writeonly', 'allocate'], ['readonly']], + op_dtypes=[np.dtype('u4'), None], + op_axes=[[1, 2, 0], None]) + assert_equal(i.operands[0].shape, (4, 2, 3)) + assert_equal(i.operands[0].strides, (4, 48, 16)) + assert_equal(i.operands[0].dtype, np.dtype('u4')) + +def test_iter_allocate_output_types_promotion(): + # Check type promotion of automatic outputs + + i = nditer([array([3], dtype='f4'), array([0], dtype='f8'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f8')) + i = nditer([array([3], dtype='i4'), array([0], dtype='f4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f8')) + i = nditer([array([3], dtype='f4'), array(0, dtype='f8'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('f4')) + i = nditer([array([3], dtype='u4'), array(0, dtype='i4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('u4')) + i = nditer([array([3], dtype='u4'), array(-12, dtype='i4'), None], [], + [['readonly']]*2+[['writeonly', 'allocate']]) + assert_equal(i.dtypes[2], np.dtype('i8')) + +def test_iter_allocate_output_types_byte_order(): + # Verify the rules for byte order changes + + # When there's just one input, the output type exactly matches + a = array([3], dtype='u4').newbyteorder() + i = nditer([a, None], [], + [['readonly'], ['writeonly', 'allocate']]) + assert_equal(i.dtypes[0], i.dtypes[1]) + # With two or more inputs, the output type is in native byte order + i = nditer([a, a, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_(i.dtypes[0] != i.dtypes[2]) + assert_equal(i.dtypes[0].newbyteorder('='), i.dtypes[2]) + +def test_iter_allocate_output_types_scalar(): + # If the inputs are all scalars, the output should be a scalar + + i = nditer([None, 1, 2.3, np.float32(12), np.complex128(3)], [], + [['writeonly', 'allocate']] + [['readonly']]*4) + assert_equal(i.operands[0].dtype, np.dtype('complex128')) + assert_equal(i.operands[0].ndim, 0) + +def test_iter_allocate_output_subtype(): + # Make sure that the subtype with priority wins + class 
MyNDArray(np.ndarray): + __array_priority__ = 15 + + # subclass vs ndarray + a = np.array([[1, 2], [3, 4]]).view(MyNDArray) + b = np.arange(4).reshape(2, 2).T + i = nditer([a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_equal(type(a), type(i.operands[2])) + assert_(type(b) is not type(i.operands[2])) + assert_equal(i.operands[2].shape, (2, 2)) + + # If subtypes are disabled, we should get back an ndarray. + i = nditer([a, b, None], [], + [['readonly'], ['readonly'], + ['writeonly', 'allocate', 'no_subtype']]) + assert_equal(type(b), type(i.operands[2])) + assert_(type(a) is not type(i.operands[2])) + assert_equal(i.operands[2].shape, (2, 2)) + +def test_iter_allocate_output_errors(): + # Check that the iterator will throw errors for bad output allocations + + # Need an input if no output data type is specified + a = arange(6) + assert_raises(TypeError, nditer, [a, None], [], + [['writeonly'], ['writeonly', 'allocate']]) + # Allocated output should be flagged for writing + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['allocate', 'readonly']]) + # Allocated output can't have buffering without delayed bufalloc + assert_raises(ValueError, nditer, [a, None], ['buffered'], + ['allocate', 'readwrite']) + # Must specify at least one input + assert_raises(ValueError, nditer, [None, None], [], + [['writeonly', 'allocate'], + ['writeonly', 'allocate']], + op_dtypes=[np.dtype('f4'), np.dtype('f4')]) + # If using op_axes, must specify all the axes + a = arange(24, dtype='i4').reshape(2, 3, 4) + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, np.newaxis, 1]]) + # If using op_axes, the axes must be within bounds + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, 3, 1]]) + # If using op_axes, there can't be duplicates + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['writeonly', 'allocate']], + op_dtypes=[None, np.dtype('f4')], + op_axes=[None, [0, 2, 1, 0]]) + +def test_iter_remove_axis(): + a = arange(24).reshape(2, 3, 4) + + i = nditer(a, ['multi_index']) + i.remove_axis(1) + assert_equal([x for x in i], a[:, 0,:].ravel()) + + a = a[::-1,:,:] + i = nditer(a, ['multi_index']) + i.remove_axis(0) + assert_equal([x for x in i], a[0,:,:].ravel()) + +def test_iter_remove_multi_index_inner_loop(): + # Check that removing multi-index support works + + a = arange(24).reshape(2, 3, 4) + + i = nditer(a, ['multi_index']) + assert_equal(i.ndim, 3) + assert_equal(i.shape, (2, 3, 4)) + assert_equal(i.itviews[0].shape, (2, 3, 4)) + + # Removing the multi-index tracking causes all dimensions to coalesce + before = [x for x in i] + i.remove_multi_index() + after = [x for x in i] + + assert_equal(before, after) + assert_equal(i.ndim, 1) + assert_raises(ValueError, lambda i:i.shape, i) + assert_equal(i.itviews[0].shape, (24,)) + + # Removing the inner loop means there's just one iteration + i.reset() + assert_equal(i.itersize, 24) + assert_equal(i[0].shape, tuple()) + i.enable_external_loop() + assert_equal(i.itersize, 24) + assert_equal(i[0].shape, (24,)) + assert_equal(i.value, arange(24)) + +def test_iter_iterindex(): + # Make sure iterindex works + + buffersize = 5 + a = arange(24).reshape(4, 3, 2) + for flags in ([], ['buffered']): + i = nditer(a, flags, buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + 
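iterindex is the iterator's flat position in its own traversal order, so assigning it fast-forwards the remaining iteration; a small sketch under that assumption (array values arbitrary):

    import numpy as np

    a = np.arange(6).reshape(2, 3)
    it = np.nditer(a, order='F')     # Fortran order visits 0, 3, 1, 4, 2, 5
    it.iterindex = 2                 # skip the first two positions
    print([int(x) for x in it])      # [1, 4, 2, 5]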
i.iterindex = 2 + assert_equal(iter_iterindices(i), list(range(2, 24))) + + i = nditer(a, flags, order='F', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 5 + assert_equal(iter_iterindices(i), list(range(5, 24))) + + i = nditer(a[::-1], flags, order='F', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 9 + assert_equal(iter_iterindices(i), list(range(9, 24))) + + i = nditer(a[::-1, ::-1], flags, order='C', buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 13 + assert_equal(iter_iterindices(i), list(range(13, 24))) + + i = nditer(a[::1, ::-1], flags, buffersize=buffersize) + assert_equal(iter_iterindices(i), list(range(24))) + i.iterindex = 23 + assert_equal(iter_iterindices(i), list(range(23, 24))) + i.reset() + i.iterindex = 2 + assert_equal(iter_iterindices(i), list(range(2, 24))) + +def test_iter_iterrange(): + # Make sure getting and resetting the iterrange works + + buffersize = 5 + a = arange(24, dtype='i4').reshape(4, 3, 2) + a_fort = a.ravel(order='F') + + i = nditer(a, ['ranged'], ['readonly'], order='F', + buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal([x[()] for x in i], a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal([x[()] for x in i], a_fort[r[0]:r[1]]) + + i = nditer(a, ['ranged', 'buffered'], ['readonly'], order='F', + op_dtypes='f8', buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal([x[()] for x in i], a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal([x[()] for x in i], a_fort[r[0]:r[1]]) + + def get_array(i): + val = np.array([], dtype='f8') + for x in i: + val = np.concatenate((val, x)) + return val + + i = nditer(a, ['ranged', 'buffered', 'external_loop'], + ['readonly'], order='F', + op_dtypes='f8', buffersize=buffersize) + assert_equal(i.iterrange, (0, 24)) + assert_equal(get_array(i), a_fort) + for r in [(0, 24), (1, 2), (3, 24), (5, 5), (0, 20), (23, 24)]: + i.iterrange = r + assert_equal(i.iterrange, r) + assert_equal(get_array(i), a_fort[r[0]:r[1]]) + +def test_iter_buffering(): + # Test buffering with several buffer sizes and types + arrays = [] + # F-order swapped array + arrays.append(np.arange(24, + dtype='c16').reshape(2, 3, 4).T.newbyteorder().byteswap()) + # Contiguous 1-dimensional array + arrays.append(np.arange(10, dtype='f4')) + # Unaligned array + a = np.zeros((4*16+1,), dtype='i1')[1:] + a.dtype = 'i4' + a[:] = np.arange(16, dtype='i4') + arrays.append(a) + # 4-D F-order array + arrays.append(np.arange(120, dtype='i4').reshape(5, 3, 2, 4).T) + for a in arrays: + for buffersize in (1, 2, 3, 5, 8, 11, 16, 1024): + vals = [] + i = nditer(a, ['buffered', 'external_loop'], + [['readonly', 'nbo', 'aligned']], + order='C', + casting='equiv', + buffersize=buffersize) + while not i.finished: + assert_(i[0].size <= buffersize) + vals.append(i[0].copy()) + i.iternext() + assert_equal(np.concatenate(vals), a.ravel(order='C')) + +def test_iter_write_buffering(): + # Test that buffering of writes is working + + # F-order swapped array + a = np.arange(24).reshape(2, 3, 4).T.newbyteorder().byteswap() + i = nditer(a, ['buffered'], + [['readwrite', 'nbo', 'aligned']], + casting='equiv', + order='C', + buffersize=16) + x = 0 + with i: + while not i.finished: + i[0] = x + x += 1 + i.iternext() + 
assert_equal(a.ravel(order='C'), np.arange(24)) + +def test_iter_buffering_delayed_alloc(): + # Test that delaying buffer allocation works + + a = np.arange(6) + b = np.arange(1, dtype='f4') + i = nditer([a, b], ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok'], + ['readwrite'], + casting='unsafe', + op_dtypes='f4') + assert_(i.has_delayed_bufalloc) + assert_raises(ValueError, lambda i:i.multi_index, i) + assert_raises(ValueError, lambda i:i[0], i) + assert_raises(ValueError, lambda i:i[0:2], i) + + def assign_iter(i): + i[0] = 0 + assert_raises(ValueError, assign_iter, i) + + i.reset() + assert_(not i.has_delayed_bufalloc) + assert_equal(i.multi_index, (0,)) + with i: + assert_equal(i[0], 0) + i[1] = 1 + assert_equal(i[0:2], [0, 1]) + assert_equal([[x[0][()], x[1][()]] for x in i], list(zip(range(6), [1]*6))) + +def test_iter_buffered_cast_simple(): + # Test that buffering can handle a simple cast + + a = np.arange(10, dtype='f4') + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f8')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f4')) + +def test_iter_buffered_cast_byteswapped(): + # Test that buffering can handle a cast which requires swap->cast->swap + + a = np.arange(10, dtype='f4').newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f8').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f4')) + + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + + a = np.arange(10, dtype='f8').newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='unsafe', + op_dtypes=[np.dtype('c8').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f8')) + +def test_iter_buffered_cast_byteswapped_complex(): + # Test that buffering can handle a cast which requires swap->cast->copy + + a = np.arange(10, dtype='c8').newbyteorder().byteswap() + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype='c8') + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16').newbyteorder()], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype=np.clongdouble).newbyteorder().byteswap() + a += 2j + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('c16')], + buffersize=3) + with i: + for v in i: + v[...] *= 2 + assert_equal(a, 2*np.arange(10, dtype=np.clongdouble) + 4j) + + a = np.arange(10, dtype=np.longdouble).newbyteorder().byteswap() + i = nditer(a, ['buffered', 'external_loop'], + [['readwrite', 'nbo', 'aligned']], + casting='same_kind', + op_dtypes=[np.dtype('f4')], + buffersize=7) + with i: + for v in i: + v[...] 
*= 2 + assert_equal(a, 2*np.arange(10, dtype=np.longdouble)) + +def test_iter_buffered_cast_structured_type(): + # Tests buffering of structured types + + # simple -> struct type (duplicates the value) + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2, 3)), ('d', 'O')] + a = np.arange(3, dtype='f4') + 0.5 + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [np.array(x) for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], [[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + + # object -> struct type + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2, 3)), ('d', 'O')] + a = np.zeros((3,), dtype='O') + a[0] = (0.5, 0.5, [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]], 0.5) + a[1] = (1.5, 1.5, [[1.5, 1.5, 1.5], [1.5, 1.5, 1.5]], 1.5) + a[2] = (2.5, 2.5, [[2.5, 2.5, 2.5], [2.5, 2.5, 2.5]], 2.5) + if HAS_REFCOUNT: + rc = sys.getrefcount(a[0]) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [x.copy() for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], [[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + vals, i, x = [None]*3 + if HAS_REFCOUNT: + assert_equal(sys.getrefcount(a[0]), rc) + + # single-field struct type -> simple + sdt = [('a', 'f4')] + a = np.array([(5.5,), (8,)], dtype=sdt) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes='i4') + assert_equal([x_[()] for x_ in i], [5, 8]) + + # make sure multi-field struct type -> simple doesn't work + sdt = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + a = np.array([(5.5, 7, 'test'), (8, 10, 11)], dtype=sdt) + assert_raises(TypeError, lambda: ( + nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes='i4'))) + + # struct type -> struct type (field-wise copy) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('d', 'u2'), ('a', 'O'), ('b', 'f8')] + a = np.array([(1, 2, 3), (4, 5, 6)], dtype=sdt1) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + assert_equal([np.array(x_) for x_ in i], + [np.array((1, 2, 3), dtype=sdt2), + np.array((4, 5, 6), dtype=sdt2)]) + + # make sure struct type -> struct type with different + # number of fields fails + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('b', 'O'), ('a', 'f8')] + a = np.array([(1, 2, 3), (4, 5, 6)], dtype=sdt1) + + assert_raises(ValueError, lambda : ( + nditer(a, ['buffered', 'refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2))) + + +def test_iter_buffered_cast_subarray(): + # Tests buffering of subarrays + + # one element -> many (copies it to all) + sdt1 = [('a', 'f4')] + sdt2 = [('a', 'f8', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + for x, count in zip(i, list(range(6))): + assert_(np.all(x['a'] == count)) + + # one element -> many -> back (copies it to all) + sdt1 = [('a', 'O', (1, 1))] + sdt2 = 
[('a', 'O', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_(np.all(x['a'] == count)) + x['a'][0] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6, 1, 1)+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + x['a'] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6, 1, 1, 1)*np.ones((1, 3, 2, 2))+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'f8', (3, 2, 2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> one element (copies just element 0) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'f4', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:, 0, 0, 0] = np.arange(6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> matching shape (straightforward copy) + sdt1 = [('a', 'O', (3, 2, 2))] + sdt2 = [('a', 'f4', (3, 2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*3*2*2).reshape(6, 3, 2, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a']) + count += 1 + + # vector -> smaller vector (truncates) + sdt1 = [('a', 'f8', (6,))] + sdt2 = [('a', 'f4', (2,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*6).reshape(6, 6) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a'][:2]) + count += 1 + + # vector -> bigger vector (pads with zeros) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (6,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2], a[count]['a']) + assert_equal(x['a'][2:], [0, 0, 0, 0]) + count += 1 + + # vector -> matrix (broadcasts) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (2, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][0], a[count]['a']) + assert_equal(x['a'][1], a[count]['a']) + count += 1 + + # vector -> matrix (broadcasts and zero-pads) + sdt1 = [('a', 'f8', (2, 1))] + sdt2 = [('a', 'f4', (3, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6, 2, 1) + i = nditer(a, ['buffered', 
'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2, 0], a[count]['a'][:, 0]) + assert_equal(x['a'][:2, 1], a[count]['a'][:, 0]) + assert_equal(x['a'][2,:], [0, 0]) + count += 1 + + # matrix -> matrix (truncates and zero-pads) + sdt1 = [('a', 'f8', (2, 3))] + sdt2 = [('a', 'f4', (3, 2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2*3).reshape(6, 2, 3) + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2, 0], a[count]['a'][:, 0]) + assert_equal(x['a'][:2, 1], a[count]['a'][:, 1]) + assert_equal(x['a'][2,:], [0, 0]) + count += 1 + +def test_iter_buffering_badwriteback(): + # Writing back from a buffer cannot combine elements + + # a needs write buffering, but had a broadcast dimension + a = np.arange(6).reshape(2, 3, 1) + b = np.arange(12).reshape(2, 3, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + + # But if a is readonly, it's fine + nditer([a, b], ['buffered', 'external_loop'], + [['readonly'], ['writeonly']], + order='C') + + # If a has just one element, it's fine too (constant 0 stride, a reduction) + a = np.arange(1).reshape(1, 1, 1) + nditer([a, b], ['buffered', 'external_loop', 'reduce_ok'], + [['readwrite'], ['writeonly']], + order='C') + + # check that it fails on other dimensions too + a = np.arange(6).reshape(1, 3, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + a = np.arange(4).reshape(2, 1, 2) + assert_raises(ValueError, nditer, [a, b], + ['buffered', 'external_loop'], + [['readwrite'], ['writeonly']], + order='C') + +def test_iter_buffering_string(): + # Safe casting disallows shrinking strings + a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_) + assert_equal(a.dtype, np.dtype('S4')) + assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'], + op_dtypes='S2') + i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6') + assert_equal(i[0], b'abc') + assert_equal(i[0].dtype, np.dtype('S6')) + + a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode) + assert_equal(a.dtype, np.dtype('U4')) + assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'], + op_dtypes='U2') + i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6') + assert_equal(i[0], u'abc') + assert_equal(i[0].dtype, np.dtype('U6')) + +def test_iter_buffering_growinner(): + # Test that the inner loop grows when no buffering is needed + a = np.arange(30) + i = nditer(a, ['buffered', 'growinner', 'external_loop'], + buffersize=5) + # Should end up with just one inner loop here + assert_equal(i[0].size, a.size) + + +@pytest.mark.slow +def test_iter_buffered_reduce_reuse(): + # large enough array for all views, including negative strides. + a = np.arange(2*3**5)[3**5:3**5+1] + flags = ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok', 'refs_ok'] + op_flags = [('readonly',), ('readwrite', 'allocate')] + op_axes_list = [[(0, 1, 2), (0, 1, -1)], [(0, 1, 2), (0, -1, -1)]] + # wrong dtype to force buffering + op_dtypes = [float, a.dtype] + + def get_params(): + for xs in range(-3**2, 3**2 + 1): + for ys in range(xs, 3**2 + 1): + for op_axes in op_axes_list: + # last stride is reduced and because of that not + # important for this test, as it is the inner stride. 
+ strides = (xs * a.itemsize, ys * a.itemsize, a.itemsize) + arr = np.lib.stride_tricks.as_strided(a, (3, 3, 3), strides) + + for skip in [0, 1]: + yield arr, op_axes, skip + + for arr, op_axes, skip in get_params(): + nditer2 = np.nditer([arr.copy(), None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + op_dtypes=op_dtypes) + with nditer2: + nditer2.operands[-1][...] = 0 + nditer2.reset() + nditer2.iterindex = skip + + for (a2_in, b2_in) in nditer2: + b2_in += a2_in.astype(np.int_) + + comp_res = nditer2.operands[-1] + + for bufsize in range(0, 3**3): + nditer1 = np.nditer([arr, None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + buffersize=bufsize, op_dtypes=op_dtypes) + with nditer1: + nditer1.operands[-1][...] = 0 + nditer1.reset() + nditer1.iterindex = skip + + for (a1_in, b1_in) in nditer1: + b1_in += a1_in.astype(np.int_) + + res = nditer1.operands[-1] + assert_array_equal(res, comp_res) + + +def test_iter_no_broadcast(): + # Test that the no_broadcast flag works + a = np.arange(24).reshape(2, 3, 4) + b = np.arange(6).reshape(2, 3, 1) + c = np.arange(12).reshape(3, 4) + + nditer([a, b, c], [], + [['readonly', 'no_broadcast'], + ['readonly'], ['readonly']]) + assert_raises(ValueError, nditer, [a, b, c], [], + [['readonly'], ['readonly', 'no_broadcast'], ['readonly']]) + assert_raises(ValueError, nditer, [a, b, c], [], + [['readonly'], ['readonly'], ['readonly', 'no_broadcast']]) + + +class TestIterNested(object): + + def test_basic(self): + # Test nested iteration basic usage + a = arange(12).reshape(2, 3, 2) + + i, j = np.nested_iters(a, [[0], [1, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[0, 1], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = np.nested_iters(a, [[0, 2], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + def test_reorder(self): + # Test nested iteration basic usage + a = arange(12).reshape(2, 3, 2) + + # In 'K' order (default), it gets reordered + i, j = np.nested_iters(a, [[0], [2, 1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[1, 0], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = np.nested_iters(a, [[2, 0], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + # In 'C' order, it doesn't + i, j = np.nested_iters(a, [[0], [2, 1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4, 1, 3, 5], [6, 8, 10, 7, 9, 11]]) + + i, j = np.nested_iters(a, [[1, 0], [2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [6, 7], [2, 3], [8, 9], [4, 5], [10, 11]]) + + i, j = np.nested_iters(a, [[2, 0], [1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [6, 8, 10], [1, 3, 5], [7, 9, 11]]) + + def test_flip_axes(self): + # Test nested iteration with negative axes + a = arange(12).reshape(2, 3, 2)[::-1, ::-1, ::-1] + + # In 'K' order (default), the axes all get flipped + i, j = np.nested_iters(a, [[0], [1, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[0, 1], [2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]) + + i, j = 
np.nested_iters(a, [[0, 2], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + # In 'C' order, flipping axes is disabled + i, j = np.nested_iters(a, [[0], [1, 2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 10, 9, 8, 7, 6], [5, 4, 3, 2, 1, 0]]) + + i, j = np.nested_iters(a, [[0, 1], [2]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 10], [9, 8], [7, 6], [5, 4], [3, 2], [1, 0]]) + + i, j = np.nested_iters(a, [[0, 2], [1]], order='C') + vals = [list(j) for _ in i] + assert_equal(vals, [[11, 9, 7], [10, 8, 6], [5, 3, 1], [4, 2, 0]]) + + def test_broadcast(self): + # Test nested iteration with broadcasting + a = arange(2).reshape(2, 1) + b = arange(3).reshape(1, 3) + + i, j = np.nested_iters([a, b], [[0], [1]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[[0, 0], [0, 1], [0, 2]], [[1, 0], [1, 1], [1, 2]]]) + + i, j = np.nested_iters([a, b], [[1], [0]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[[0, 0], [1, 0]], [[0, 1], [1, 1]], [[0, 2], [1, 2]]]) + + def test_dtype_copy(self): + # Test nested iteration with a copy to change dtype + + # copy + a = arange(6, dtype='i4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readonly', 'copy'], + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2], [3, 4, 5]]) + vals = None + + # writebackifcopy - using conext manager + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readwrite', 'updateifcopy'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[0, 1, 2], [3, 4, 5]]) + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + # writebackifcopy - using close() + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readwrite', 'updateifcopy'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[0, 1, 2], [3, 4, 5]]) + i.close() + j.close() + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + def test_dtype_buffered(self): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + def test_0d(self): + a = np.arange(12).reshape(2, 3, 2) + i, j = np.nested_iters(a, [[], [1, 0, 2]]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]) + + i, j = np.nested_iters(a, [[1, 0, 2], []]) + vals = [list(j) for _ in i] + assert_equal(vals, [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11]]) + + i, j, k = np.nested_iters(a, [[2, 0], [], [1]]) + vals = [] + for x in i: + for y in j: + vals.append([z for z in k]) + assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + + def test_iter_nested_iters_dtype_buffered(self): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] 
+= 1 + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + +def test_iter_reduction_error(): + + a = np.arange(6) + assert_raises(ValueError, nditer, [a, None], [], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0], [-1]]) + + a = np.arange(6).reshape(2, 3) + assert_raises(ValueError, nditer, [a, None], ['external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0, 1], [-1, -1]]) + +def test_iter_reduction(): + # Test doing reductions with the iterator + + a = np.arange(6) + i = nditer([a, None], ['reduce_ok'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0], [-1]]) + # Need to initialize the output operand to the addition unit + with i: + i.operands[1][...] = 0 + # Do the reduction + for x, y in i: + y[...] += x + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) + + a = np.arange(6).reshape(2, 3) + i = nditer([a, None], ['reduce_ok', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[0, 1], [-1, -1]]) + # Need to initialize the output operand to the addition unit + with i: + i.operands[1][...] = 0 + # Reduction shape/strides for the output + assert_equal(i[1].shape, (6,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) + + # This is a tricky reduction case for the buffering double loop + # to handle + a = np.ones((2, 3, 5)) + it1 = nditer([a, None], ['reduce_ok', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[None, [0, -1, 1]]) + it2 = nditer([a, None], ['reduce_ok', 'external_loop', + 'buffered', 'delay_bufalloc'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[None, [0, -1, 1]], buffersize=10) + with it1, it2: + it1.operands[1].fill(0) + it2.operands[1].fill(0) + it2.reset() + for x in it1: + x[1][...] += x[0] + for x in it2: + x[1][...] += x[0] + assert_equal(it1.operands[1], it2.operands[1]) + assert_equal(it2.operands[1].sum(), a.size) + +def test_iter_buffering_reduction(): + # Test doing buffered reductions with the iterator + + a = np.arange(6) + b = np.array(0., dtype='f8').byteswap().newbyteorder() + i = nditer([a, b], ['reduce_ok', 'buffered'], + [['readonly'], ['readwrite', 'nbo']], + op_axes=[[0], [-1]]) + with i: + assert_equal(i[1].dtype, np.dtype('f8')) + assert_(i[1].dtype != b.dtype) + # Do the reduction + for x, y in i: + y[...] += x + # Since no axes were specified, should have allocated a scalar + assert_equal(b, np.sum(a)) + + a = np.arange(6).reshape(2, 3) + b = np.array([0, 0], dtype='f8').byteswap().newbyteorder() + i = nditer([a, b], ['reduce_ok', 'external_loop', 'buffered'], + [['readonly'], ['readwrite', 'nbo']], + op_axes=[[0, 1], [0, -1]]) + # Reduction shape/strides for the output + with i: + assert_equal(i[1].shape, (3,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] 
= y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] + assert_equal(b, np.sum(a, axis=1)) + + # Iterator inner double loop was wrong on this one + p = np.arange(2) + 1 + it = np.nditer([p, None], + ['delay_bufalloc', 'reduce_ok', 'buffered', 'external_loop'], + [['readonly'], ['readwrite', 'allocate']], + op_axes=[[-1, 0], [-1, -1]], + itershape=(2, 2)) + with it: + it.operands[1].fill(0) + it.reset() + assert_equal(it[0], [1, 2, 1, 2]) + + # Iterator inner loop should take argument contiguity into account + x = np.ones((7, 13, 8), np.int8)[4:6,1:11:6,1:5].transpose(1, 2, 0) + x[...] = np.arange(x.size).reshape(x.shape) + y_base = np.arange(4*4, dtype=np.int8).reshape(4, 4) + y_base_copy = y_base.copy() + y = y_base[::2,:,None] + + it = np.nditer([y, x], + ['buffered', 'external_loop', 'reduce_ok'], + [['readwrite'], ['readonly']]) + with it: + for a, b in it: + a.fill(2) + + assert_equal(y_base[1::2], y_base_copy[1::2]) + assert_equal(y_base[::2], 2) + +def test_iter_buffering_reduction_reuse_reduce_loops(): + # There was a bug triggering reuse of the reduce loop inappropriately, + # which caused processing to happen in unnecessarily small chunks + # and overran the buffer. + + a = np.zeros((2, 7)) + b = np.zeros((1, 7)) + it = np.nditer([a, b], flags=['reduce_ok', 'external_loop', 'buffered'], + op_flags=[['readonly'], ['readwrite']], + buffersize=5) + + with it: + bufsizes = [x.shape[0] for x, y in it] + assert_equal(bufsizes, [5, 2, 5, 2]) + assert_equal(sum(bufsizes), a.size) + +def test_iter_writemasked_badinput(): + a = np.zeros((2, 3)) + b = np.zeros((3,)) + m = np.array([[True, True, False], [False, True, False]]) + m2 = np.array([True, True, False]) + m3 = np.array([0, 1, 1], dtype='u1') + mbad1 = np.array([0, 1, 1], dtype='i1') + mbad2 = np.array([0, 1, 1], dtype='f4') + + # Need an 'arraymask' if any operand is 'writemasked' + assert_raises(ValueError, nditer, [a, m], [], + [['readwrite', 'writemasked'], ['readonly']]) + + # A 'writemasked' operand must not be readonly + assert_raises(ValueError, nditer, [a, m], [], + [['readonly', 'writemasked'], ['readonly', 'arraymask']]) + + # 'writemasked' and 'arraymask' may not be used together + assert_raises(ValueError, nditer, [a, m], [], + [['readonly'], ['readwrite', 'arraymask', 'writemasked']]) + + # 'arraymask' may only be specified once + assert_raises(ValueError, nditer, [a, m, m2], [], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask'], + ['readonly', 'arraymask']]) + + # An 'arraymask' with nothing 'writemasked' also doesn't make sense + assert_raises(ValueError, nditer, [a, m], [], + [['readwrite'], ['readonly', 'arraymask']]) + + # A writemasked reduction requires a similarly smaller mask + assert_raises(ValueError, nditer, [a, b, m], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + # But this should work with a smaller/equal mask to the reduction operand + np.nditer([a, b, m2], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + # The arraymask itself cannot be a reduction + assert_raises(ValueError, nditer, [a, b, m2], ['reduce_ok'], + [['readonly'], + ['readwrite', 'writemasked'], + ['readwrite', 'arraymask']]) + + # A uint8 mask is ok too + np.nditer([a, m3], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + # An int8 mask isn't ok + assert_raises(TypeError, np.nditer, [a, 
mbad1], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + # A float32 mask isn't ok + assert_raises(TypeError, np.nditer, [a, mbad2], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['f4', None], + casting='same_kind') + +def test_iter_writemasked(): + a = np.zeros((3,), dtype='f8') + msk = np.array([True, True, False]) + + # When buffering is unused, 'writemasked' effectively does nothing. + # It's up to the user of the iterator to obey the requested semantics. + it = np.nditer([a, msk], [], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + with it: + for x, m in it: + x[...] = 1 + # Because we violated the semantics, all the values became 1 + assert_equal(a, [1, 1, 1]) + + # Even if buffering is enabled, we still may be accessing the array + # directly. + it = np.nditer([a, msk], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']]) + with it: + for x, m in it: + x[...] = 2.5 + # Because we violated the semantics, all the values became 2.5 + assert_equal(a, [2.5, 2.5, 2.5]) + + # If buffering will definitely happening, for instance because of + # a cast, only the items selected by the mask will be copied back from + # the buffer. + it = np.nditer([a, msk], ['buffered'], + [['readwrite', 'writemasked'], + ['readonly', 'arraymask']], + op_dtypes=['i8', None], + casting='unsafe') + with it: + for x, m in it: + x[...] = 3 + # Even though we violated the semantics, only the selected values + # were copied back + assert_equal(a, [3, 3, 2.5]) + +def test_iter_non_writable_attribute_deletion(): + it = np.nditer(np.ones(2)) + attr = ["value", "shape", "operands", "itviews", "has_delayed_bufalloc", + "iterationneedsapi", "has_multi_index", "has_index", "dtypes", + "ndim", "nop", "itersize", "finished"] + + for s in attr: + assert_raises(AttributeError, delattr, it, s) + + +def test_iter_writable_attribute_deletion(): + it = np.nditer(np.ones(2)) + attr = [ "multi_index", "index", "iterrange", "iterindex"] + for s in attr: + assert_raises(AttributeError, delattr, it, s) + + +def test_iter_element_deletion(): + it = np.nditer(np.ones(3)) + try: + del it[1] + del it[1:2] + except TypeError: + pass + except Exception: + raise AssertionError + +def test_iter_allocated_array_dtypes(): + # If the dtype of an allocated output has a shape, the shape gets + # tacked onto the end of the result. 
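A distilled illustration of that rule (dtype and values chosen arbitrarily): giving the allocated operand an op_dtype with a subarray shape such as ('f8', (2,)) appends that shape to the allocated result.

    import numpy as np

    it = np.nditer((np.arange(3), None), op_dtypes=[None, ('f8', (2,))])
    for a, b in it:
        b[0] = a - 0.5
        b[1] = a + 0.5
    print(it.operands[1].shape)      # (3, 2): the (2,) subarray shape is tacked onto the end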
+ it = np.nditer(([1, 3, 20], None), op_dtypes=[None, ('i4', (2,))]) + for a, b in it: + b[0] = a - 1 + b[1] = a + 1 + assert_equal(it.operands[1], [[0, 2], [2, 4], [19, 21]]) + + # Make sure this works for scalars too + it = np.nditer((10, 2, None), op_dtypes=[None, None, ('i4', (2, 2))]) + for a, b, c in it: + c[0, 0] = a - b + c[0, 1] = a + b + c[1, 0] = a * b + c[1, 1] = a / b + assert_equal(it.operands[2], [[8, 12], [20, 5]]) + + +def test_0d_iter(): + # Basic test for iteration of 0-d arrays: + i = nditer([2, 3], ['multi_index'], [['readonly']]*2) + assert_equal(i.ndim, 0) + assert_equal(next(i), (2, 3)) + assert_equal(i.multi_index, ()) + assert_equal(i.iterindex, 0) + assert_raises(StopIteration, next, i) + # test reset: + i.reset() + assert_equal(next(i), (2, 3)) + assert_raises(StopIteration, next, i) + + # test forcing to 0-d + i = nditer(np.arange(5), ['multi_index'], [['readonly']], op_axes=[()]) + assert_equal(i.ndim, 0) + assert_equal(len(i), 1) + # note that itershape=(), still behaves like None due to the conversions + + # Test a more complex buffered casting case (same as another test above) + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2, 3)), ('d', 'O')] + a = np.array(0.5, dtype='f4') + i = nditer(a, ['buffered', 'refs_ok'], ['readonly'], + casting='unsafe', op_dtypes=sdt) + vals = next(i) + assert_equal(vals['a'], 0.5) + assert_equal(vals['b'], 0) + assert_equal(vals['c'], [[(0.5)]*3]*2) + assert_equal(vals['d'], 0.5) + + +def test_iter_too_large(): + # The total size of the iterator must not exceed the maximum intp due + # to broadcasting. Dividing by 1024 will keep it small enough to + # give a legal array. + size = np.iinfo(np.intp).max // 1024 + arr = np.lib.stride_tricks.as_strided(np.zeros(1), (size,), (0,)) + assert_raises(ValueError, nditer, (arr, arr[:, None])) + # test the same for multiindex. That may get more interesting when + # removing 0 dimensional axis is allowed (since an iterator can grow then) + assert_raises(ValueError, nditer, + (arr, arr[:, None]), flags=['multi_index']) + + +def test_iter_too_large_with_multiindex(): + # When a multi index is being tracked, the error is delayed this + # checks the delayed error messages and getting below that by + # removing an axis. + base_size = 2**10 + num = 1 + while base_size**num < np.iinfo(np.intp).max: + num += 1 + + shape_template = [1, 1] * num + arrays = [] + for i in range(num): + shape = shape_template[:] + shape[i * 2] = 2**10 + arrays.append(np.empty(shape)) + arrays = tuple(arrays) + + # arrays are now too large to be broadcast. The different modes test + # different nditer functionality with or without GIL. 
+ for mode in range(6): + with assert_raises(ValueError): + _multiarray_tests.test_nditer_too_large(arrays, -1, mode) + # but if we do nothing with the nditer, it can be constructed: + _multiarray_tests.test_nditer_too_large(arrays, -1, 7) + + # When an axis is removed, things should work again (half the time): + for i in range(num): + for mode in range(6): + # an axis with size 1024 is removed: + _multiarray_tests.test_nditer_too_large(arrays, i*2, mode) + # an axis with size 1 is removed: + with assert_raises(ValueError): + _multiarray_tests.test_nditer_too_large(arrays, i*2 + 1, mode) + +def test_writebacks(): + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + assert_(a.dtype.byteorder != au.dtype.byteorder) + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + with it: + it.operands[0][:] = 100 + assert_equal(au, 100) + # do it again, this time raise an error, + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + try: + with it: + assert_equal(au.flags.writeable, False) + it.operands[0][:] = 0 + raise ValueError('exit context manager on exception') + except: + pass + assert_equal(au, 0) + assert_equal(au.flags.writeable, True) + # cannot reuse i outside context manager + assert_raises(ValueError, getattr, it, 'operands') + + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + with it: + x = it.operands[0] + x[:] = 6 + assert_(x.flags.writebackifcopy) + assert_equal(au, 6) + assert_(not x.flags.writebackifcopy) + x[:] = 123 # x.data still valid + assert_equal(au, 6) # but not connected to au + + it = nditer(au, [], + [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + # reentering works + with it: + with it: + for x in it: + x[...] = 123 + + it = nditer(au, [], + [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + # make sure exiting the inner context manager closes the iterator + with it: + with it: + for x in it: + x[...] = 123 + assert_raises(ValueError, getattr, it, 'operands') + # do not crash if original data array is decrefed + it = nditer(au, [], + [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + del au + with it: + for x in it: + x[...] 
= 123 + # make sure we cannot reenter the closed iterator + enter = it.__enter__ + assert_raises(RuntimeError, enter) + +def test_close_equivalent(): + ''' using a context amanger and using nditer.close are equivalent + ''' + def add_close(x, y, out=None): + addop = np.add + it = np.nditer([x, y, out], [], + [['readonly'], ['readonly'], ['writeonly','allocate']]) + for (a, b, c) in it: + addop(a, b, out=c) + ret = it.operands[2] + it.close() + return ret + + def add_context(x, y, out=None): + addop = np.add + it = np.nditer([x, y, out], [], + [['readonly'], ['readonly'], ['writeonly','allocate']]) + with it: + for (a, b, c) in it: + addop(a, b, out=c) + return it.operands[2] + z = add_close(range(5), range(5)) + assert_equal(z, range(0, 10, 2)) + z = add_context(range(5), range(5)) + assert_equal(z, range(0, 10, 2)) + +def test_close_raises(): + it = np.nditer(np.arange(3)) + assert_equal (next(it), 0) + it.close() + assert_raises(StopIteration, next, it) + assert_raises(ValueError, getattr, it, 'operands') + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_warn_noclose(): + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + it = np.nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + del it + assert len(sup.log) == 1 diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_numeric.py b/contrib/python/numpy/py2/numpy/core/tests/test_numeric.py new file mode 100644 index 0000000000..ed02c15616 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_numeric.py @@ -0,0 +1,2797 @@ +from __future__ import division, absolute_import, print_function + +import sys +import warnings +import itertools +import platform +import pytest +from decimal import Decimal + +import numpy as np +from numpy.core import umath +from numpy.random import rand, randint, randn +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex, + assert_array_equal, assert_almost_equal, assert_array_almost_equal, + HAS_REFCOUNT + ) + + +class TestResize(object): + def test_copies(self): + A = np.array([[1, 2], [3, 4]]) + Ar1 = np.array([[1, 2, 3, 4], [1, 2, 3, 4]]) + assert_equal(np.resize(A, (2, 4)), Ar1) + + Ar2 = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) + assert_equal(np.resize(A, (4, 2)), Ar2) + + Ar3 = np.array([[1, 2, 3], [4, 1, 2], [3, 4, 1], [2, 3, 4]]) + assert_equal(np.resize(A, (4, 3)), Ar3) + + def test_zeroresize(self): + A = np.array([[1, 2], [3, 4]]) + Ar = np.resize(A, (0,)) + assert_array_equal(Ar, np.array([])) + assert_equal(A.dtype, Ar.dtype) + + Ar = np.resize(A, (0, 2)) + assert_equal(Ar.shape, (0, 2)) + + Ar = np.resize(A, (2, 0)) + assert_equal(Ar.shape, (2, 0)) + + def test_reshape_from_zero(self): + # See also gh-6740 + A = np.zeros(0, dtype=[('a', np.float32, 1)]) + Ar = np.resize(A, (2, 1)) + assert_array_equal(Ar, np.zeros((2, 1), Ar.dtype)) + assert_equal(A.dtype, Ar.dtype) + + +class TestNonarrayArgs(object): + # check that non-array arguments to functions wrap them in arrays + def test_choose(self): + choices = [[0, 1, 2], + [3, 4, 5], + [5, 6, 7]] + tgt = [5, 1, 5] + a = [2, 0, 1] + + out = np.choose(a, choices) + assert_equal(out, tgt) + + def test_clip(self): + arr = [-1, 5, 2, 3, 10, -4, -9] + out = np.clip(arr, 2, 7) + tgt = [2, 5, 2, 3, 7, 2, 2] + assert_equal(out, tgt) + + def test_compress(self): + arr = [[0, 1, 2, 3, 4], + [5, 6, 7, 8, 9]] + tgt = [[5, 6, 7, 8, 9]] + out = np.compress([0, 
1], arr, axis=0) + assert_equal(out, tgt) + + def test_count_nonzero(self): + arr = [[0, 1, 7, 0, 0], + [3, 0, 0, 2, 19]] + tgt = np.array([2, 3]) + out = np.count_nonzero(arr, axis=1) + assert_equal(out, tgt) + + def test_cumproduct(self): + A = [[1, 2, 3], [4, 5, 6]] + assert_(np.all(np.cumproduct(A) == np.array([1, 2, 6, 24, 120, 720]))) + + def test_diagonal(self): + a = [[0, 1, 2, 3], + [4, 5, 6, 7], + [8, 9, 10, 11]] + out = np.diagonal(a) + tgt = [0, 5, 10] + + assert_equal(out, tgt) + + def test_mean(self): + A = [[1, 2, 3], [4, 5, 6]] + assert_(np.mean(A) == 3.5) + assert_(np.all(np.mean(A, 0) == np.array([2.5, 3.5, 4.5]))) + assert_(np.all(np.mean(A, 1) == np.array([2., 5.]))) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(np.mean([]))) + assert_(w[0].category is RuntimeWarning) + + def test_ptp(self): + a = [3, 4, 5, 10, -3, -5, 6.0] + assert_equal(np.ptp(a, axis=0), 15.0) + + def test_prod(self): + arr = [[1, 2, 3, 4], + [5, 6, 7, 9], + [10, 3, 4, 5]] + tgt = [24, 1890, 600] + + assert_equal(np.prod(arr, axis=-1), tgt) + + def test_ravel(self): + a = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + tgt = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + assert_equal(np.ravel(a), tgt) + + def test_repeat(self): + a = [1, 2, 3] + tgt = [1, 1, 2, 2, 3, 3] + + out = np.repeat(a, 2) + assert_equal(out, tgt) + + def test_reshape(self): + arr = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] + tgt = [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]] + assert_equal(np.reshape(arr, (2, 6)), tgt) + + def test_round(self): + arr = [1.56, 72.54, 6.35, 3.25] + tgt = [1.6, 72.5, 6.4, 3.2] + assert_equal(np.around(arr, decimals=1), tgt) + + def test_searchsorted(self): + arr = [-8, -5, -1, 3, 6, 10] + out = np.searchsorted(arr, 0) + assert_equal(out, 3) + + def test_size(self): + A = [[1, 2, 3], [4, 5, 6]] + assert_(np.size(A) == 6) + assert_(np.size(A, 0) == 2) + assert_(np.size(A, 1) == 3) + + def test_squeeze(self): + A = [[[1, 1, 1], [2, 2, 2], [3, 3, 3]]] + assert_(np.squeeze(A).shape == (3, 3)) + + def test_std(self): + A = [[1, 2, 3], [4, 5, 6]] + assert_almost_equal(np.std(A), 1.707825127659933) + assert_almost_equal(np.std(A, 0), np.array([1.5, 1.5, 1.5])) + assert_almost_equal(np.std(A, 1), np.array([0.81649658, 0.81649658])) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(np.std([]))) + assert_(w[0].category is RuntimeWarning) + + def test_swapaxes(self): + tgt = [[[0, 4], [2, 6]], [[1, 5], [3, 7]]] + a = [[[0, 1], [2, 3]], [[4, 5], [6, 7]]] + out = np.swapaxes(a, 0, 2) + assert_equal(out, tgt) + + def test_sum(self): + m = [[1, 2, 3], + [4, 5, 6], + [7, 8, 9]] + tgt = [[6], [15], [24]] + out = np.sum(m, axis=1, keepdims=True) + + assert_equal(tgt, out) + + def test_take(self): + tgt = [2, 3, 5] + indices = [1, 2, 4] + a = [1, 2, 3, 4, 5] + + out = np.take(a, indices) + assert_equal(out, tgt) + + def test_trace(self): + c = [[1, 2], [3, 4], [5, 6]] + assert_equal(np.trace(c), 5) + + def test_transpose(self): + arr = [[1, 2], [3, 4], [5, 6]] + tgt = [[1, 3, 5], [2, 4, 6]] + assert_equal(np.transpose(arr, (1, 0)), tgt) + + def test_var(self): + A = [[1, 2, 3], [4, 5, 6]] + assert_almost_equal(np.var(A), 2.9166666666666665) + assert_almost_equal(np.var(A, 0), np.array([2.25, 2.25, 2.25])) + assert_almost_equal(np.var(A, 1), np.array([0.66666667, 0.66666667])) + + with warnings.catch_warnings(record=True) as w: + 
warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(np.var([]))) + assert_(w[0].category is RuntimeWarning) + + +class TestIsscalar(object): + def test_isscalar(self): + assert_(np.isscalar(3.1)) + assert_(np.isscalar(np.int16(12345))) + assert_(np.isscalar(False)) + assert_(np.isscalar('numpy')) + assert_(not np.isscalar([3.1])) + assert_(not np.isscalar(None)) + + # PEP 3141 + from fractions import Fraction + assert_(np.isscalar(Fraction(5, 17))) + from numbers import Number + assert_(np.isscalar(Number())) + + +class TestBoolScalar(object): + def test_logical(self): + f = np.False_ + t = np.True_ + s = "xyz" + assert_((t and s) is s) + assert_((f and s) is f) + + def test_bitwise_or(self): + f = np.False_ + t = np.True_ + assert_((t | t) is t) + assert_((f | t) is t) + assert_((t | f) is t) + assert_((f | f) is f) + + def test_bitwise_and(self): + f = np.False_ + t = np.True_ + assert_((t & t) is t) + assert_((f & t) is f) + assert_((t & f) is f) + assert_((f & f) is f) + + def test_bitwise_xor(self): + f = np.False_ + t = np.True_ + assert_((t ^ t) is f) + assert_((f ^ t) is t) + assert_((t ^ f) is t) + assert_((f ^ f) is f) + + +class TestBoolArray(object): + def setup(self): + # offset for simd tests + self.t = np.array([True] * 41, dtype=bool)[1::] + self.f = np.array([False] * 41, dtype=bool)[1::] + self.o = np.array([False] * 42, dtype=bool)[2::] + self.nm = self.f.copy() + self.im = self.t.copy() + self.nm[3] = True + self.nm[-2] = True + self.im[3] = False + self.im[-2] = False + + def test_all_any(self): + assert_(self.t.all()) + assert_(self.t.any()) + assert_(not self.f.all()) + assert_(not self.f.any()) + assert_(self.nm.any()) + assert_(self.im.any()) + assert_(not self.nm.all()) + assert_(not self.im.all()) + # check bad element in all positions + for i in range(256 - 7): + d = np.array([False] * 256, dtype=bool)[7::] + d[i] = True + assert_(np.any(d)) + e = np.array([True] * 256, dtype=bool)[7::] + e[i] = False + assert_(not np.all(e)) + assert_array_equal(e, ~d) + # big array test for blocked libc loops + for i in list(range(9, 6000, 507)) + [7764, 90021, -10]: + d = np.array([False] * 100043, dtype=bool) + d[i] = True + assert_(np.any(d), msg="%r" % i) + e = np.array([True] * 100043, dtype=bool) + e[i] = False + assert_(not np.all(e), msg="%r" % i) + + def test_logical_not_abs(self): + assert_array_equal(~self.t, self.f) + assert_array_equal(np.abs(~self.t), self.f) + assert_array_equal(np.abs(~self.f), self.t) + assert_array_equal(np.abs(self.f), self.f) + assert_array_equal(~np.abs(self.f), self.t) + assert_array_equal(~np.abs(self.t), self.f) + assert_array_equal(np.abs(~self.nm), self.im) + np.logical_not(self.t, out=self.o) + assert_array_equal(self.o, self.f) + np.abs(self.t, out=self.o) + assert_array_equal(self.o, self.t) + + def test_logical_and_or_xor(self): + assert_array_equal(self.t | self.t, self.t) + assert_array_equal(self.f | self.f, self.f) + assert_array_equal(self.t | self.f, self.t) + assert_array_equal(self.f | self.t, self.t) + np.logical_or(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.t) + assert_array_equal(self.t & self.t, self.t) + assert_array_equal(self.f & self.f, self.f) + assert_array_equal(self.t & self.f, self.f) + assert_array_equal(self.f & self.t, self.f) + np.logical_and(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.t) + assert_array_equal(self.t ^ self.t, self.f) + assert_array_equal(self.f ^ self.f, self.f) + assert_array_equal(self.t ^ self.f, self.t) + 
assert_array_equal(self.f ^ self.t, self.t) + np.logical_xor(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.f) + + assert_array_equal(self.nm & self.t, self.nm) + assert_array_equal(self.im & self.f, False) + assert_array_equal(self.nm & True, self.nm) + assert_array_equal(self.im & False, self.f) + assert_array_equal(self.nm | self.t, self.t) + assert_array_equal(self.im | self.f, self.im) + assert_array_equal(self.nm | True, self.t) + assert_array_equal(self.im | False, self.im) + assert_array_equal(self.nm ^ self.t, self.im) + assert_array_equal(self.im ^ self.f, self.im) + assert_array_equal(self.nm ^ True, self.im) + assert_array_equal(self.im ^ False, self.im) + + +class TestBoolCmp(object): + def setup(self): + self.f = np.ones(256, dtype=np.float32) + self.ef = np.ones(self.f.size, dtype=bool) + self.d = np.ones(128, dtype=np.float64) + self.ed = np.ones(self.d.size, dtype=bool) + # generate values for all permutation of 256bit simd vectors + s = 0 + for i in range(32): + self.f[s:s+8] = [i & 2**x for x in range(8)] + self.ef[s:s+8] = [(i & 2**x) != 0 for x in range(8)] + s += 8 + s = 0 + for i in range(16): + self.d[s:s+4] = [i & 2**x for x in range(4)] + self.ed[s:s+4] = [(i & 2**x) != 0 for x in range(4)] + s += 4 + + self.nf = self.f.copy() + self.nd = self.d.copy() + self.nf[self.ef] = np.nan + self.nd[self.ed] = np.nan + + self.inff = self.f.copy() + self.infd = self.d.copy() + self.inff[::3][self.ef[::3]] = np.inf + self.infd[::3][self.ed[::3]] = np.inf + self.inff[1::3][self.ef[1::3]] = -np.inf + self.infd[1::3][self.ed[1::3]] = -np.inf + self.inff[2::3][self.ef[2::3]] = np.nan + self.infd[2::3][self.ed[2::3]] = np.nan + self.efnonan = self.ef.copy() + self.efnonan[2::3] = False + self.ednonan = self.ed.copy() + self.ednonan[2::3] = False + + self.signf = self.f.copy() + self.signd = self.d.copy() + self.signf[self.ef] *= -1. + self.signd[self.ed] *= -1. + self.signf[1::6][self.ef[1::6]] = -np.inf + self.signd[1::6][self.ed[1::6]] = -np.inf + self.signf[3::6][self.ef[3::6]] = -np.nan + self.signd[3::6][self.ed[3::6]] = -np.nan + self.signf[4::6][self.ef[4::6]] = -0. + self.signd[4::6][self.ed[4::6]] = -0. 
+ + def test_float(self): + # offset for alignment test + for i in range(4): + assert_array_equal(self.f[i:] > 0, self.ef[i:]) + assert_array_equal(self.f[i:] - 1 >= 0, self.ef[i:]) + assert_array_equal(self.f[i:] == 0, ~self.ef[i:]) + assert_array_equal(-self.f[i:] < 0, self.ef[i:]) + assert_array_equal(-self.f[i:] + 1 <= 0, self.ef[i:]) + r = self.f[i:] != 0 + assert_array_equal(r, self.ef[i:]) + r2 = self.f[i:] != np.zeros_like(self.f[i:]) + r3 = 0 != self.f[i:] + assert_array_equal(r, r2) + assert_array_equal(r, r3) + # check bool == 0x1 + assert_array_equal(r.view(np.int8), r.astype(np.int8)) + assert_array_equal(r2.view(np.int8), r2.astype(np.int8)) + assert_array_equal(r3.view(np.int8), r3.astype(np.int8)) + + # isnan on amd64 takes the same code path + assert_array_equal(np.isnan(self.nf[i:]), self.ef[i:]) + assert_array_equal(np.isfinite(self.nf[i:]), ~self.ef[i:]) + assert_array_equal(np.isfinite(self.inff[i:]), ~self.ef[i:]) + assert_array_equal(np.isinf(self.inff[i:]), self.efnonan[i:]) + assert_array_equal(np.signbit(self.signf[i:]), self.ef[i:]) + + def test_double(self): + # offset for alignment test + for i in range(2): + assert_array_equal(self.d[i:] > 0, self.ed[i:]) + assert_array_equal(self.d[i:] - 1 >= 0, self.ed[i:]) + assert_array_equal(self.d[i:] == 0, ~self.ed[i:]) + assert_array_equal(-self.d[i:] < 0, self.ed[i:]) + assert_array_equal(-self.d[i:] + 1 <= 0, self.ed[i:]) + r = self.d[i:] != 0 + assert_array_equal(r, self.ed[i:]) + r2 = self.d[i:] != np.zeros_like(self.d[i:]) + r3 = 0 != self.d[i:] + assert_array_equal(r, r2) + assert_array_equal(r, r3) + # check bool == 0x1 + assert_array_equal(r.view(np.int8), r.astype(np.int8)) + assert_array_equal(r2.view(np.int8), r2.astype(np.int8)) + assert_array_equal(r3.view(np.int8), r3.astype(np.int8)) + + # isnan on amd64 takes the same code path + assert_array_equal(np.isnan(self.nd[i:]), self.ed[i:]) + assert_array_equal(np.isfinite(self.nd[i:]), ~self.ed[i:]) + assert_array_equal(np.isfinite(self.infd[i:]), ~self.ed[i:]) + assert_array_equal(np.isinf(self.infd[i:]), self.ednonan[i:]) + assert_array_equal(np.signbit(self.signd[i:]), self.ed[i:]) + + +class TestSeterr(object): + def test_default(self): + err = np.geterr() + assert_equal(err, + dict(divide='warn', + invalid='warn', + over='warn', + under='ignore') + ) + + def test_set(self): + with np.errstate(): + err = np.seterr() + old = np.seterr(divide='print') + assert_(err == old) + new = np.seterr() + assert_(new['divide'] == 'print') + np.seterr(over='raise') + assert_(np.geterr()['over'] == 'raise') + assert_(new['divide'] == 'print') + np.seterr(**old) + assert_(np.geterr() == old) + + @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.") + def test_divide_err(self): + with np.errstate(divide='raise'): + with assert_raises(FloatingPointError): + np.array([1.]) / np.array([0.]) + + np.seterr(divide='ignore') + np.array([1.]) / np.array([0.]) + + def test_errobj(self): + olderrobj = np.geterrobj() + self.called = 0 + try: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with np.errstate(divide='warn'): + np.seterrobj([20000, 1, None]) + np.array([1.]) / np.array([0.]) + assert_equal(len(w), 1) + + def log_err(*args): + self.called += 1 + extobj_err = args + assert_(len(extobj_err) == 2) + assert_("divide" in extobj_err[0]) + + with np.errstate(divide='ignore'): + np.seterrobj([20000, 3, log_err]) + np.array([1.]) / np.array([0.]) + assert_equal(self.called, 1) + + np.seterrobj(olderrobj) + with 
np.errstate(divide='ignore'): + np.divide(1., 0., extobj=[20000, 3, log_err]) + assert_equal(self.called, 2) + finally: + np.seterrobj(olderrobj) + del self.called + + def test_errobj_noerrmask(self): + # errmask = 0 has a special code path for the default + olderrobj = np.geterrobj() + try: + # set errobj to something non default + np.seterrobj([umath.UFUNC_BUFSIZE_DEFAULT, + umath.ERR_DEFAULT + 1, None]) + # call a ufunc + np.isnan(np.array([6])) + # same with the default, lots of times to get rid of possible + # pre-existing stack in the code + for i in range(10000): + np.seterrobj([umath.UFUNC_BUFSIZE_DEFAULT, umath.ERR_DEFAULT, + None]) + np.isnan(np.array([6])) + finally: + np.seterrobj(olderrobj) + + +class TestFloatExceptions(object): + def assert_raises_fpe(self, fpeerr, flop, x, y): + ftype = type(x) + try: + flop(x, y) + assert_(False, + "Type %s did not raise fpe error '%s'." % (ftype, fpeerr)) + except FloatingPointError as exc: + assert_(str(exc).find(fpeerr) >= 0, + "Type %s raised wrong fpe error '%s'." % (ftype, exc)) + + def assert_op_raises_fpe(self, fpeerr, flop, sc1, sc2): + # Check that fpe exception is raised. + # + # Given a floating operation `flop` and two scalar values, check that + # the operation raises the floating point exception specified by + # `fpeerr`. Tests all variants with 0-d array scalars as well. + + self.assert_raises_fpe(fpeerr, flop, sc1, sc2) + self.assert_raises_fpe(fpeerr, flop, sc1[()], sc2) + self.assert_raises_fpe(fpeerr, flop, sc1, sc2[()]) + self.assert_raises_fpe(fpeerr, flop, sc1[()], sc2[()]) + + def test_floating_exceptions(self): + # Test basic arithmetic function errors + with np.errstate(all='raise'): + # Test for all real and complex float types + for typecode in np.typecodes['AllFloat']: + ftype = np.obj2sctype(typecode) + if np.dtype(ftype).kind == 'f': + # Get some extreme values for the type + fi = np.finfo(ftype) + ft_tiny = fi.tiny + ft_max = fi.max + ft_eps = fi.eps + underflow = 'underflow' + divbyzero = 'divide by zero' + else: + # 'c', complex, corresponding real dtype + rtype = type(ftype(0).real) + fi = np.finfo(rtype) + ft_tiny = ftype(fi.tiny) + ft_max = ftype(fi.max) + ft_eps = ftype(fi.eps) + # The complex types raise different exceptions + underflow = '' + divbyzero = '' + overflow = 'overflow' + invalid = 'invalid' + + self.assert_raises_fpe(underflow, + lambda a, b: a/b, ft_tiny, ft_max) + self.assert_raises_fpe(underflow, + lambda a, b: a*b, ft_tiny, ft_tiny) + self.assert_raises_fpe(overflow, + lambda a, b: a*b, ft_max, ftype(2)) + self.assert_raises_fpe(overflow, + lambda a, b: a/b, ft_max, ftype(0.5)) + self.assert_raises_fpe(overflow, + lambda a, b: a+b, ft_max, ft_max*ft_eps) + self.assert_raises_fpe(overflow, + lambda a, b: a-b, -ft_max, ft_max*ft_eps) + self.assert_raises_fpe(overflow, + np.power, ftype(2), ftype(2**fi.nexp)) + self.assert_raises_fpe(divbyzero, + lambda a, b: a/b, ftype(1), ftype(0)) + self.assert_raises_fpe(invalid, + lambda a, b: a/b, ftype(np.inf), ftype(np.inf)) + self.assert_raises_fpe(invalid, + lambda a, b: a/b, ftype(0), ftype(0)) + self.assert_raises_fpe(invalid, + lambda a, b: a-b, ftype(np.inf), ftype(np.inf)) + self.assert_raises_fpe(invalid, + lambda a, b: a+b, ftype(np.inf), ftype(-np.inf)) + self.assert_raises_fpe(invalid, + lambda a, b: a*b, ftype(0), ftype(np.inf)) + + def test_warnings(self): + # test warning code path + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with np.errstate(all="warn"): + np.divide(1, 0.) 
+ assert_equal(len(w), 1) + assert_("divide by zero" in str(w[0].message)) + np.array(1e300) * np.array(1e300) + assert_equal(len(w), 2) + assert_("overflow" in str(w[-1].message)) + np.array(np.inf) - np.array(np.inf) + assert_equal(len(w), 3) + assert_("invalid value" in str(w[-1].message)) + np.array(1e-300) * np.array(1e-300) + assert_equal(len(w), 4) + assert_("underflow" in str(w[-1].message)) + + +class TestTypes(object): + def check_promotion_cases(self, promote_func): + # tests that the scalars get coerced correctly. + b = np.bool_(0) + i8, i16, i32, i64 = np.int8(0), np.int16(0), np.int32(0), np.int64(0) + u8, u16, u32, u64 = np.uint8(0), np.uint16(0), np.uint32(0), np.uint64(0) + f32, f64, fld = np.float32(0), np.float64(0), np.longdouble(0) + c64, c128, cld = np.complex64(0), np.complex128(0), np.clongdouble(0) + + # coercion within the same kind + assert_equal(promote_func(i8, i16), np.dtype(np.int16)) + assert_equal(promote_func(i32, i8), np.dtype(np.int32)) + assert_equal(promote_func(i16, i64), np.dtype(np.int64)) + assert_equal(promote_func(u8, u32), np.dtype(np.uint32)) + assert_equal(promote_func(f32, f64), np.dtype(np.float64)) + assert_equal(promote_func(fld, f32), np.dtype(np.longdouble)) + assert_equal(promote_func(f64, fld), np.dtype(np.longdouble)) + assert_equal(promote_func(c128, c64), np.dtype(np.complex128)) + assert_equal(promote_func(cld, c128), np.dtype(np.clongdouble)) + assert_equal(promote_func(c64, fld), np.dtype(np.clongdouble)) + + # coercion between kinds + assert_equal(promote_func(b, i32), np.dtype(np.int32)) + assert_equal(promote_func(b, u8), np.dtype(np.uint8)) + assert_equal(promote_func(i8, u8), np.dtype(np.int16)) + assert_equal(promote_func(u8, i32), np.dtype(np.int32)) + assert_equal(promote_func(i64, u32), np.dtype(np.int64)) + assert_equal(promote_func(u64, i32), np.dtype(np.float64)) + assert_equal(promote_func(i32, f32), np.dtype(np.float64)) + assert_equal(promote_func(i64, f32), np.dtype(np.float64)) + assert_equal(promote_func(f32, i16), np.dtype(np.float32)) + assert_equal(promote_func(f32, u32), np.dtype(np.float64)) + assert_equal(promote_func(f32, c64), np.dtype(np.complex64)) + assert_equal(promote_func(c128, f32), np.dtype(np.complex128)) + assert_equal(promote_func(cld, f64), np.dtype(np.clongdouble)) + + # coercion between scalars and 1-D arrays + assert_equal(promote_func(np.array([b]), i8), np.dtype(np.int8)) + assert_equal(promote_func(np.array([b]), u8), np.dtype(np.uint8)) + assert_equal(promote_func(np.array([b]), i32), np.dtype(np.int32)) + assert_equal(promote_func(np.array([b]), u32), np.dtype(np.uint32)) + assert_equal(promote_func(np.array([i8]), i64), np.dtype(np.int8)) + assert_equal(promote_func(u64, np.array([i32])), np.dtype(np.int32)) + assert_equal(promote_func(i64, np.array([u32])), np.dtype(np.uint32)) + assert_equal(promote_func(np.int32(-1), np.array([u64])), + np.dtype(np.float64)) + assert_equal(promote_func(f64, np.array([f32])), np.dtype(np.float32)) + assert_equal(promote_func(fld, np.array([f32])), np.dtype(np.float32)) + assert_equal(promote_func(np.array([f64]), fld), np.dtype(np.float64)) + assert_equal(promote_func(fld, np.array([c64])), + np.dtype(np.complex64)) + assert_equal(promote_func(c64, np.array([f64])), + np.dtype(np.complex128)) + assert_equal(promote_func(np.complex64(3j), np.array([f64])), + np.dtype(np.complex128)) + + # coercion between scalars and 1-D arrays, where + # the scalar has greater kind than the array + assert_equal(promote_func(np.array([b]), f64), 
np.dtype(np.float64)) + assert_equal(promote_func(np.array([b]), i64), np.dtype(np.int64)) + assert_equal(promote_func(np.array([b]), u64), np.dtype(np.uint64)) + assert_equal(promote_func(np.array([i8]), f64), np.dtype(np.float64)) + assert_equal(promote_func(np.array([u16]), f64), np.dtype(np.float64)) + + # uint and int are treated as the same "kind" for + # the purposes of array-scalar promotion. + assert_equal(promote_func(np.array([u16]), i32), np.dtype(np.uint16)) + + # float and complex are treated as the same "kind" for + # the purposes of array-scalar promotion, so that you can do + # (0j + float32array) to get a complex64 array instead of + # a complex128 array. + assert_equal(promote_func(np.array([f32]), c128), + np.dtype(np.complex64)) + + def test_coercion(self): + def res_type(a, b): + return np.add(a, b).dtype + + self.check_promotion_cases(res_type) + + # Use-case: float/complex scalar * bool/int8 array + # shouldn't narrow the float/complex type + for a in [np.array([True, False]), np.array([-3, 12], dtype=np.int8)]: + b = 1.234 * a + assert_equal(b.dtype, np.dtype('f8'), "array type %s" % a.dtype) + b = np.longdouble(1.234) * a + assert_equal(b.dtype, np.dtype(np.longdouble), + "array type %s" % a.dtype) + b = np.float64(1.234) * a + assert_equal(b.dtype, np.dtype('f8'), "array type %s" % a.dtype) + b = np.float32(1.234) * a + assert_equal(b.dtype, np.dtype('f4'), "array type %s" % a.dtype) + b = np.float16(1.234) * a + assert_equal(b.dtype, np.dtype('f2'), "array type %s" % a.dtype) + + b = 1.234j * a + assert_equal(b.dtype, np.dtype('c16'), "array type %s" % a.dtype) + b = np.clongdouble(1.234j) * a + assert_equal(b.dtype, np.dtype(np.clongdouble), + "array type %s" % a.dtype) + b = np.complex128(1.234j) * a + assert_equal(b.dtype, np.dtype('c16'), "array type %s" % a.dtype) + b = np.complex64(1.234j) * a + assert_equal(b.dtype, np.dtype('c8'), "array type %s" % a.dtype) + + # The following use-case is problematic, and to resolve its + # tricky side-effects requires more changes. 
+ # + # Use-case: (1-t)*a, where 't' is a boolean array and 'a' is + # a float32, shouldn't promote to float64 + # + # a = np.array([1.0, 1.5], dtype=np.float32) + # t = np.array([True, False]) + # b = t*a + # assert_equal(b, [1.0, 0.0]) + # assert_equal(b.dtype, np.dtype('f4')) + # b = (1-t)*a + # assert_equal(b, [0.0, 1.5]) + # assert_equal(b.dtype, np.dtype('f4')) + # + # Probably ~t (bitwise negation) is more proper to use here, + # but this is arguably less intuitive to understand at a glance, and + # would fail if 't' is actually an integer array instead of boolean: + # + # b = (~t)*a + # assert_equal(b, [0.0, 1.5]) + # assert_equal(b.dtype, np.dtype('f4')) + + def test_result_type(self): + self.check_promotion_cases(np.result_type) + assert_(np.result_type(None) == np.dtype(None)) + + def test_promote_types_endian(self): + # promote_types should always return native-endian types + assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8')) + assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8')) + + assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) + assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21')) + assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21')) + assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21')) + + assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8')) + assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8')) + assert_equal(np.promote_types('<U8', '<S5'), np.dtype('U8')) + assert_equal(np.promote_types('>U8', '>S5'), np.dtype('U8')) + assert_equal(np.promote_types('<U5', '<U8'), np.dtype('U8')) + assert_equal(np.promote_types('>U8', '>U5'), np.dtype('U8')) + + assert_equal(np.promote_types('<M8', '<M8'), np.dtype('M8')) + assert_equal(np.promote_types('>M8', '>M8'), np.dtype('M8')) + assert_equal(np.promote_types('<m8', '<m8'), np.dtype('m8')) + assert_equal(np.promote_types('>m8', '>m8'), np.dtype('m8')) + + def test_promote_types_strings(self): + assert_equal(np.promote_types('bool', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('b', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('u1', 'S'), np.dtype('S3')) + assert_equal(np.promote_types('u2', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('u4', 'S'), np.dtype('S10')) + assert_equal(np.promote_types('u8', 'S'), np.dtype('S20')) + assert_equal(np.promote_types('i1', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('i2', 'S'), np.dtype('S6')) + assert_equal(np.promote_types('i4', 'S'), np.dtype('S11')) + assert_equal(np.promote_types('i8', 'S'), np.dtype('S21')) + assert_equal(np.promote_types('bool', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('b', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('u1', 'U'), np.dtype('U3')) + assert_equal(np.promote_types('u2', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('u4', 'U'), np.dtype('U10')) + assert_equal(np.promote_types('u8', 'U'), np.dtype('U20')) + assert_equal(np.promote_types('i1', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('i2', 'U'), np.dtype('U6')) + assert_equal(np.promote_types('i4', 'U'), np.dtype('U11')) + assert_equal(np.promote_types('i8', 'U'), np.dtype('U21')) + assert_equal(np.promote_types('bool', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('bool', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('b', 'S1'), np.dtype('S4')) + assert_equal(np.promote_types('b', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u1', 'S1'), np.dtype('S3')) + assert_equal(np.promote_types('u1', 'S30'), np.dtype('S30')) + 
assert_equal(np.promote_types('u2', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('u2', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u4', 'S1'), np.dtype('S10')) + assert_equal(np.promote_types('u4', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) + assert_equal(np.promote_types('u8', 'S30'), np.dtype('S30')) + + def test_can_cast(self): + assert_(np.can_cast(np.int32, np.int64)) + assert_(np.can_cast(np.float64, complex)) + assert_(not np.can_cast(complex, float)) + + assert_(np.can_cast('i8', 'f8')) + assert_(not np.can_cast('i8', 'f4')) + assert_(np.can_cast('i4', 'S11')) + + assert_(np.can_cast('i8', 'i8', 'no')) + assert_(not np.can_cast('<i8', '>i8', 'no')) + + assert_(np.can_cast('<i8', '>i8', 'equiv')) + assert_(not np.can_cast('<i4', '>i8', 'equiv')) + + assert_(np.can_cast('<i4', '>i8', 'safe')) + assert_(not np.can_cast('<i8', '>i4', 'safe')) + + assert_(np.can_cast('<i8', '>i4', 'same_kind')) + assert_(not np.can_cast('<i8', '>u4', 'same_kind')) + + assert_(np.can_cast('<i8', '>u4', 'unsafe')) + + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'S4')) + assert_(not np.can_cast('b', 'S3')) + + assert_(np.can_cast('u1', 'S3')) + assert_(not np.can_cast('u1', 'S2')) + assert_(np.can_cast('u2', 'S5')) + assert_(not np.can_cast('u2', 'S4')) + assert_(np.can_cast('u4', 'S10')) + assert_(not np.can_cast('u4', 'S9')) + assert_(np.can_cast('u8', 'S20')) + assert_(not np.can_cast('u8', 'S19')) + + assert_(np.can_cast('i1', 'S4')) + assert_(not np.can_cast('i1', 'S3')) + assert_(np.can_cast('i2', 'S6')) + assert_(not np.can_cast('i2', 'S5')) + assert_(np.can_cast('i4', 'S11')) + assert_(not np.can_cast('i4', 'S10')) + assert_(np.can_cast('i8', 'S21')) + assert_(not np.can_cast('i8', 'S20')) + + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'U4')) + assert_(not np.can_cast('b', 'U3')) + + assert_(np.can_cast('u1', 'U3')) + assert_(not np.can_cast('u1', 'U2')) + assert_(np.can_cast('u2', 'U5')) + assert_(not np.can_cast('u2', 'U4')) + assert_(np.can_cast('u4', 'U10')) + assert_(not np.can_cast('u4', 'U9')) + assert_(np.can_cast('u8', 'U20')) + assert_(not np.can_cast('u8', 'U19')) + + assert_(np.can_cast('i1', 'U4')) + assert_(not np.can_cast('i1', 'U3')) + assert_(np.can_cast('i2', 'U6')) + assert_(not np.can_cast('i2', 'U5')) + assert_(np.can_cast('i4', 'U11')) + assert_(not np.can_cast('i4', 'U10')) + assert_(np.can_cast('i8', 'U21')) + assert_(not np.can_cast('i8', 'U20')) + + assert_raises(TypeError, np.can_cast, 'i4', None) + assert_raises(TypeError, np.can_cast, None, 'i4') + + # Also test keyword arguments + assert_(np.can_cast(from_=np.int32, to=np.int64)) + + def test_can_cast_simple_to_structured(self): + # Non-structured can only be cast to structured in 'unsafe' mode. + assert_(not np.can_cast('i4', 'i4,i4')) + assert_(not np.can_cast('i4', 'i4,i2')) + assert_(np.can_cast('i4', 'i4,i4', casting='unsafe')) + assert_(np.can_cast('i4', 'i4,i2', casting='unsafe')) + # Even if there is just a single field which is OK. + assert_(not np.can_cast('i2', [('f1', 'i4')])) + assert_(not np.can_cast('i2', [('f1', 'i4')], casting='same_kind')) + assert_(np.can_cast('i2', [('f1', 'i4')], casting='unsafe')) + # It should be the same for recursive structured or subarrays. 
+ assert_(not np.can_cast('i2', [('f1', 'i4,i4')])) + assert_(np.can_cast('i2', [('f1', 'i4,i4')], casting='unsafe')) + assert_(not np.can_cast('i2', [('f1', '(2,3)i4')])) + assert_(np.can_cast('i2', [('f1', '(2,3)i4')], casting='unsafe')) + + def test_can_cast_structured_to_simple(self): + # Need unsafe casting for structured to simple. + assert_(not np.can_cast([('f1', 'i4')], 'i4')) + assert_(np.can_cast([('f1', 'i4')], 'i4', casting='unsafe')) + assert_(np.can_cast([('f1', 'i4')], 'i2', casting='unsafe')) + # Since it is unclear what is being cast, multiple fields to + # single should not work even for unsafe casting. + assert_(not np.can_cast('i4,i4', 'i4', casting='unsafe')) + # But a single field inside a single field is OK. + assert_(not np.can_cast([('f1', [('x', 'i4')])], 'i4')) + assert_(np.can_cast([('f1', [('x', 'i4')])], 'i4', casting='unsafe')) + # And a subarray is fine too - it will just take the first element + # (arguably not very consistently; might also take the first field). + assert_(not np.can_cast([('f0', '(3,)i4')], 'i4')) + assert_(np.can_cast([('f0', '(3,)i4')], 'i4', casting='unsafe')) + # But a structured subarray with multiple fields should fail. + assert_(not np.can_cast([('f0', ('i4,i4'), (2,))], 'i4', + casting='unsafe')) + + def test_can_cast_values(self): + # gh-5917 + for dt in np.sctypes['int'] + np.sctypes['uint']: + ii = np.iinfo(dt) + assert_(np.can_cast(ii.min, dt)) + assert_(np.can_cast(ii.max, dt)) + assert_(not np.can_cast(ii.min - 1, dt)) + assert_(not np.can_cast(ii.max + 1, dt)) + + for dt in np.sctypes['float']: + fi = np.finfo(dt) + assert_(np.can_cast(fi.min, dt)) + assert_(np.can_cast(fi.max, dt)) + + +# Custom exception class to test exception propagation in fromiter +class NIterError(Exception): + pass + + +class TestFromiter(object): + def makegen(self): + for x in range(24): + yield x**2 + + def test_types(self): + ai32 = np.fromiter(self.makegen(), np.int32) + ai64 = np.fromiter(self.makegen(), np.int64) + af = np.fromiter(self.makegen(), float) + assert_(ai32.dtype == np.dtype(np.int32)) + assert_(ai64.dtype == np.dtype(np.int64)) + assert_(af.dtype == np.dtype(float)) + + def test_lengths(self): + expected = np.array(list(self.makegen())) + a = np.fromiter(self.makegen(), int) + a20 = np.fromiter(self.makegen(), int, 20) + assert_(len(a) == len(expected)) + assert_(len(a20) == 20) + assert_raises(ValueError, np.fromiter, + self.makegen(), int, len(expected) + 10) + + def test_values(self): + expected = np.array(list(self.makegen())) + a = np.fromiter(self.makegen(), int) + a20 = np.fromiter(self.makegen(), int, 20) + assert_(np.alltrue(a == expected, axis=0)) + assert_(np.alltrue(a20 == expected[:20], axis=0)) + + def load_data(self, n, eindex): + # Utility method for the issue 2592 tests. + # Raise an exception at the desired index in the iterator. + for e in range(n): + if e == eindex: + raise NIterError('error at index %s' % eindex) + yield e + + def test_2592(self): + # Test iteration exceptions are correctly raised. + count, eindex = 10, 5 + assert_raises(NIterError, np.fromiter, + self.load_data(count, eindex), dtype=int, count=count) + + def test_2592_edge(self): + # Test iter. exceptions, edge case (exception at end of iterator). 
+ count = 10 + eindex = count-1 + assert_raises(NIterError, np.fromiter, + self.load_data(count, eindex), dtype=int, count=count) + + +class TestNonzero(object): + def test_nonzero_trivial(self): + assert_equal(np.count_nonzero(np.array([])), 0) + assert_equal(np.count_nonzero(np.array([], dtype='?')), 0) + assert_equal(np.nonzero(np.array([])), ([],)) + + assert_equal(np.count_nonzero(np.array(0)), 0) + assert_equal(np.count_nonzero(np.array(0, dtype='?')), 0) + assert_equal(np.nonzero(np.array(0)), ([],)) + assert_equal(np.count_nonzero(np.array(1)), 1) + assert_equal(np.count_nonzero(np.array(1, dtype='?')), 1) + assert_equal(np.nonzero(np.array(1)), ([0],)) + + def test_nonzero_onedim(self): + x = np.array([1, 0, 2, -1, 0, 0, 8]) + assert_equal(np.count_nonzero(x), 4) + assert_equal(np.count_nonzero(x), 4) + assert_equal(np.nonzero(x), ([0, 2, 3, 6],)) + + x = np.array([(1, 2), (0, 0), (1, 1), (-1, 3), (0, 7)], + dtype=[('a', 'i4'), ('b', 'i2')]) + assert_equal(np.count_nonzero(x['a']), 3) + assert_equal(np.count_nonzero(x['b']), 4) + assert_equal(np.nonzero(x['a']), ([0, 2, 3],)) + assert_equal(np.nonzero(x['b']), ([0, 2, 3, 4],)) + + def test_nonzero_twodim(self): + x = np.array([[0, 1, 0], [2, 0, 3]]) + assert_equal(np.count_nonzero(x), 3) + assert_equal(np.nonzero(x), ([0, 1, 1], [1, 0, 2])) + + x = np.eye(3) + assert_equal(np.count_nonzero(x), 3) + assert_equal(np.nonzero(x), ([0, 1, 2], [0, 1, 2])) + + x = np.array([[(0, 1), (0, 0), (1, 11)], + [(1, 1), (1, 0), (0, 0)], + [(0, 0), (1, 5), (0, 1)]], dtype=[('a', 'f4'), ('b', 'u1')]) + assert_equal(np.count_nonzero(x['a']), 4) + assert_equal(np.count_nonzero(x['b']), 5) + assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1])) + assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2])) + + assert_(not x['a'].T.flags.aligned) + assert_equal(np.count_nonzero(x['a'].T), 4) + assert_equal(np.count_nonzero(x['b'].T), 5) + assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0])) + assert_equal(np.nonzero(x['b'].T), ([0, 0, 1, 2, 2], [0, 1, 2, 0, 2])) + + def test_sparse(self): + # test special sparse condition boolean code path + for i in range(20): + c = np.zeros(200, dtype=bool) + c[i::20] = True + assert_equal(np.nonzero(c)[0], np.arange(i, 200 + i, 20)) + + c = np.zeros(400, dtype=bool) + c[10 + i:20 + i] = True + c[20 + i*2] = True + assert_equal(np.nonzero(c)[0], + np.concatenate((np.arange(10 + i, 20 + i), [20 + i*2]))) + + def test_return_type(self): + class C(np.ndarray): + pass + + for view in (C, np.ndarray): + for nd in range(1, 4): + shape = tuple(range(2, 2+nd)) + x = np.arange(np.prod(shape)).reshape(shape).view(view) + for nzx in (np.nonzero(x), x.nonzero()): + for nzx_i in nzx: + assert_(type(nzx_i) is np.ndarray) + assert_(nzx_i.flags.writeable) + + def test_count_nonzero_axis(self): + # Basic check of functionality + m = np.array([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]) + + expected = np.array([1, 1, 1, 1, 1]) + assert_equal(np.count_nonzero(m, axis=0), expected) + + expected = np.array([2, 3]) + assert_equal(np.count_nonzero(m, axis=1), expected) + + assert_raises(ValueError, np.count_nonzero, m, axis=(1, 1)) + assert_raises(TypeError, np.count_nonzero, m, axis='foo') + assert_raises(np.AxisError, np.count_nonzero, m, axis=3) + assert_raises(TypeError, np.count_nonzero, + m, axis=np.array([[1], [2]])) + + def test_count_nonzero_axis_all_dtypes(self): + # More thorough test that the axis argument is respected + # for all dtypes and responds correctly when presented with + # either integer or tuple 
arguments for axis + msg = "Mismatch for dtype: %s" + + def assert_equal_w_dt(a, b, err_msg): + assert_equal(a.dtype, b.dtype, err_msg=err_msg) + assert_equal(a, b, err_msg=err_msg) + + for dt in np.typecodes['All']: + err_msg = msg % (np.dtype(dt).name,) + + if dt != 'V': + if dt != 'M': + m = np.zeros((3, 3), dtype=dt) + n = np.ones(1, dtype=dt) + + m[0, 0] = n[0] + m[1, 0] = n[0] + + else: # np.zeros doesn't work for np.datetime64 + m = np.array(['1970-01-01'] * 9) + m = m.reshape((3, 3)) + + m[0, 0] = '1970-01-12' + m[1, 0] = '1970-01-12' + m = m.astype(dt) + + expected = np.array([2, 0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=0), + expected, err_msg=err_msg) + + expected = np.array([1, 1, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=1), + expected, err_msg=err_msg) + + expected = np.array(2) + assert_equal(np.count_nonzero(m, axis=(0, 1)), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m, axis=None), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m), + expected, err_msg=err_msg) + + if dt == 'V': + # There are no 'nonzero' objects for np.void, so the testing + # setup is slightly different for this dtype + m = np.array([np.void(1)] * 6).reshape((2, 3)) + + expected = np.array([0, 0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=0), + expected, err_msg=err_msg) + + expected = np.array([0, 0], dtype=np.intp) + assert_equal_w_dt(np.count_nonzero(m, axis=1), + expected, err_msg=err_msg) + + expected = np.array(0) + assert_equal(np.count_nonzero(m, axis=(0, 1)), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m, axis=None), + expected, err_msg=err_msg) + assert_equal(np.count_nonzero(m), + expected, err_msg=err_msg) + + def test_count_nonzero_axis_consistent(self): + # Check that the axis behaviour for valid axes in + # non-special cases is consistent (and therefore + # correct) by checking it against an integer array + # that is then casted to the generic object dtype + from itertools import combinations, permutations + + axis = (0, 1, 2, 3) + size = (5, 5, 5, 5) + msg = "Mismatch for axis: %s" + + rng = np.random.RandomState(1234) + m = rng.randint(-100, 100, size=size) + n = m.astype(object) + + for length in range(len(axis)): + for combo in combinations(axis, length): + for perm in permutations(combo): + assert_equal( + np.count_nonzero(m, axis=perm), + np.count_nonzero(n, axis=perm), + err_msg=msg % (perm,)) + + def test_countnonzero_axis_empty(self): + a = np.array([[0, 0, 1], [1, 0, 1]]) + assert_equal(np.count_nonzero(a, axis=()), a.astype(bool)) + + def test_array_method(self): + # Tests that the array method + # call to nonzero works + m = np.array([[1, 0, 0], [4, 0, 6]]) + tgt = [[0, 1, 1], [0, 0, 2]] + + assert_equal(m.nonzero(), tgt) + + def test_nonzero_invalid_object(self): + # gh-9295 + a = np.array([np.array([1, 2]), 3]) + assert_raises(ValueError, np.nonzero, a) + + class BoolErrors: + def __bool__(self): + raise ValueError("Not allowed") + def __nonzero__(self): + raise ValueError("Not allowed") + + assert_raises(ValueError, np.nonzero, np.array([BoolErrors()])) + + +class TestIndex(object): + def test_boolean(self): + a = rand(3, 5, 8) + V = rand(5, 8) + g1 = randint(0, 5, size=15) + g2 = randint(0, 8, size=15) + V[g1, g2] = -V[g1, g2] + assert_((np.array([a[0][V > 0], a[1][V > 0], a[2][V > 0]]) == a[:, V > 0]).all()) + + def test_boolean_edgecase(self): + a = np.array([], dtype='int32') + b = np.array([], dtype='bool') + c = a[b] + assert_equal(c, []) + 
assert_equal(c.dtype, np.dtype('int32')) + + +class TestBinaryRepr(object): + def test_zero(self): + assert_equal(np.binary_repr(0), '0') + + def test_positive(self): + assert_equal(np.binary_repr(10), '1010') + assert_equal(np.binary_repr(12522), + '11000011101010') + assert_equal(np.binary_repr(10736848), + '101000111101010011010000') + + def test_negative(self): + assert_equal(np.binary_repr(-1), '-1') + assert_equal(np.binary_repr(-10), '-1010') + assert_equal(np.binary_repr(-12522), + '-11000011101010') + assert_equal(np.binary_repr(-10736848), + '-101000111101010011010000') + + def test_sufficient_width(self): + assert_equal(np.binary_repr(0, width=5), '00000') + assert_equal(np.binary_repr(10, width=7), '0001010') + assert_equal(np.binary_repr(-5, width=7), '1111011') + + def test_neg_width_boundaries(self): + # see gh-8670 + + # Ensure that the example in the issue does not + # break before proceeding to a more thorough test. + assert_equal(np.binary_repr(-128, width=8), '10000000') + + for width in range(1, 11): + num = -2**(width - 1) + exp = '1' + (width - 1) * '0' + assert_equal(np.binary_repr(num, width=width), exp) + + +class TestBaseRepr(object): + def test_base3(self): + assert_equal(np.base_repr(3**5, 3), '100000') + + def test_positive(self): + assert_equal(np.base_repr(12, 10), '12') + assert_equal(np.base_repr(12, 10, 4), '000012') + assert_equal(np.base_repr(12, 4), '30') + assert_equal(np.base_repr(3731624803700888, 36), '10QR0ROFCEW') + + def test_negative(self): + assert_equal(np.base_repr(-12, 10), '-12') + assert_equal(np.base_repr(-12, 10, 4), '-000012') + assert_equal(np.base_repr(-12, 4), '-30') + + def test_base_range(self): + with assert_raises(ValueError): + np.base_repr(1, 1) + with assert_raises(ValueError): + np.base_repr(1, 37) + + +class TestArrayComparisons(object): + def test_array_equal(self): + res = np.array_equal(np.array([1, 2]), np.array([1, 2])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([1, 2, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([3, 4])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array([1, 2]), np.array([1, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equal(np.array(['a'], dtype='S1'), np.array(['a'], dtype='S1')) + assert_(res) + assert_(type(res) is bool) + res = np.array_equal(np.array([('a', 1)], dtype='S1,u4'), + np.array([('a', 1)], dtype='S1,u4')) + assert_(res) + assert_(type(res) is bool) + + def test_none_compares_elementwise(self): + a = np.array([None, 1, None], dtype=object) + assert_equal(a == None, [True, False, True]) + assert_equal(a != None, [False, True, False]) + + a = np.ones(3) + assert_equal(a == None, [False, False, False]) + assert_equal(a != None, [True, True, True]) + + def test_array_equiv(self): + res = np.array_equiv(np.array([1, 2]), np.array([1, 2])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([1, 2, 3])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([3, 4])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([1, 3])) + assert_(not res) + assert_(type(res) is bool) + + res = np.array_equiv(np.array([1, 1]), np.array([1])) + assert_(res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 1]), np.array([[1], [1]])) + assert_(res) + assert_(type(res) is bool) + res = 
np.array_equiv(np.array([1, 2]), np.array([2])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([[1], [2]])) + assert_(not res) + assert_(type(res) is bool) + res = np.array_equiv(np.array([1, 2]), np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])) + assert_(not res) + assert_(type(res) is bool) + + +def assert_array_strict_equal(x, y): + assert_array_equal(x, y) + # Check flags, 32 bit arches typically don't provide 16 byte alignment + if ((x.dtype.alignment <= 8 or + np.intp().dtype.itemsize != 4) and + sys.platform != 'win32'): + assert_(x.flags == y.flags) + else: + assert_(x.flags.owndata == y.flags.owndata) + assert_(x.flags.writeable == y.flags.writeable) + assert_(x.flags.c_contiguous == y.flags.c_contiguous) + assert_(x.flags.f_contiguous == y.flags.f_contiguous) + assert_(x.flags.writebackifcopy == y.flags.writebackifcopy) + # check endianness + assert_(x.dtype.isnative == y.dtype.isnative) + + +class TestClip(object): + def setup(self): + self.nr = 5 + self.nc = 3 + + def fastclip(self, a, m, M, out=None): + if out is None: + return a.clip(m, M) + else: + return a.clip(m, M, out) + + def clip(self, a, m, M, out=None): + # use slow-clip + selector = np.less(a, m) + 2*np.greater(a, M) + return selector.choose((a, m, M), out=out) + + # Handy functions + def _generate_data(self, n, m): + return randn(n, m) + + def _generate_data_complex(self, n, m): + return randn(n, m) + 1.j * rand(n, m) + + def _generate_flt_data(self, n, m): + return (randn(n, m)).astype(np.float32) + + def _neg_byteorder(self, a): + a = np.asarray(a) + if sys.byteorder == 'little': + a = a.astype(a.dtype.newbyteorder('>')) + else: + a = a.astype(a.dtype.newbyteorder('<')) + return a + + def _generate_non_native_data(self, n, m): + data = randn(n, m) + data = self._neg_byteorder(data) + assert_(not data.dtype.isnative) + return data + + def _generate_int_data(self, n, m): + return (10 * rand(n, m)).astype(np.int64) + + def _generate_int32_data(self, n, m): + return (10 * rand(n, m)).astype(np.int32) + + # Now the real test cases + def test_simple_double(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = 0.1 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_simple_int(self): + # Test native int input with scalar min/max. + a = self._generate_int_data(self.nr, self.nc) + a = a.astype(int) + m = -2 + M = 4 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_array_double(self): + # Test native double input with array min/max. + a = self._generate_data(self.nr, self.nc) + m = np.zeros(a.shape) + M = m + 0.5 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_simple_nonnative(self): + # Test non native double input with scalar min/max. + # Test native double input with non native double scalar min/max. + a = self._generate_non_native_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_equal(ac, act) + + # Test native double input with non native double scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = self._neg_byteorder(0.6) + assert_(not M.dtype.isnative) + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_equal(ac, act) + + def test_simple_complex(self): + # Test native complex input with native double scalar min/max. 
+ # Test native input with complex double scalar min/max. + a = 3 * self._generate_data_complex(self.nr, self.nc) + m = -0.5 + M = 1. + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + # Test native input with complex double scalar min/max. + a = 3 * self._generate_data(self.nr, self.nc) + m = -0.5 + 1.j + M = 1. + 2.j + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_clip_complex(self): + # Address Issue gh-5354 for clipping complex arrays + # Test native complex input without explicit min/max + # ie, either min=None or max=None + a = np.ones(10, dtype=complex) + m = a.min() + M = a.max() + am = self.fastclip(a, m, None) + aM = self.fastclip(a, None, M) + assert_array_strict_equal(am, a) + assert_array_strict_equal(aM, a) + + def test_clip_non_contig(self): + # Test clip for non contiguous native input and native scalar min/max. + a = self._generate_data(self.nr * 2, self.nc * 3) + a = a[::2, ::3] + assert_(not a.flags['F_CONTIGUOUS']) + assert_(not a.flags['C_CONTIGUOUS']) + ac = self.fastclip(a, -1.6, 1.7) + act = self.clip(a, -1.6, 1.7) + assert_array_strict_equal(ac, act) + + def test_simple_out(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = np.zeros(a.shape) + act = np.zeros(a.shape) + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int32_inout(self): + # Test native int32 input with double min/max and int32 out. + a = self._generate_int32_data(self.nr, self.nc) + m = np.float64(0) + M = np.float64(2) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int64_out(self): + # Test native int32 input with int32 scalar min/max and int64 out. + a = self._generate_int32_data(self.nr, self.nc) + m = np.int32(-1) + M = np.int32(1) + ac = np.zeros(a.shape, dtype=np.int64) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int64_inout(self): + # Test native int32 input with double array min/max and int32 out. + a = self._generate_int32_data(self.nr, self.nc) + m = np.zeros(a.shape, np.float64) + M = np.float64(1) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_int32_out(self): + # Test native double input with scalar min/max and int out. + a = self._generate_data(self.nr, self.nc) + m = -1.0 + M = 2.0 + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_simple_inplace_01(self): + # Test native double input with array min/max in-place. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = np.zeros(a.shape) + M = 1.0 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_simple_inplace_02(self): + # Test native double input with scalar min/max in-place. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(ac, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_noncontig_inplace(self): + # Test non contiguous double input with double scalar min/max in-place. 
+ a = self._generate_data(self.nr * 2, self.nc * 3) + a = a[::2, ::3] + assert_(not a.flags['F_CONTIGUOUS']) + assert_(not a.flags['C_CONTIGUOUS']) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(ac, m, M, ac) + assert_array_equal(a, ac) + + def test_type_cast_01(self): + # Test native double input with scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_02(self): + # Test native int32 input with int32 scalar min/max. + a = self._generate_int_data(self.nr, self.nc) + a = a.astype(np.int32) + m = -2 + M = 4 + ac = self.fastclip(a, m, M) + act = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_03(self): + # Test native int32 input with float64 scalar min/max. + a = self._generate_int32_data(self.nr, self.nc) + m = -2 + M = 4 + ac = self.fastclip(a, np.float64(m), np.float64(M)) + act = self.clip(a, np.float64(m), np.float64(M)) + assert_array_strict_equal(ac, act) + + def test_type_cast_04(self): + # Test native int32 input with float32 scalar min/max. + a = self._generate_int32_data(self.nr, self.nc) + m = np.float32(-2) + M = np.float32(4) + act = self.fastclip(a, m, M) + ac = self.clip(a, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_05(self): + # Test native int32 with double arrays min/max. + a = self._generate_int_data(self.nr, self.nc) + m = -0.5 + M = 1. + ac = self.fastclip(a, m * np.zeros(a.shape), M) + act = self.clip(a, m * np.zeros(a.shape), M) + assert_array_strict_equal(ac, act) + + def test_type_cast_06(self): + # Test native with NON native scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = 0.5 + m_s = self._neg_byteorder(m) + M = 1. + act = self.clip(a, m_s, M) + ac = self.fastclip(a, m_s, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_07(self): + # Test NON native with native array min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 * np.ones(a.shape) + M = 1. + a_s = self._neg_byteorder(a) + assert_(not a_s.dtype.isnative) + act = a_s.clip(m, M) + ac = self.fastclip(a_s, m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_08(self): + # Test NON native with native scalar min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 1. + a_s = self._neg_byteorder(a) + assert_(not a_s.dtype.isnative) + ac = self.fastclip(a_s, m, M) + act = a_s.clip(m, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_09(self): + # Test native with NON native array min/max. + a = self._generate_data(self.nr, self.nc) + m = -0.5 * np.ones(a.shape) + M = 1. + m_s = self._neg_byteorder(m) + assert_(not m_s.dtype.isnative) + ac = self.fastclip(a, m_s, M) + act = self.clip(a, m_s, M) + assert_array_strict_equal(ac, act) + + def test_type_cast_10(self): + # Test native int32 with float min/max and float out for output argument. + a = self._generate_int_data(self.nr, self.nc) + b = np.zeros(a.shape, dtype=np.float32) + m = np.float32(-0.5) + M = np.float32(1) + act = self.clip(a, m, M, out=b) + ac = self.fastclip(a, m, M, out=b) + assert_array_strict_equal(ac, act) + + def test_type_cast_11(self): + # Test non native with native scalar, min/max, out non native + a = self._generate_non_native_data(self.nr, self.nc) + b = a.copy() + b = b.astype(b.dtype.newbyteorder('>')) + bt = b.copy() + m = -0.5 + M = 1. 
+ self.fastclip(a, m, M, out=b) + self.clip(a, m, M, out=bt) + assert_array_strict_equal(b, bt) + + def test_type_cast_12(self): + # Test native int32 input and min/max and float out + a = self._generate_int_data(self.nr, self.nc) + b = np.zeros(a.shape, dtype=np.float32) + m = np.int32(0) + M = np.int32(1) + act = self.clip(a, m, M, out=b) + ac = self.fastclip(a, m, M, out=b) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple(self): + # Test native double input with scalar min/max + a = self._generate_data(self.nr, self.nc) + m = -0.5 + M = 0.6 + ac = np.zeros(a.shape) + act = np.zeros(a.shape) + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple2(self): + # Test native int32 input with double min/max and int32 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.float64(0) + M = np.float64(2) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_simple_int32(self): + # Test native int32 input with int32 scalar min/max and int64 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.int32(-1) + M = np.int32(1) + ac = np.zeros(a.shape, dtype=np.int64) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_array_int32(self): + # Test native int32 input with double array min/max and int32 out + a = self._generate_int32_data(self.nr, self.nc) + m = np.zeros(a.shape, np.float64) + M = np.float64(1) + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_array_outint32(self): + # Test native double input with scalar min/max and int out + a = self._generate_data(self.nr, self.nc) + m = -1.0 + M = 2.0 + ac = np.zeros(a.shape, dtype=np.int32) + act = ac.copy() + self.fastclip(a, m, M, ac) + self.clip(a, m, M, act) + assert_array_strict_equal(ac, act) + + def test_clip_with_out_transposed(self): + # Test that the out argument works when transposed + a = np.arange(16).reshape(4, 4) + out = np.empty_like(a).T + a.clip(4, 10, out=out) + expected = self.clip(a, 4, 10) + assert_array_equal(out, expected) + + def test_clip_with_out_memory_overlap(self): + # Test that the out argument works when it has memory overlap + a = np.arange(16).reshape(4, 4) + ac = a.copy() + a[:-1].clip(4, 10, out=a[1:]) + expected = self.clip(ac[:-1], 4, 10) + assert_array_equal(a[1:], expected) + + def test_clip_inplace_array(self): + # Test native double input with array min/max + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = np.zeros(a.shape) + M = 1.0 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_clip_inplace_simple(self): + # Test native double input with scalar min/max + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + self.fastclip(a, m, M, a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a, ac) + + def test_clip_func_takes_out(self): + # Ensure that the clip() function takes an out= argument. + a = self._generate_data(self.nr, self.nc) + ac = a.copy() + m = -0.5 + M = 0.6 + a2 = np.clip(a, m, M, out=a) + self.clip(a, m, M, ac) + assert_array_strict_equal(a2, ac) + assert_(a2 is a) + + def test_clip_nan(self): + d = np.arange(7.) 
+ assert_equal(d.clip(min=np.nan), d) + assert_equal(d.clip(max=np.nan), d) + assert_equal(d.clip(min=np.nan, max=np.nan), d) + assert_equal(d.clip(min=-2, max=np.nan), d) + assert_equal(d.clip(min=np.nan, max=10), d) + + +class TestAllclose(object): + rtol = 1e-5 + atol = 1e-8 + + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + def tst_allclose(self, x, y): + assert_(np.allclose(x, y), "%s and %s not close" % (x, y)) + + def tst_not_allclose(self, x, y): + assert_(not np.allclose(x, y), "%s and %s shouldn't be close" % (x, y)) + + def test_ip_allclose(self): + # Parametric test factory. + arr = np.array([100, 1000]) + aran = np.arange(125).reshape((5, 5, 5)) + + atol = self.atol + rtol = self.rtol + + data = [([1, 0], [1, 0]), + ([atol], [0]), + ([1], [1+rtol+atol]), + (arr, arr + arr*rtol), + (arr, arr + arr*rtol + atol*2), + (aran, aran + aran*rtol), + (np.inf, np.inf), + (np.inf, [np.inf])] + + for (x, y) in data: + self.tst_allclose(x, y) + + def test_ip_not_allclose(self): + # Parametric test factory. + aran = np.arange(125).reshape((5, 5, 5)) + + atol = self.atol + rtol = self.rtol + + data = [([np.inf, 0], [1, np.inf]), + ([np.inf, 0], [1, 0]), + ([np.inf, np.inf], [1, np.inf]), + ([np.inf, np.inf], [1, 0]), + ([-np.inf, 0], [np.inf, 0]), + ([np.nan, 0], [np.nan, 0]), + ([atol*2], [0]), + ([1], [1+rtol+atol*2]), + (aran, aran + aran*atol + atol*2), + (np.array([np.inf, 1]), np.array([0, np.inf]))] + + for (x, y) in data: + self.tst_not_allclose(x, y) + + def test_no_parameter_modification(self): + x = np.array([np.inf, 1]) + y = np.array([0, np.inf]) + np.allclose(x, y) + assert_array_equal(x, np.array([np.inf, 1])) + assert_array_equal(y, np.array([0, np.inf])) + + def test_min_int(self): + # Could make problems because of abs(min_int) == min_int + min_int = np.iinfo(np.int_).min + a = np.array([min_int], dtype=np.int_) + assert_(np.allclose(a, a)) + + def test_equalnan(self): + x = np.array([1.0, np.nan]) + assert_(np.allclose(x, x, equal_nan=True)) + + def test_return_class_is_ndarray(self): + # Issue gh-6475 + # Check that allclose does not preserve subtypes + class Foo(np.ndarray): + def __new__(cls, *args, **kwargs): + return np.array(*args, **kwargs).view(cls) + + a = Foo([1]) + assert_(type(np.allclose(a, a)) is bool) + + +class TestIsclose(object): + rtol = 1e-5 + atol = 1e-8 + + def setup(self): + atol = self.atol + rtol = self.rtol + arr = np.array([100, 1000]) + aran = np.arange(125).reshape((5, 5, 5)) + + self.all_close_tests = [ + ([1, 0], [1, 0]), + ([atol], [0]), + ([1], [1 + rtol + atol]), + (arr, arr + arr*rtol), + (arr, arr + arr*rtol + atol), + (aran, aran + aran*rtol), + (np.inf, np.inf), + (np.inf, [np.inf]), + ([np.inf, -np.inf], [np.inf, -np.inf]), + ] + self.none_close_tests = [ + ([np.inf, 0], [1, np.inf]), + ([np.inf, -np.inf], [1, 0]), + ([np.inf, np.inf], [1, -np.inf]), + ([np.inf, np.inf], [1, 0]), + ([np.nan, 0], [np.nan, -np.inf]), + ([atol*2], [0]), + ([1], [1 + rtol + atol*2]), + (aran, aran + rtol*1.1*aran + atol*1.1), + (np.array([np.inf, 1]), np.array([0, np.inf])), + ] + self.some_close_tests = [ + ([np.inf, 0], [np.inf, atol*2]), + ([atol, 1, 1e6*(1 + 2*rtol) + atol], [0, np.nan, 1e6]), + (np.arange(3), [0, 1, 2.1]), + (np.nan, [np.nan, np.nan, np.nan]), + ([0], [atol, np.inf, -np.inf, np.nan]), + (0, [atol, np.inf, -np.inf, np.nan]), + ] + self.some_close_results = [ + [True, False], + [True, False, False], + [True, True, False], + [False, False, False], + [True, False, 
False, False], + [True, False, False, False], + ] + + def test_ip_isclose(self): + self.setup() + tests = self.some_close_tests + results = self.some_close_results + for (x, y), result in zip(tests, results): + assert_array_equal(np.isclose(x, y), result) + + def tst_all_isclose(self, x, y): + assert_(np.all(np.isclose(x, y)), "%s and %s not close" % (x, y)) + + def tst_none_isclose(self, x, y): + msg = "%s and %s shouldn't be close" + assert_(not np.any(np.isclose(x, y)), msg % (x, y)) + + def tst_isclose_allclose(self, x, y): + msg = "isclose.all() and allclose aren't same for %s and %s" + msg2 = "isclose and allclose aren't same for %s and %s" + if np.isscalar(x) and np.isscalar(y): + assert_(np.isclose(x, y) == np.allclose(x, y), msg=msg2 % (x, y)) + else: + assert_array_equal(np.isclose(x, y).all(), np.allclose(x, y), msg % (x, y)) + + def test_ip_all_isclose(self): + self.setup() + for (x, y) in self.all_close_tests: + self.tst_all_isclose(x, y) + + def test_ip_none_isclose(self): + self.setup() + for (x, y) in self.none_close_tests: + self.tst_none_isclose(x, y) + + def test_ip_isclose_allclose(self): + self.setup() + tests = (self.all_close_tests + self.none_close_tests + + self.some_close_tests) + for (x, y) in tests: + self.tst_isclose_allclose(x, y) + + def test_equal_nan(self): + assert_array_equal(np.isclose(np.nan, np.nan, equal_nan=True), [True]) + arr = np.array([1.0, np.nan]) + assert_array_equal(np.isclose(arr, arr, equal_nan=True), [True, True]) + + def test_masked_arrays(self): + # Make sure to test the output type when arguments are interchanged. + + x = np.ma.masked_where([True, True, False], np.arange(3)) + assert_(type(x) is type(np.isclose(2, x))) + assert_(type(x) is type(np.isclose(x, 2))) + + x = np.ma.masked_where([True, True, False], [np.nan, np.inf, np.nan]) + assert_(type(x) is type(np.isclose(np.inf, x))) + assert_(type(x) is type(np.isclose(x, np.inf))) + + x = np.ma.masked_where([True, True, False], [np.nan, np.nan, np.nan]) + y = np.isclose(np.nan, x, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... + assert_array_equal([True, True, False], y.mask) + y = np.isclose(x, np.nan, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... + assert_array_equal([True, True, False], y.mask) + + x = np.ma.masked_where([True, True, False], [np.nan, np.nan, np.nan]) + y = np.isclose(x, x, equal_nan=True) + assert_(type(x) is type(y)) + # Ensure that the mask isn't modified... 
+ assert_array_equal([True, True, False], y.mask) + + def test_scalar_return(self): + assert_(np.isscalar(np.isclose(1, 1))) + + def test_no_parameter_modification(self): + x = np.array([np.inf, 1]) + y = np.array([0, np.inf]) + np.isclose(x, y) + assert_array_equal(x, np.array([np.inf, 1])) + assert_array_equal(y, np.array([0, np.inf])) + + def test_non_finite_scalar(self): + # GH7014, when two scalars are compared the output should also be a + # scalar + assert_(np.isclose(np.inf, -np.inf) is np.False_) + assert_(np.isclose(0, np.inf) is np.False_) + assert_(type(np.isclose(0, np.inf)) is np.bool_) + + +class TestStdVar(object): + def setup(self): + self.A = np.array([1, -1, 1, -1]) + self.real_var = 1 + + def test_basic(self): + assert_almost_equal(np.var(self.A), self.real_var) + assert_almost_equal(np.std(self.A)**2, self.real_var) + + def test_scalars(self): + assert_equal(np.var(1), 0) + assert_equal(np.std(1), 0) + + def test_ddof1(self): + assert_almost_equal(np.var(self.A, ddof=1), + self.real_var*len(self.A)/float(len(self.A)-1)) + assert_almost_equal(np.std(self.A, ddof=1)**2, + self.real_var*len(self.A)/float(len(self.A)-1)) + + def test_ddof2(self): + assert_almost_equal(np.var(self.A, ddof=2), + self.real_var*len(self.A)/float(len(self.A)-2)) + assert_almost_equal(np.std(self.A, ddof=2)**2, + self.real_var*len(self.A)/float(len(self.A)-2)) + + def test_out_scalar(self): + d = np.arange(10) + out = np.array(0.) + r = np.std(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + r = np.var(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + r = np.mean(d, out=out) + assert_(r is out) + assert_array_equal(r, out) + + +class TestStdVarComplex(object): + def test_basic(self): + A = np.array([1, 1.j, -1, -1.j]) + real_var = 1 + assert_almost_equal(np.var(A), real_var) + assert_almost_equal(np.std(A)**2, real_var) + + def test_scalars(self): + assert_equal(np.var(1j), 0) + assert_equal(np.std(1j), 0) + + +class TestCreationFuncs(object): + # Test ones, zeros, empty and full. 
+ + def setup(self): + dtypes = {np.dtype(tp) for tp in itertools.chain(*np.sctypes.values())} + # void, bytes, str + variable_sized = {tp for tp in dtypes if tp.str.endswith('0')} + self.dtypes = sorted(dtypes - variable_sized | + {np.dtype(tp.str.replace("0", str(i))) + for tp in variable_sized for i in range(1, 10)}, + key=lambda dtype: dtype.str) + self.orders = {'C': 'c_contiguous', 'F': 'f_contiguous'} + self.ndims = 10 + + def check_function(self, func, fill_value=None): + par = ((0, 1, 2), + range(self.ndims), + self.orders, + self.dtypes) + fill_kwarg = {} + if fill_value is not None: + fill_kwarg = {'fill_value': fill_value} + + for size, ndims, order, dtype in itertools.product(*par): + shape = ndims * [size] + + # do not fill void type + if fill_kwarg and dtype.str.startswith('|V'): + continue + + arr = func(shape, order=order, dtype=dtype, + **fill_kwarg) + + assert_equal(arr.dtype, dtype) + assert_(getattr(arr.flags, self.orders[order])) + + if fill_value is not None: + if dtype.str.startswith('|S'): + val = str(fill_value) + else: + val = fill_value + assert_equal(arr, dtype.type(val)) + + def test_zeros(self): + self.check_function(np.zeros) + + def test_ones(self): + self.check_function(np.ones) + + def test_empty(self): + self.check_function(np.empty) + + def test_full(self): + self.check_function(np.full, 0) + self.check_function(np.full, 1) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_for_reference_leak(self): + # Make sure we have an object for reference + dim = 1 + beg = sys.getrefcount(dim) + np.zeros([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.ones([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.empty([dim]*10) + assert_(sys.getrefcount(dim) == beg) + np.full([dim]*10, 0) + assert_(sys.getrefcount(dim) == beg) + + +class TestLikeFuncs(object): + '''Test ones_like, zeros_like, empty_like and full_like''' + + def setup(self): + self.data = [ + # Array scalars + (np.array(3.), None), + (np.array(3), 'f8'), + # 1D arrays + (np.arange(6, dtype='f4'), None), + (np.arange(6), 'c16'), + # 2D C-layout arrays + (np.arange(6).reshape(2, 3), None), + (np.arange(6).reshape(3, 2), 'i1'), + # 2D F-layout arrays + (np.arange(6).reshape((2, 3), order='F'), None), + (np.arange(6).reshape((3, 2), order='F'), 'i1'), + # 3D C-layout arrays + (np.arange(24).reshape(2, 3, 4), None), + (np.arange(24).reshape(4, 3, 2), 'f4'), + # 3D F-layout arrays + (np.arange(24).reshape((2, 3, 4), order='F'), None), + (np.arange(24).reshape((4, 3, 2), order='F'), 'f4'), + # 3D non-C/F-layout arrays + (np.arange(24).reshape(2, 3, 4).swapaxes(0, 1), None), + (np.arange(24).reshape(4, 3, 2).swapaxes(0, 1), '?'), + ] + + def compare_array_value(self, dz, value, fill_value): + if value is not None: + if fill_value: + try: + z = dz.dtype.type(value) + except OverflowError: + pass + else: + assert_(np.all(dz == z)) + else: + assert_(np.all(dz == value)) + + def check_like_function(self, like_function, value, fill_value=False): + if fill_value: + fill_kwarg = {'fill_value': value} + else: + fill_kwarg = {} + for d, dtype in self.data: + # default (K) order, dtype + dz = like_function(d, dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_equal(np.array(dz.strides)*d.dtype.itemsize, + np.array(d.strides)*dz.dtype.itemsize) + assert_equal(d.flags.c_contiguous, dz.flags.c_contiguous) + assert_equal(d.flags.f_contiguous, dz.flags.f_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, 
np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # C order, default dtype + dz = like_function(d, order='C', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_(dz.flags.c_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # F order, default dtype + dz = like_function(d, order='F', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + assert_(dz.flags.f_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # A order + dz = like_function(d, order='A', dtype=dtype, **fill_kwarg) + assert_equal(dz.shape, d.shape) + if d.flags.f_contiguous: + assert_(dz.flags.f_contiguous) + else: + assert_(dz.flags.c_contiguous) + if dtype is None: + assert_equal(dz.dtype, d.dtype) + else: + assert_equal(dz.dtype, np.dtype(dtype)) + self.compare_array_value(dz, value, fill_value) + + # Test the 'subok' parameter + class MyNDArray(np.ndarray): + pass + + a = np.array([[1, 2], [3, 4]]).view(MyNDArray) + + b = like_function(a, **fill_kwarg) + assert_(type(b) is MyNDArray) + + b = like_function(a, subok=False, **fill_kwarg) + assert_(type(b) is not MyNDArray) + + def test_ones_like(self): + self.check_like_function(np.ones_like, 1) + + def test_zeros_like(self): + self.check_like_function(np.zeros_like, 0) + + def test_empty_like(self): + self.check_like_function(np.empty_like, None) + + def test_filled_like(self): + self.check_like_function(np.full_like, 0, True) + self.check_like_function(np.full_like, 1, True) + self.check_like_function(np.full_like, 1000, True) + self.check_like_function(np.full_like, 123.456, True) + self.check_like_function(np.full_like, np.inf, True) + + +class TestCorrelate(object): + def _setup(self, dt): + self.x = np.array([1, 2, 3, 4, 5], dtype=dt) + self.xs = np.arange(1, 20)[::3] + self.y = np.array([-1, -2, -3], dtype=dt) + self.z1 = np.array([ -3., -8., -14., -20., -26., -14., -5.], dtype=dt) + self.z1_4 = np.array([-2., -5., -8., -11., -14., -5.], dtype=dt) + self.z1r = np.array([-15., -22., -22., -16., -10., -4., -1.], dtype=dt) + self.z2 = np.array([-5., -14., -26., -20., -14., -8., -3.], dtype=dt) + self.z2r = np.array([-1., -4., -10., -16., -22., -22., -15.], dtype=dt) + self.zs = np.array([-3., -14., -30., -48., -66., -84., + -102., -54., -19.], dtype=dt) + + def test_float(self): + self._setup(float) + z = np.correlate(self.x, self.y, 'full') + assert_array_almost_equal(z, self.z1) + z = np.correlate(self.x, self.y[:-1], 'full') + assert_array_almost_equal(z, self.z1_4) + z = np.correlate(self.y, self.x, 'full') + assert_array_almost_equal(z, self.z2) + z = np.correlate(self.x[::-1], self.y, 'full') + assert_array_almost_equal(z, self.z1r) + z = np.correlate(self.y, self.x[::-1], 'full') + assert_array_almost_equal(z, self.z2r) + z = np.correlate(self.xs, self.y, 'full') + assert_array_almost_equal(z, self.zs) + + def test_object(self): + self._setup(Decimal) + z = np.correlate(self.x, self.y, 'full') + assert_array_almost_equal(z, self.z1) + z = np.correlate(self.y, self.x, 'full') + assert_array_almost_equal(z, self.z2) + + def test_no_overwrite(self): + d = np.ones(100) + k = np.ones(3) + np.correlate(d, k) + assert_array_equal(d, np.ones(100)) + assert_array_equal(k, np.ones(3)) + + def test_complex(self): + x = np.array([1, 2, 3, 4+1j], dtype=complex) + y = np.array([-1, -2j, 3+1j], 
dtype=complex) + r_z = np.array([3-1j, 6, 8+1j, 11+5j, -5+8j, -4-1j], dtype=complex) + r_z = r_z[::-1].conjugate() + z = np.correlate(y, x, mode='full') + assert_array_almost_equal(z, r_z) + + +class TestConvolve(object): + def test_object(self): + d = [1.] * 100 + k = [1.] * 3 + assert_array_almost_equal(np.convolve(d, k)[2:-2], np.full(98, 3)) + + def test_no_overwrite(self): + d = np.ones(100) + k = np.ones(3) + np.convolve(d, k) + assert_array_equal(d, np.ones(100)) + assert_array_equal(k, np.ones(3)) + + +class TestArgwhere(object): + def test_2D(self): + x = np.arange(6).reshape((2, 3)) + assert_array_equal(np.argwhere(x > 1), + [[0, 2], + [1, 0], + [1, 1], + [1, 2]]) + + def test_list(self): + assert_equal(np.argwhere([4, 0, 2, 1, 3]), [[0], [2], [3], [4]]) + + +class TestStringFunction(object): + + def test_set_string_function(self): + a = np.array([1]) + np.set_string_function(lambda x: "FOO", repr=True) + assert_equal(repr(a), "FOO") + np.set_string_function(None, repr=True) + assert_equal(repr(a), "array([1])") + + np.set_string_function(lambda x: "FOO", repr=False) + assert_equal(str(a), "FOO") + np.set_string_function(None, repr=False) + assert_equal(str(a), "[1]") + + +class TestRoll(object): + def test_roll1d(self): + x = np.arange(10) + xr = np.roll(x, 2) + assert_equal(xr, np.array([8, 9, 0, 1, 2, 3, 4, 5, 6, 7])) + + def test_roll2d(self): + x2 = np.reshape(np.arange(10), (2, 5)) + x2r = np.roll(x2, 1) + assert_equal(x2r, np.array([[9, 0, 1, 2, 3], [4, 5, 6, 7, 8]])) + + x2r = np.roll(x2, 1, axis=0) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, 1, axis=1) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + # Roll multiple axes at once. + x2r = np.roll(x2, 1, axis=(0, 1)) + assert_equal(x2r, np.array([[9, 5, 6, 7, 8], [4, 0, 1, 2, 3]])) + + x2r = np.roll(x2, (1, 0), axis=(0, 1)) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, (-1, 0), axis=(0, 1)) + assert_equal(x2r, np.array([[5, 6, 7, 8, 9], [0, 1, 2, 3, 4]])) + + x2r = np.roll(x2, (0, 1), axis=(0, 1)) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + x2r = np.roll(x2, (0, -1), axis=(0, 1)) + assert_equal(x2r, np.array([[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]])) + + x2r = np.roll(x2, (1, 1), axis=(0, 1)) + assert_equal(x2r, np.array([[9, 5, 6, 7, 8], [4, 0, 1, 2, 3]])) + + x2r = np.roll(x2, (-1, -1), axis=(0, 1)) + assert_equal(x2r, np.array([[6, 7, 8, 9, 5], [1, 2, 3, 4, 0]])) + + # Roll the same axis multiple times. + x2r = np.roll(x2, 1, axis=(0, 0)) + assert_equal(x2r, np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])) + + x2r = np.roll(x2, 1, axis=(1, 1)) + assert_equal(x2r, np.array([[3, 4, 0, 1, 2], [8, 9, 5, 6, 7]])) + + # Roll more than one turn in either direction. 
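Shift counts larger than the axis length simply wrap around, so rolling by n behaves like rolling by n modulo the axis length; a minimal sketch of that equivalence (assumes only numpy):

    import numpy as np

    x = np.arange(5)
    assert (np.roll(x, 7) == np.roll(x, 7 % 5)).all()   # a shift of 7 acts like a shift of 2
    assert (np.roll(x, -4) == np.roll(x, 1)).all()      # negative shifts wrap the same way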
+ x2r = np.roll(x2, 6, axis=1) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + x2r = np.roll(x2, -4, axis=1) + assert_equal(x2r, np.array([[4, 0, 1, 2, 3], [9, 5, 6, 7, 8]])) + + def test_roll_empty(self): + x = np.array([]) + assert_equal(np.roll(x, 1), np.array([])) + + +class TestRollaxis(object): + + # expected shape indexed by (axis, start) for array of + # shape (1, 2, 3, 4) + tgtshape = {(0, 0): (1, 2, 3, 4), (0, 1): (1, 2, 3, 4), + (0, 2): (2, 1, 3, 4), (0, 3): (2, 3, 1, 4), + (0, 4): (2, 3, 4, 1), + (1, 0): (2, 1, 3, 4), (1, 1): (1, 2, 3, 4), + (1, 2): (1, 2, 3, 4), (1, 3): (1, 3, 2, 4), + (1, 4): (1, 3, 4, 2), + (2, 0): (3, 1, 2, 4), (2, 1): (1, 3, 2, 4), + (2, 2): (1, 2, 3, 4), (2, 3): (1, 2, 3, 4), + (2, 4): (1, 2, 4, 3), + (3, 0): (4, 1, 2, 3), (3, 1): (1, 4, 2, 3), + (3, 2): (1, 2, 4, 3), (3, 3): (1, 2, 3, 4), + (3, 4): (1, 2, 3, 4)} + + def test_exceptions(self): + a = np.arange(1*2*3*4).reshape(1, 2, 3, 4) + assert_raises(np.AxisError, np.rollaxis, a, -5, 0) + assert_raises(np.AxisError, np.rollaxis, a, 0, -5) + assert_raises(np.AxisError, np.rollaxis, a, 4, 0) + assert_raises(np.AxisError, np.rollaxis, a, 0, 5) + + def test_results(self): + a = np.arange(1*2*3*4).reshape(1, 2, 3, 4).copy() + aind = np.indices(a.shape) + assert_(a.flags['OWNDATA']) + for (i, j) in self.tgtshape: + # positive axis, positive start + res = np.rollaxis(a, axis=i, start=j) + i0, i1, i2, i3 = aind[np.array(res.shape) - 1] + assert_(np.all(res[i0, i1, i2, i3] == a)) + assert_(res.shape == self.tgtshape[(i, j)], str((i,j))) + assert_(not res.flags['OWNDATA']) + + # negative axis, positive start + ip = i + 1 + res = np.rollaxis(a, axis=-ip, start=j) + i0, i1, i2, i3 = aind[np.array(res.shape) - 1] + assert_(np.all(res[i0, i1, i2, i3] == a)) + assert_(res.shape == self.tgtshape[(4 - ip, j)]) + assert_(not res.flags['OWNDATA']) + + # positive axis, negative start + jp = j + 1 if j < 4 else j + res = np.rollaxis(a, axis=i, start=-jp) + i0, i1, i2, i3 = aind[np.array(res.shape) - 1] + assert_(np.all(res[i0, i1, i2, i3] == a)) + assert_(res.shape == self.tgtshape[(i, 4 - jp)]) + assert_(not res.flags['OWNDATA']) + + # negative axis, negative start + ip = i + 1 + jp = j + 1 if j < 4 else j + res = np.rollaxis(a, axis=-ip, start=-jp) + i0, i1, i2, i3 = aind[np.array(res.shape) - 1] + assert_(np.all(res[i0, i1, i2, i3] == a)) + assert_(res.shape == self.tgtshape[(4 - ip, 4 - jp)]) + assert_(not res.flags['OWNDATA']) + + +class TestMoveaxis(object): + def test_move_to_end(self): + x = np.random.randn(5, 6, 7) + for source, expected in [(0, (6, 7, 5)), + (1, (5, 7, 6)), + (2, (5, 6, 7)), + (-1, (5, 6, 7))]: + actual = np.moveaxis(x, source, -1).shape + assert_(actual, expected) + + def test_move_new_position(self): + x = np.random.randn(1, 2, 3, 4) + for source, destination, expected in [ + (0, 1, (2, 1, 3, 4)), + (1, 2, (1, 3, 2, 4)), + (1, -1, (1, 3, 4, 2)), + ]: + actual = np.moveaxis(x, source, destination).shape + assert_(actual, expected) + + def test_preserve_order(self): + x = np.zeros((1, 2, 3, 4)) + for source, destination in [ + (0, 0), + (3, -1), + (-1, 3), + ([0, -1], [0, -1]), + ([2, 0], [2, 0]), + (range(4), range(4)), + ]: + actual = np.moveaxis(x, source, destination).shape + assert_(actual, (1, 2, 3, 4)) + + def test_move_multiples(self): + x = np.zeros((0, 1, 2, 3)) + for source, destination, expected in [ + ([0, 1], [2, 3], (2, 3, 0, 1)), + ([2, 3], [0, 1], (2, 3, 0, 1)), + ([0, 1, 2], [2, 3, 0], (2, 3, 0, 1)), + ([3, 0], [1, 0], (0, 3, 1, 2)), + ([0, 3], [0, 1], (0, 
3, 1, 2)), + ]: + actual = np.moveaxis(x, source, destination).shape + assert_(actual, expected) + + def test_errors(self): + x = np.random.randn(1, 2, 3) + assert_raises_regex(np.AxisError, 'source.*out of bounds', + np.moveaxis, x, 3, 0) + assert_raises_regex(np.AxisError, 'source.*out of bounds', + np.moveaxis, x, -4, 0) + assert_raises_regex(np.AxisError, 'destination.*out of bounds', + np.moveaxis, x, 0, 5) + assert_raises_regex(ValueError, 'repeated axis in `source`', + np.moveaxis, x, [0, 0], [0, 1]) + assert_raises_regex(ValueError, 'repeated axis in `destination`', + np.moveaxis, x, [0, 1], [1, 1]) + assert_raises_regex(ValueError, 'must have the same number', + np.moveaxis, x, 0, [0, 1]) + assert_raises_regex(ValueError, 'must have the same number', + np.moveaxis, x, [0, 1], [0]) + + def test_array_likes(self): + x = np.ma.zeros((1, 2, 3)) + result = np.moveaxis(x, 0, 0) + assert_(x.shape, result.shape) + assert_(isinstance(result, np.ma.MaskedArray)) + + x = [1, 2, 3] + result = np.moveaxis(x, 0, 0) + assert_(x, list(result)) + assert_(isinstance(result, np.ndarray)) + + +class TestCross(object): + def test_2x2(self): + u = [1, 2] + v = [3, 4] + z = -2 + cp = np.cross(u, v) + assert_equal(cp, z) + cp = np.cross(v, u) + assert_equal(cp, -z) + + def test_2x3(self): + u = [1, 2] + v = [3, 4, 5] + z = np.array([10, -5, -2]) + cp = np.cross(u, v) + assert_equal(cp, z) + cp = np.cross(v, u) + assert_equal(cp, -z) + + def test_3x3(self): + u = [1, 2, 3] + v = [4, 5, 6] + z = np.array([-3, 6, -3]) + cp = np.cross(u, v) + assert_equal(cp, z) + cp = np.cross(v, u) + assert_equal(cp, -z) + + def test_broadcasting(self): + # Ticket #2624 (Trac #2032) + u = np.tile([1, 2], (11, 1)) + v = np.tile([3, 4], (11, 1)) + z = -2 + assert_equal(np.cross(u, v), z) + assert_equal(np.cross(v, u), -z) + assert_equal(np.cross(u, u), 0) + + u = np.tile([1, 2], (11, 1)).T + v = np.tile([3, 4, 5], (11, 1)) + z = np.tile([10, -5, -2], (11, 1)) + assert_equal(np.cross(u, v, axisa=0), z) + assert_equal(np.cross(v, u.T), -z) + assert_equal(np.cross(v, v), 0) + + u = np.tile([1, 2, 3], (11, 1)).T + v = np.tile([3, 4], (11, 1)).T + z = np.tile([-12, 9, -2], (11, 1)) + assert_equal(np.cross(u, v, axisa=0, axisb=0), z) + assert_equal(np.cross(v.T, u.T), -z) + assert_equal(np.cross(u.T, u.T), 0) + + u = np.tile([1, 2, 3], (5, 1)) + v = np.tile([4, 5, 6], (5, 1)).T + z = np.tile([-3, 6, -3], (5, 1)) + assert_equal(np.cross(u, v, axisb=0), z) + assert_equal(np.cross(v.T, u), -z) + assert_equal(np.cross(u, u), 0) + + def test_broadcasting_shapes(self): + u = np.ones((2, 1, 3)) + v = np.ones((5, 3)) + assert_equal(np.cross(u, v).shape, (2, 5, 3)) + u = np.ones((10, 3, 5)) + v = np.ones((2, 5)) + assert_equal(np.cross(u, v, axisa=1, axisb=0).shape, (10, 5, 3)) + assert_raises(np.AxisError, np.cross, u, v, axisa=1, axisb=2) + assert_raises(np.AxisError, np.cross, u, v, axisa=3, axisb=0) + u = np.ones((10, 3, 5, 7)) + v = np.ones((5, 7, 2)) + assert_equal(np.cross(u, v, axisa=1, axisc=2).shape, (10, 5, 3, 7)) + assert_raises(np.AxisError, np.cross, u, v, axisa=-5, axisb=2) + assert_raises(np.AxisError, np.cross, u, v, axisa=1, axisb=-4) + # gh-5885 + u = np.ones((3, 4, 2)) + for axisc in range(-2, 2): + assert_equal(np.cross(u, u, axisc=axisc).shape, (3, 4)) + + +def test_outer_out_param(): + arr1 = np.ones((5,)) + arr2 = np.ones((2,)) + arr3 = np.linspace(-2, 2, 5) + out1 = np.ndarray(shape=(5,5)) + out2 = np.ndarray(shape=(2, 5)) + res1 = np.outer(arr1, arr3, out1) + assert_equal(res1, out1) + assert_equal(np.outer(arr2, 
arr3, out2), out2) + + +class TestRequire(object): + flag_names = ['C', 'C_CONTIGUOUS', 'CONTIGUOUS', + 'F', 'F_CONTIGUOUS', 'FORTRAN', + 'A', 'ALIGNED', + 'W', 'WRITEABLE', + 'O', 'OWNDATA'] + + def generate_all_false(self, dtype): + arr = np.zeros((2, 2), [('junk', 'i1'), ('a', dtype)]) + arr.setflags(write=False) + a = arr['a'] + assert_(not a.flags['C']) + assert_(not a.flags['F']) + assert_(not a.flags['O']) + assert_(not a.flags['W']) + assert_(not a.flags['A']) + return a + + def set_and_check_flag(self, flag, dtype, arr): + if dtype is None: + dtype = arr.dtype + b = np.require(arr, dtype, [flag]) + assert_(b.flags[flag]) + assert_(b.dtype == dtype) + + # a further call to np.require ought to return the same array + # unless OWNDATA is specified. + c = np.require(b, None, [flag]) + if flag[0] != 'O': + assert_(c is b) + else: + assert_(c.flags[flag]) + + def test_require_each(self): + + id = ['f8', 'i4'] + fd = [None, 'f8', 'c16'] + for idtype, fdtype, flag in itertools.product(id, fd, self.flag_names): + a = self.generate_all_false(idtype) + self.set_and_check_flag(flag, fdtype, a) + + def test_unknown_requirement(self): + a = self.generate_all_false('f8') + assert_raises(KeyError, np.require, a, None, 'Q') + + def test_non_array_input(self): + a = np.require([1, 2, 3, 4], 'i4', ['C', 'A', 'O']) + assert_(a.flags['O']) + assert_(a.flags['C']) + assert_(a.flags['A']) + assert_(a.dtype == 'i4') + assert_equal(a, [1, 2, 3, 4]) + + def test_C_and_F_simul(self): + a = self.generate_all_false('f8') + assert_raises(ValueError, np.require, a, None, ['C', 'F']) + + def test_ensure_array(self): + class ArraySubclass(np.ndarray): + pass + + a = ArraySubclass((2, 2)) + b = np.require(a, None, ['E']) + assert_(type(b) is np.ndarray) + + def test_preserve_subtype(self): + class ArraySubclass(np.ndarray): + pass + + for flag in self.flag_names: + a = ArraySubclass((2, 2)) + self.set_and_check_flag(flag, None, a) + + +class TestBroadcast(object): + def test_broadcast_in_args(self): + # gh-5881 + arrs = [np.empty((6, 7)), np.empty((5, 6, 1)), np.empty((7,)), + np.empty((5, 1, 7))] + mits = [np.broadcast(*arrs), + np.broadcast(np.broadcast(*arrs[:2]), np.broadcast(*arrs[2:])), + np.broadcast(arrs[0], np.broadcast(*arrs[1:-1]), arrs[-1])] + for mit in mits: + assert_equal(mit.shape, (5, 6, 7)) + assert_equal(mit.ndim, 3) + assert_equal(mit.nd, 3) + assert_equal(mit.numiter, 4) + for a, ia in zip(arrs, mit.iters): + assert_(a is ia.base) + + def test_broadcast_single_arg(self): + # gh-6899 + arrs = [np.empty((5, 6, 7))] + mit = np.broadcast(*arrs) + assert_equal(mit.shape, (5, 6, 7)) + assert_equal(mit.ndim, 3) + assert_equal(mit.nd, 3) + assert_equal(mit.numiter, 1) + assert_(arrs[0] is mit.iters[0].base) + + def test_number_of_arguments(self): + arr = np.empty((5,)) + for j in range(35): + arrs = [arr] * j + if j < 1 or j > 32: + assert_raises(ValueError, np.broadcast, *arrs) + else: + mit = np.broadcast(*arrs) + assert_equal(mit.numiter, j) + + def test_broadcast_error_kwargs(self): + #gh-13455 + arrs = [np.empty((5, 6, 7))] + mit = np.broadcast(*arrs) + mit2 = np.broadcast(*arrs, **{}) + assert_equal(mit.shape, mit2.shape) + assert_equal(mit.ndim, mit2.ndim) + assert_equal(mit.nd, mit2.nd) + assert_equal(mit.numiter, mit2.numiter) + assert_(mit.iters[0].base is mit2.iters[0].base) + + assert_raises(ValueError, np.broadcast, 1, **{'x': 1}) + +class TestKeepdims(object): + + class sub_array(np.ndarray): + def sum(self, axis=None, dtype=None, out=None): + return np.ndarray.sum(self, axis, dtype, 
out, keepdims=True) + + def test_raise(self): + sub_class = self.sub_array + x = np.arange(30).view(sub_class) + assert_raises(TypeError, np.sum, x, keepdims=True) + + +class TestTensordot(object): + + def test_zero_dimension(self): + # Test resolution to issue #5663 + a = np.ndarray((3,0)) + b = np.ndarray((0,4)) + td = np.tensordot(a, b, (1, 0)) + assert_array_equal(td, np.dot(a, b)) + assert_array_equal(td, np.einsum('ij,jk', a, b)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_numerictypes.py b/contrib/python/numpy/py2/numpy/core/tests/test_numerictypes.py new file mode 100644 index 0000000000..d0ff5578a0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_numerictypes.py @@ -0,0 +1,500 @@ +from __future__ import division, absolute_import, print_function + +import sys +import itertools + +import pytest +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises, IS_PYPY + +# This is the structure of the table used for plain objects: +# +# +-+-+-+ +# |x|y|z| +# +-+-+-+ + +# Structure of a plain array description: +Pdescr = [ + ('x', 'i4', (2,)), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +# A plain list of tuples with values for testing: +PbufferT = [ + # x y z + ([3, 2], [[6., 4.], [6., 4.]], 8), + ([4, 3], [[7., 5.], [7., 5.]], 9), + ] + + +# This is the structure of the table used for nested objects (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# + +# The corresponding nested array description: +Ndescr = [ + ('x', 'i4', (2,)), + ('Info', [ + ('value', 'c16'), + ('y2', 'f8'), + ('Info2', [ + ('name', 'S2'), + ('value', 'c16', (2,)), + ('y3', 'f8', (2,)), + ('z3', 'u4', (2,))]), + ('name', 'S2'), + ('z2', 'b1')]), + ('color', 'S2'), + ('info', [ + ('Name', 'U8'), + ('Value', 'c16')]), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +NbufferT = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ([3, 2], (6j, 6., (b'nn', [6j, 4j], [6., 4.], [1, 2]), b'NN', True), b'cc', (u'NN', 6j), [[6., 4.], [6., 4.]], 8), + ([4, 3], (7j, 7., (b'oo', [7j, 5j], [7., 5.], [2, 1]), b'OO', False), b'dd', (u'OO', 7j), [[7., 5.], [7., 5.]], 9), + ] + + +byteorder = {'little':'<', 'big':'>'}[sys.byteorder] + +def normalize_descr(descr): + "Normalize a description adding the platform byteorder." 
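On a little-endian build this normalization would, for instance, turn ('x', 'i4', (2,)) into ('x', '<i4', (2,)), while single-byte and flexible types keep the byte-order-agnostic '|' prefix. A minimal sketch of the same idea via dtype.descr, which is what the tests compare against (assumes a little-endian platform, numpy only):

    import numpy as np

    dt = np.dtype([('x', 'i4'), ('z', 'u1'), ('name', 'S2')])
    assert dt.descr == [('x', '<i4'), ('z', '|u1'), ('name', '|S2')]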
+ + out = [] + for item in descr: + dtype = item[1] + if isinstance(dtype, str): + if dtype[0] not in ['|', '<', '>']: + onebyte = dtype[1:] == "1" + if onebyte or dtype[0] in ['S', 'V', 'b']: + dtype = "|" + dtype + else: + dtype = byteorder + dtype + if len(item) > 2 and np.prod(item[2]) > 1: + nitem = (item[0], dtype, item[2]) + else: + nitem = (item[0], dtype) + out.append(nitem) + elif isinstance(dtype, list): + l = normalize_descr(dtype) + out.append((item[0], l)) + else: + raise ValueError("Expected a str or list and got %s" % + (type(item))) + return out + + +############################################################ +# Creation tests +############################################################ + +class CreateZeros(object): + """Check the creation of heterogeneous arrays zero-valued""" + + def test_zeros0D(self): + """Check creation of 0-dimensional objects""" + h = np.zeros((), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype.fields['x'][0].name[:4] == 'void') + assert_(h.dtype.fields['x'][0].char == 'V') + assert_(h.dtype.fields['x'][0].type == np.void) + # A small check that data is ok + assert_equal(h['z'], np.zeros((), dtype='u1')) + + def test_zerosSD(self): + """Check creation of single-dimensional objects""" + h = np.zeros((2,), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype['y'].name[:4] == 'void') + assert_(h.dtype['y'].char == 'V') + assert_(h.dtype['y'].type == np.void) + # A small check that data is ok + assert_equal(h['z'], np.zeros((2,), dtype='u1')) + + def test_zerosMD(self): + """Check creation of multi-dimensional objects""" + h = np.zeros((2, 3), dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + assert_(h.dtype['z'].name == 'uint8') + assert_(h.dtype['z'].char == 'B') + assert_(h.dtype['z'].type == np.uint8) + # A small check that data is ok + assert_equal(h['z'], np.zeros((2, 3), dtype='u1')) + + +class TestCreateZerosPlain(CreateZeros): + """Check the creation of heterogeneous arrays zero-valued (plain)""" + _descr = Pdescr + +class TestCreateZerosNested(CreateZeros): + """Check the creation of heterogeneous arrays zero-valued (nested)""" + _descr = Ndescr + + +class CreateValues(object): + """Check the creation of heterogeneous arrays with values""" + + def test_tuple(self): + """Check creation from tuples""" + h = np.array(self._buffer, dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (2,)) + else: + assert_(h.shape == ()) + + def test_list_of_tuple(self): + """Check creation from list of tuples""" + h = np.array([self._buffer], dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (1, 2)) + else: + assert_(h.shape == (1,)) + + def test_list_of_list_of_tuple(self): + """Check creation from list of list of tuples""" + h = np.array([[self._buffer]], dtype=self._descr) + assert_(normalize_descr(self._descr) == h.dtype.descr) + if self.multiple_rows: + assert_(h.shape == (1, 1, 2)) + else: + assert_(h.shape == (1, 1)) + + +class TestCreateValuesPlainSingle(CreateValues): + """Check the creation of heterogeneous arrays (plain, single row)""" + _descr = Pdescr + multiple_rows = 0 + _buffer = PbufferT[0] + +class TestCreateValuesPlainMultiple(CreateValues): + """Check the creation of heterogeneous arrays (plain, multiple rows)""" + _descr = Pdescr + multiple_rows = 1 + _buffer = PbufferT + +class 
TestCreateValuesNestedSingle(CreateValues): + """Check the creation of heterogeneous arrays (nested, single row)""" + _descr = Ndescr + multiple_rows = 0 + _buffer = NbufferT[0] + +class TestCreateValuesNestedMultiple(CreateValues): + """Check the creation of heterogeneous arrays (nested, multiple rows)""" + _descr = Ndescr + multiple_rows = 1 + _buffer = NbufferT + + +############################################################ +# Reading tests +############################################################ + +class ReadValuesPlain(object): + """Check the reading of values in heterogeneous arrays (plain)""" + + def test_access_fields(self): + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_(h.shape == ()) + assert_equal(h['x'], np.array(self._buffer[0], dtype='i4')) + assert_equal(h['y'], np.array(self._buffer[1], dtype='f8')) + assert_equal(h['z'], np.array(self._buffer[2], dtype='u1')) + else: + assert_(len(h) == 2) + assert_equal(h['x'], np.array([self._buffer[0][0], + self._buffer[1][0]], dtype='i4')) + assert_equal(h['y'], np.array([self._buffer[0][1], + self._buffer[1][1]], dtype='f8')) + assert_equal(h['z'], np.array([self._buffer[0][2], + self._buffer[1][2]], dtype='u1')) + + +class TestReadValuesPlainSingle(ReadValuesPlain): + """Check the creation of heterogeneous arrays (plain, single row)""" + _descr = Pdescr + multiple_rows = 0 + _buffer = PbufferT[0] + +class TestReadValuesPlainMultiple(ReadValuesPlain): + """Check the values of heterogeneous arrays (plain, multiple rows)""" + _descr = Pdescr + multiple_rows = 1 + _buffer = PbufferT + +class ReadValuesNested(object): + """Check the reading of values in heterogeneous arrays (nested)""" + + def test_access_top_fields(self): + """Check reading the top fields of a nested array""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_(h.shape == ()) + assert_equal(h['x'], np.array(self._buffer[0], dtype='i4')) + assert_equal(h['y'], np.array(self._buffer[4], dtype='f8')) + assert_equal(h['z'], np.array(self._buffer[5], dtype='u1')) + else: + assert_(len(h) == 2) + assert_equal(h['x'], np.array([self._buffer[0][0], + self._buffer[1][0]], dtype='i4')) + assert_equal(h['y'], np.array([self._buffer[0][4], + self._buffer[1][4]], dtype='f8')) + assert_equal(h['z'], np.array([self._buffer[0][5], + self._buffer[1][5]], dtype='u1')) + + def test_nested1_acessors(self): + """Check reading the nested fields of a nested array (1st level)""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + assert_equal(h['Info']['value'], + np.array(self._buffer[1][0], dtype='c16')) + assert_equal(h['Info']['y2'], + np.array(self._buffer[1][1], dtype='f8')) + assert_equal(h['info']['Name'], + np.array(self._buffer[3][0], dtype='U2')) + assert_equal(h['info']['Value'], + np.array(self._buffer[3][1], dtype='c16')) + else: + assert_equal(h['Info']['value'], + np.array([self._buffer[0][1][0], + self._buffer[1][1][0]], + dtype='c16')) + assert_equal(h['Info']['y2'], + np.array([self._buffer[0][1][1], + self._buffer[1][1][1]], + dtype='f8')) + assert_equal(h['info']['Name'], + np.array([self._buffer[0][3][0], + self._buffer[1][3][0]], + dtype='U2')) + assert_equal(h['info']['Value'], + np.array([self._buffer[0][3][1], + self._buffer[1][3][1]], + dtype='c16')) + + def test_nested2_acessors(self): + """Check reading the nested fields of a nested array (2nd level)""" + h = np.array(self._buffer, dtype=self._descr) + if not self.multiple_rows: + 
assert_equal(h['Info']['Info2']['value'], + np.array(self._buffer[1][2][1], dtype='c16')) + assert_equal(h['Info']['Info2']['z3'], + np.array(self._buffer[1][2][3], dtype='u4')) + else: + assert_equal(h['Info']['Info2']['value'], + np.array([self._buffer[0][1][2][1], + self._buffer[1][1][2][1]], + dtype='c16')) + assert_equal(h['Info']['Info2']['z3'], + np.array([self._buffer[0][1][2][3], + self._buffer[1][1][2][3]], + dtype='u4')) + + def test_nested1_descriptor(self): + """Check access nested descriptors of a nested array (1st level)""" + h = np.array(self._buffer, dtype=self._descr) + assert_(h.dtype['Info']['value'].name == 'complex128') + assert_(h.dtype['Info']['y2'].name == 'float64') + if sys.version_info[0] >= 3: + assert_(h.dtype['info']['Name'].name == 'str256') + else: + assert_(h.dtype['info']['Name'].name == 'unicode256') + assert_(h.dtype['info']['Value'].name == 'complex128') + + def test_nested2_descriptor(self): + """Check access nested descriptors of a nested array (2nd level)""" + h = np.array(self._buffer, dtype=self._descr) + assert_(h.dtype['Info']['Info2']['value'].name == 'void256') + assert_(h.dtype['Info']['Info2']['z3'].name == 'void64') + + +class TestReadValuesNestedSingle(ReadValuesNested): + """Check the values of heterogeneous arrays (nested, single row)""" + _descr = Ndescr + multiple_rows = False + _buffer = NbufferT[0] + +class TestReadValuesNestedMultiple(ReadValuesNested): + """Check the values of heterogeneous arrays (nested, multiple rows)""" + _descr = Ndescr + multiple_rows = True + _buffer = NbufferT + +class TestEmptyField(object): + def test_assign(self): + a = np.arange(10, dtype=np.float32) + a.dtype = [("int", "<0i4"), ("float", "<2f4")] + assert_(a['int'].shape == (5, 0)) + assert_(a['float'].shape == (5, 2)) + +class TestCommonType(object): + def test_scalar_loses1(self): + res = np.find_common_type(['f4', 'f4', 'i2'], ['f8']) + assert_(res == 'f4') + + def test_scalar_loses2(self): + res = np.find_common_type(['f4', 'f4'], ['i8']) + assert_(res == 'f4') + + def test_scalar_wins(self): + res = np.find_common_type(['f4', 'f4', 'i2'], ['c8']) + assert_(res == 'c8') + + def test_scalar_wins2(self): + res = np.find_common_type(['u4', 'i4', 'i4'], ['f4']) + assert_(res == 'f8') + + def test_scalar_wins3(self): # doesn't go up to 'f16' on purpose + res = np.find_common_type(['u8', 'i8', 'i8'], ['f8']) + assert_(res == 'f8') + +class TestMultipleFields(object): + def setup(self): + self.ary = np.array([(1, 2, 3, 4), (5, 6, 7, 8)], dtype='i4,f4,i2,c8') + + def _bad_call(self): + return self.ary['f0', 'f1'] + + def test_no_tuple(self): + assert_raises(IndexError, self._bad_call) + + def test_return(self): + res = self.ary[['f0', 'f2']].tolist() + assert_(res == [(1, 3), (5, 7)]) + + +class TestIsSubDType(object): + # scalar types can be promoted into dtypes + wrappers = [np.dtype, lambda x: x] + + def test_both_abstract(self): + assert_(np.issubdtype(np.floating, np.inexact)) + assert_(not np.issubdtype(np.inexact, np.floating)) + + def test_same(self): + for cls in (np.float32, np.int32): + for w1, w2 in itertools.product(self.wrappers, repeat=2): + assert_(np.issubdtype(w1(cls), w2(cls))) + + def test_subclass(self): + # note we cannot promote floating to a dtype, as it would turn into a + # concrete type + for w in self.wrappers: + assert_(np.issubdtype(w(np.float32), np.floating)) + assert_(np.issubdtype(w(np.float64), np.floating)) + + def test_subclass_backwards(self): + for w in self.wrappers: + assert_(not np.issubdtype(np.floating, 
w(np.float32))) + assert_(not np.issubdtype(np.floating, w(np.float64))) + + def test_sibling_class(self): + for w1, w2 in itertools.product(self.wrappers, repeat=2): + assert_(not np.issubdtype(w1(np.float32), w2(np.float64))) + assert_(not np.issubdtype(w1(np.float64), w2(np.float32))) + + +class TestSctypeDict(object): + def test_longdouble(self): + assert_(np.sctypeDict['f8'] is not np.longdouble) + assert_(np.sctypeDict['c16'] is not np.clongdouble) + + +class TestBitName(object): + def test_abstract(self): + assert_raises(ValueError, np.core.numerictypes.bitname, np.floating) + + +class TestMaximumSctype(object): + + # note that parametrizing with sctype['int'] and similar would skip types + # with the same size (gh-11923) + + @pytest.mark.parametrize('t', [np.byte, np.short, np.intc, np.int_, np.longlong]) + def test_int(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['int'][-1]) + + @pytest.mark.parametrize('t', [np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong]) + def test_uint(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['uint'][-1]) + + @pytest.mark.parametrize('t', [np.half, np.single, np.double, np.longdouble]) + def test_float(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['float'][-1]) + + @pytest.mark.parametrize('t', [np.csingle, np.cdouble, np.clongdouble]) + def test_complex(self, t): + assert_equal(np.maximum_sctype(t), np.sctypes['complex'][-1]) + + @pytest.mark.parametrize('t', [np.bool_, np.object_, np.unicode_, np.bytes_, np.void]) + def test_other(self, t): + assert_equal(np.maximum_sctype(t), t) + + +class Test_sctype2char(object): + # This function is old enough that we're really just documenting the quirks + # at this point. + + def test_scalar_type(self): + assert_equal(np.sctype2char(np.double), 'd') + assert_equal(np.sctype2char(np.int_), 'l') + assert_equal(np.sctype2char(np.unicode_), 'U') + assert_equal(np.sctype2char(np.bytes_), 'S') + + def test_other_type(self): + assert_equal(np.sctype2char(float), 'd') + assert_equal(np.sctype2char(list), 'O') + assert_equal(np.sctype2char(np.ndarray), 'O') + + def test_third_party_scalar_type(self): + from numpy.core._rational_tests import rational + assert_raises(KeyError, np.sctype2char, rational) + assert_raises(KeyError, np.sctype2char, rational(1)) + + def test_array_instance(self): + assert_equal(np.sctype2char(np.array([1.0, 2.0])), 'd') + + def test_abstract_type(self): + assert_raises(KeyError, np.sctype2char, np.floating) + + def test_non_type(self): + assert_raises(ValueError, np.sctype2char, 1) + +@pytest.mark.parametrize("rep, expected", [ + (np.int32, True), + (list, False), + (1.1, False), + (str, True), + (np.dtype(np.float64), True), + (np.dtype((np.int16, (3, 4))), True), + (np.dtype([('a', np.int8)]), True), + ]) +def test_issctype(rep, expected): + # ensure proper identification of scalar + # data-types by issctype() + actual = np.issctype(rep) + assert_equal(actual, expected) + + +@pytest.mark.skipif(sys.flags.optimize > 1, + reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1") +@pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") +class TestDocStrings(object): + def test_platform_dependent_aliases(self): + if np.int64 is np.int_: + assert_('int64' in np.int_.__doc__) + elif np.int64 is np.longlong: + assert_('int64' in np.longlong.__doc__) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_overrides.py b/contrib/python/numpy/py2/numpy/core/tests/test_overrides.py new file mode 100644 index 
0000000000..8f1c16539b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_overrides.py @@ -0,0 +1,392 @@ +from __future__ import division, absolute_import, print_function + +import inspect +import sys + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex) +from numpy.core.overrides import ( + _get_implementing_args, array_function_dispatch, + verify_matching_signatures, ENABLE_ARRAY_FUNCTION) +from numpy.core.numeric import pickle +import pytest + + +requires_array_function = pytest.mark.skipif( + not ENABLE_ARRAY_FUNCTION, + reason="__array_function__ dispatch not enabled.") + + +def _return_not_implemented(self, *args, **kwargs): + return NotImplemented + + +# need to define this at the top level to test pickling +@array_function_dispatch(lambda array: (array,)) +def dispatched_one_arg(array): + """Docstring.""" + return 'original' + + +@array_function_dispatch(lambda array1, array2: (array1, array2)) +def dispatched_two_arg(array1, array2): + """Docstring.""" + return 'original' + + +@requires_array_function +class TestGetImplementingArgs(object): + + def test_ndarray(self): + array = np.array(1) + + args = _get_implementing_args([array]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([array, array]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([array, 1]) + assert_equal(list(args), [array]) + + args = _get_implementing_args([1, array]) + assert_equal(list(args), [array]) + + def test_ndarray_subclasses(self): + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + class NoOverrideSub(np.ndarray): + pass + + array = np.array(1).view(np.ndarray) + override_sub = np.array(1).view(OverrideSub) + no_override_sub = np.array(1).view(NoOverrideSub) + + args = _get_implementing_args([array, override_sub]) + assert_equal(list(args), [override_sub, array]) + + args = _get_implementing_args([array, no_override_sub]) + assert_equal(list(args), [no_override_sub, array]) + + args = _get_implementing_args( + [override_sub, no_override_sub]) + assert_equal(list(args), [override_sub, no_override_sub]) + + def test_ndarray_and_duck_array(self): + + class Other(object): + __array_function__ = _return_not_implemented + + array = np.array(1) + other = Other() + + args = _get_implementing_args([other, array]) + assert_equal(list(args), [other, array]) + + args = _get_implementing_args([array, other]) + assert_equal(list(args), [array, other]) + + def test_ndarray_subclass_and_duck_array(self): + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + class Other(object): + __array_function__ = _return_not_implemented + + array = np.array(1) + subarray = np.array(1).view(OverrideSub) + other = Other() + + assert_equal(_get_implementing_args([array, subarray, other]), + [subarray, array, other]) + assert_equal(_get_implementing_args([array, other, subarray]), + [subarray, array, other]) + + def test_many_duck_arrays(self): + + class A(object): + __array_function__ = _return_not_implemented + + class B(A): + __array_function__ = _return_not_implemented + + class C(A): + __array_function__ = _return_not_implemented + + class D(object): + __array_function__ = _return_not_implemented + + a = A() + b = B() + c = C() + d = D() + + assert_equal(_get_implementing_args([1]), []) + assert_equal(_get_implementing_args([a]), [a]) + assert_equal(_get_implementing_args([a, 1]), [a]) + assert_equal(_get_implementing_args([a, a, a]), [a]) + 
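The remaining assertions pin down the ordering rule: each distinct type contributes a single representative argument, and a subclass is placed ahead of its parent so that its __array_function__ is consulted first. A minimal sketch of that rule, using hypothetical Parent/Child classes and the same private helper this file imports:

    from numpy.core.overrides import _get_implementing_args

    class Parent(object):
        def __array_function__(self, func, types, args, kwargs):
            return NotImplemented

    class Child(Parent):
        def __array_function__(self, func, types, args, kwargs):
            return NotImplemented

    p, c = Parent(), Child()
    # one entry per type, with the subclass moved ahead of its parent
    assert list(_get_implementing_args([p, c, p])) == [c, p]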
assert_equal(_get_implementing_args([a, d, a]), [a, d]) + assert_equal(_get_implementing_args([a, b]), [b, a]) + assert_equal(_get_implementing_args([b, a]), [b, a]) + assert_equal(_get_implementing_args([a, b, c]), [b, c, a]) + assert_equal(_get_implementing_args([a, c, b]), [c, b, a]) + + def test_too_many_duck_arrays(self): + namespace = dict(__array_function__=_return_not_implemented) + types = [type('A' + str(i), (object,), namespace) for i in range(33)] + relevant_args = [t() for t in types] + + actual = _get_implementing_args(relevant_args[:32]) + assert_equal(actual, relevant_args[:32]) + + with assert_raises_regex(TypeError, 'distinct argument types'): + _get_implementing_args(relevant_args) + + +@requires_array_function +class TestNDArrayArrayFunction(object): + + def test_method(self): + + class Other(object): + __array_function__ = _return_not_implemented + + class NoOverrideSub(np.ndarray): + pass + + class OverrideSub(np.ndarray): + __array_function__ = _return_not_implemented + + array = np.array([1]) + other = Other() + no_override_sub = array.view(NoOverrideSub) + override_sub = array.view(OverrideSub) + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray,), + args=(array, 1.), kwargs={}) + assert_equal(result, 'original') + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, Other), + args=(array, other), kwargs={}) + assert_(result is NotImplemented) + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, NoOverrideSub), + args=(array, no_override_sub), + kwargs={}) + assert_equal(result, 'original') + + result = array.__array_function__(func=dispatched_two_arg, + types=(np.ndarray, OverrideSub), + args=(array, override_sub), + kwargs={}) + assert_equal(result, 'original') + + with assert_raises_regex(TypeError, 'no implementation found'): + np.concatenate((array, other)) + + expected = np.concatenate((array, array)) + result = np.concatenate((array, no_override_sub)) + assert_equal(result, expected.view(NoOverrideSub)) + result = np.concatenate((array, override_sub)) + assert_equal(result, expected.view(OverrideSub)) + + def test_no_wrapper(self): + array = np.array(1) + func = dispatched_one_arg.__wrapped__ + with assert_raises_regex(AttributeError, '__wrapped__'): + array.__array_function__(func=func, + types=(np.ndarray,), + args=(array,), kwargs={}) + + +@requires_array_function +class TestArrayFunctionDispatch(object): + + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + roundtripped = pickle.loads( + pickle.dumps(dispatched_one_arg, protocol=proto)) + assert_(roundtripped is dispatched_one_arg) + + def test_name_and_docstring(self): + assert_equal(dispatched_one_arg.__name__, 'dispatched_one_arg') + if sys.flags.optimize < 2: + assert_equal(dispatched_one_arg.__doc__, 'Docstring.') + + def test_interface(self): + + class MyArray(object): + def __array_function__(self, func, types, args, kwargs): + return (self, func, types, args, kwargs) + + original = MyArray() + (obj, func, types, args, kwargs) = dispatched_one_arg(original) + assert_(obj is original) + assert_(func is dispatched_one_arg) + assert_equal(set(types), {MyArray}) + # assert_equal uses the overloaded np.iscomplexobj() internally + assert_(args == (original,)) + assert_equal(kwargs, {}) + + def test_not_implemented(self): + + class MyArray(object): + def __array_function__(self, func, types, args, kwargs): + return NotImplemented + + array = MyArray() + with 
assert_raises_regex(TypeError, 'no implementation found'): + dispatched_one_arg(array) + + +@requires_array_function +class TestVerifyMatchingSignatures(object): + + def test_verify_matching_signatures(self): + + verify_matching_signatures(lambda x: 0, lambda x: 0) + verify_matching_signatures(lambda x=None: 0, lambda x=None: 0) + verify_matching_signatures(lambda x=1: 0, lambda x=None: 0) + + with assert_raises(RuntimeError): + verify_matching_signatures(lambda a: 0, lambda b: 0) + with assert_raises(RuntimeError): + verify_matching_signatures(lambda x: 0, lambda x=None: 0) + with assert_raises(RuntimeError): + verify_matching_signatures(lambda x=None: 0, lambda y=None: 0) + with assert_raises(RuntimeError): + verify_matching_signatures(lambda x=1: 0, lambda y=1: 0) + + def test_array_function_dispatch(self): + + with assert_raises(RuntimeError): + @array_function_dispatch(lambda x: (x,)) + def f(y): + pass + + # should not raise + @array_function_dispatch(lambda x: (x,), verify=False) + def f(y): + pass + + +def _new_duck_type_and_implements(): + """Create a duck array type and implements functions.""" + HANDLED_FUNCTIONS = {} + + class MyArray(object): + def __array_function__(self, func, types, args, kwargs): + if func not in HANDLED_FUNCTIONS: + return NotImplemented + if not all(issubclass(t, MyArray) for t in types): + return NotImplemented + return HANDLED_FUNCTIONS[func](*args, **kwargs) + + def implements(numpy_function): + """Register an __array_function__ implementations.""" + def decorator(func): + HANDLED_FUNCTIONS[numpy_function] = func + return func + return decorator + + return (MyArray, implements) + + +@requires_array_function +class TestArrayFunctionImplementation(object): + + def test_one_arg(self): + MyArray, implements = _new_duck_type_and_implements() + + @implements(dispatched_one_arg) + def _(array): + return 'myarray' + + assert_equal(dispatched_one_arg(1), 'original') + assert_equal(dispatched_one_arg(MyArray()), 'myarray') + + def test_optional_args(self): + MyArray, implements = _new_duck_type_and_implements() + + @array_function_dispatch(lambda array, option=None: (array,)) + def func_with_option(array, option='default'): + return option + + @implements(func_with_option) + def my_array_func_with_option(array, new_option='myarray'): + return new_option + + # we don't need to implement every option on __array_function__ + # implementations + assert_equal(func_with_option(1), 'default') + assert_equal(func_with_option(1, option='extra'), 'extra') + assert_equal(func_with_option(MyArray()), 'myarray') + with assert_raises(TypeError): + func_with_option(MyArray(), option='extra') + + # but new options on implementations can't be used + result = my_array_func_with_option(MyArray(), new_option='yes') + assert_equal(result, 'yes') + with assert_raises(TypeError): + func_with_option(MyArray(), new_option='no') + + def test_not_implemented(self): + MyArray, implements = _new_duck_type_and_implements() + + @array_function_dispatch(lambda array: (array,), module='my') + def func(array): + return array + + array = np.array(1) + assert_(func(array) is array) + assert_equal(func.__module__, 'my') + + with assert_raises_regex( + TypeError, "no implementation found for 'my.func'"): + func(MyArray()) + + +class TestNDArrayMethods(object): + + def test_repr(self): + # gh-12162: should still be defined even if __array_function__ doesn't + # implement np.array_repr() + + class MyArray(np.ndarray): + def __array_function__(*args, **kwargs): + return NotImplemented + + array = 
np.array(1).view(MyArray) + assert_equal(repr(array), 'MyArray(1)') + assert_equal(str(array), '1') + + +class TestNumPyFunctions(object): + + def test_set_module(self): + assert_equal(np.sum.__module__, 'numpy') + assert_equal(np.char.equal.__module__, 'numpy.char') + assert_equal(np.fft.fft.__module__, 'numpy.fft') + assert_equal(np.linalg.solve.__module__, 'numpy.linalg') + + @pytest.mark.skipif(sys.version_info[0] < 3, reason="Python 3 only") + def test_inspect_sum(self): + signature = inspect.signature(np.sum) + assert_('axis' in signature.parameters) + + @requires_array_function + def test_override_sum(self): + MyArray, implements = _new_duck_type_and_implements() + + @implements(np.sum) + def _(array): + return 'yes' + + assert_equal(np.sum(MyArray()), 'yes') diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_print.py b/contrib/python/numpy/py2/numpy/core/tests/test_print.py new file mode 100644 index 0000000000..c5c091e13a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_print.py @@ -0,0 +1,205 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import pytest + +import numpy as np +from numpy.testing import assert_, assert_equal +from numpy.core.tests._locales import CommaDecimalPointLocale + + +if sys.version_info[0] >= 3: + from io import StringIO +else: + from StringIO import StringIO + +_REF = {np.inf: 'inf', -np.inf: '-inf', np.nan: 'nan'} + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_float_types(tp): + """ Check formatting. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. + + """ + for x in [0, 1, -1, 1e20]: + assert_equal(str(tp(x)), str(float(x)), + err_msg='Failed str formatting for type %s' % tp) + + if tp(1e16).itemsize > 4: + assert_equal(str(tp(1e16)), str(float('1e16')), + err_msg='Failed str formatting for type %s' % tp) + else: + ref = '1e+16' + assert_equal(str(tp(1e16)), ref, + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_nan_inf_float(tp): + """ Check formatting of nan & inf. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. + + """ + for x in [np.inf, -np.inf, np.nan]: + assert_equal(str(tp(x)), _REF[x], + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_types(tp): + """Check formatting of complex types. + + This is only for the str function, and only for simple types. + The precision of np.float32 and np.longdouble aren't the same as the + python float precision. 
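    For a double-precision complex scalar, for instance, the two forms are
    expected to agree exactly (illustrative):

        >>> import numpy as np
        >>> str(np.cdouble(1 + 1j)) == str(complex(1 + 1j))
        True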
+ + """ + for x in [0, 1, -1, 1e20]: + assert_equal(str(tp(x)), str(complex(x)), + err_msg='Failed str formatting for type %s' % tp) + assert_equal(str(tp(x*1j)), str(complex(x*1j)), + err_msg='Failed str formatting for type %s' % tp) + assert_equal(str(tp(x + x*1j)), str(complex(x + x*1j)), + err_msg='Failed str formatting for type %s' % tp) + + if tp(1e16).itemsize > 8: + assert_equal(str(tp(1e16)), str(complex(1e16)), + err_msg='Failed str formatting for type %s' % tp) + else: + ref = '(1e+16+0j)' + assert_equal(str(tp(1e16)), ref, + err_msg='Failed str formatting for type %s' % tp) + + +@pytest.mark.parametrize('dtype', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_inf_nan(dtype): + """Check inf/nan formatting of complex types.""" + TESTS = { + complex(np.inf, 0): "(inf+0j)", + complex(0, np.inf): "infj", + complex(-np.inf, 0): "(-inf+0j)", + complex(0, -np.inf): "-infj", + complex(np.inf, 1): "(inf+1j)", + complex(1, np.inf): "(1+infj)", + complex(-np.inf, 1): "(-inf+1j)", + complex(1, -np.inf): "(1-infj)", + complex(np.nan, 0): "(nan+0j)", + complex(0, np.nan): "nanj", + complex(-np.nan, 0): "(nan+0j)", + complex(0, -np.nan): "nanj", + complex(np.nan, 1): "(nan+1j)", + complex(1, np.nan): "(1+nanj)", + complex(-np.nan, 1): "(nan+1j)", + complex(1, -np.nan): "(1+nanj)", + } + for c, s in TESTS.items(): + assert_equal(str(dtype(c)), s) + + +# print tests +def _test_redirected_print(x, tp, ref=None): + file = StringIO() + file_tp = StringIO() + stdout = sys.stdout + try: + sys.stdout = file_tp + print(tp(x)) + sys.stdout = file + if ref: + print(ref) + else: + print(x) + finally: + sys.stdout = stdout + + assert_equal(file.getvalue(), file_tp.getvalue(), + err_msg='print failed for type%s' % tp) + + +@pytest.mark.parametrize('tp', [np.float32, np.double, np.longdouble]) +def test_float_type_print(tp): + """Check formatting when using print """ + for x in [0, 1, -1, 1e20]: + _test_redirected_print(float(x), tp) + + for x in [np.inf, -np.inf, np.nan]: + _test_redirected_print(float(x), tp, _REF[x]) + + if tp(1e16).itemsize > 4: + _test_redirected_print(float(1e16), tp) + else: + ref = '1e+16' + _test_redirected_print(float(1e16), tp, ref) + + +@pytest.mark.parametrize('tp', [np.complex64, np.cdouble, np.clongdouble]) +def test_complex_type_print(tp): + """Check formatting when using print """ + # We do not create complex with inf/nan directly because the feature is + # missing in python < 2.6 + for x in [0, 1, -1, 1e20]: + _test_redirected_print(complex(x), tp) + + if tp(1e16).itemsize > 8: + _test_redirected_print(complex(1e16), tp) + else: + ref = '(1e+16+0j)' + _test_redirected_print(complex(1e16), tp, ref) + + _test_redirected_print(complex(np.inf, 1), tp, '(inf+1j)') + _test_redirected_print(complex(-np.inf, 1), tp, '(-inf+1j)') + _test_redirected_print(complex(-np.nan, 1), tp, '(nan+1j)') + + +def test_scalar_format(): + """Test the str.format method with NumPy scalar types""" + tests = [('{0}', True, np.bool_), + ('{0}', False, np.bool_), + ('{0:d}', 130, np.uint8), + ('{0:d}', 50000, np.uint16), + ('{0:d}', 3000000000, np.uint32), + ('{0:d}', 15000000000000000000, np.uint64), + ('{0:d}', -120, np.int8), + ('{0:d}', -30000, np.int16), + ('{0:d}', -2000000000, np.int32), + ('{0:d}', -7000000000000000000, np.int64), + ('{0:g}', 1.5, np.float16), + ('{0:g}', 1.5, np.float32), + ('{0:g}', 1.5, np.float64), + ('{0:g}', 1.5, np.longdouble), + ('{0:g}', 1.5+0.5j, np.complex64), + ('{0:g}', 1.5+0.5j, np.complex128), + ('{0:g}', 1.5+0.5j, np.clongdouble)] + + for (fmat, val, 
valtype) in tests: + try: + assert_equal(fmat.format(val), fmat.format(valtype(val)), + "failed with val %s, type %s" % (val, valtype)) + except ValueError as e: + assert_(False, + "format raised exception (fmt='%s', val=%s, type=%s, exc='%s')" % + (fmat, repr(val), repr(valtype), str(e))) + + +# +# Locale tests: scalar types formatting should be independent of the locale +# + +class TestCommaDecimalPointLocale(CommaDecimalPointLocale): + + def test_locale_single(self): + assert_equal(str(np.float32(1.2)), str(float(1.2))) + + def test_locale_double(self): + assert_equal(str(np.double(1.2)), str(float(1.2))) + + def test_locale_longdouble(self): + assert_equal(str(np.longdouble('1.2')), str(float(1.2))) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_records.py b/contrib/python/numpy/py2/numpy/core/tests/test_records.py new file mode 100644 index 0000000000..95ed1fa5bb --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_records.py @@ -0,0 +1,499 @@ +from __future__ import division, absolute_import, print_function + +import sys +try: + # Accessing collections abstract classes from collections + # has been deprecated since Python 3.3 + import collections.abc as collections_abc +except ImportError: + import collections as collections_abc +import textwrap +from os import path +import pytest + +import numpy as np +from numpy.compat import Path +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_array_almost_equal, + assert_raises, temppath + ) +from numpy.core.numeric import pickle + + +class TestFromrecords(object): + def test_fromrecords(self): + r = np.rec.fromrecords([[456, 'dbe', 1.2], [2, 'de', 1.3]], + names='col1,col2,col3') + assert_equal(r[0].item(), (456, 'dbe', 1.2)) + assert_equal(r['col1'].dtype.kind, 'i') + if sys.version_info[0] >= 3: + assert_equal(r['col2'].dtype.kind, 'U') + assert_equal(r['col2'].dtype.itemsize, 12) + else: + assert_equal(r['col2'].dtype.kind, 'S') + assert_equal(r['col2'].dtype.itemsize, 3) + assert_equal(r['col3'].dtype.kind, 'f') + + def test_fromrecords_0len(self): + """ Verify fromrecords works with a 0-length input """ + dtype = [('a', float), ('b', float)] + r = np.rec.fromrecords([], dtype=dtype) + assert_equal(r.shape, (0,)) + + def test_fromrecords_2d(self): + data = [ + [(1, 2), (3, 4), (5, 6)], + [(6, 5), (4, 3), (2, 1)] + ] + expected_a = [[1, 3, 5], [6, 4, 2]] + expected_b = [[2, 4, 6], [5, 3, 1]] + + # try with dtype + r1 = np.rec.fromrecords(data, dtype=[('a', int), ('b', int)]) + assert_equal(r1['a'], expected_a) + assert_equal(r1['b'], expected_b) + + # try with names + r2 = np.rec.fromrecords(data, names=['a', 'b']) + assert_equal(r2['a'], expected_a) + assert_equal(r2['b'], expected_b) + + assert_equal(r1, r2) + + def test_method_array(self): + r = np.rec.array(b'abcdefg' * 100, formats='i2,a3,i4', shape=3, byteorder='big') + assert_equal(r[1].item(), (25444, b'efg', 1633837924)) + + def test_method_array2(self): + r = np.rec.array([(1, 11, 'a'), (2, 22, 'b'), (3, 33, 'c'), (4, 44, 'd'), (5, 55, 'ex'), + (6, 66, 'f'), (7, 77, 'g')], formats='u1,f4,a1') + assert_equal(r[1].item(), (2, 22.0, b'b')) + + def test_recarray_slices(self): + r = np.rec.array([(1, 11, 'a'), (2, 22, 'b'), (3, 33, 'c'), (4, 44, 'd'), (5, 55, 'ex'), + (6, 66, 'f'), (7, 77, 'g')], formats='u1,f4,a1') + assert_equal(r[1::2][1].item(), (4, 44.0, b'd')) + + def test_recarray_fromarrays(self): + x1 = np.array([1, 2, 3, 4]) + x2 = np.array(['a', 'dd', 'xyz', '12']) + x3 = np.array([1.1, 2, 3, 4]) + r = 
np.rec.fromarrays([x1, x2, x3], names='a,b,c') + assert_equal(r[1].item(), (2, 'dd', 2.0)) + x1[1] = 34 + assert_equal(r.a, np.array([1, 2, 3, 4])) + + def test_recarray_fromfile(self): + data_dir = path.join(path.dirname(__file__), 'data') + filename = path.join(data_dir, 'recarray_from_file.fits') + fd = open(filename, 'rb') + fd.seek(2880 * 2) + r1 = np.rec.fromfile(fd, formats='f8,i4,a5', shape=3, byteorder='big') + fd.seek(2880 * 2) + r2 = np.rec.array(fd, formats='f8,i4,a5', shape=3, byteorder='big') + fd.close() + assert_equal(r1, r2) + + def test_recarray_from_obj(self): + count = 10 + a = np.zeros(count, dtype='O') + b = np.zeros(count, dtype='f8') + c = np.zeros(count, dtype='f8') + for i in range(len(a)): + a[i] = list(range(1, 10)) + + mine = np.rec.fromarrays([a, b, c], names='date,data1,data2') + for i in range(len(a)): + assert_((mine.date[i] == list(range(1, 10)))) + assert_((mine.data1[i] == 0.0)) + assert_((mine.data2[i] == 0.0)) + + def test_recarray_repr(self): + a = np.array([(1, 0.1), (2, 0.2)], + dtype=[('foo', '<i4'), ('bar', '<f8')]) + a = np.rec.array(a) + assert_equal( + repr(a), + textwrap.dedent("""\ + rec.array([(1, 0.1), (2, 0.2)], + dtype=[('foo', '<i4'), ('bar', '<f8')])""") + ) + + # make sure non-structured dtypes also show up as rec.array + a = np.array(np.ones(4, dtype='f8')) + assert_(repr(np.rec.array(a)).startswith('rec.array')) + + # check that the 'np.record' part of the dtype isn't shown + a = np.rec.array(np.ones(3, dtype='i4,i4')) + assert_equal(repr(a).find('numpy.record'), -1) + a = np.rec.array(np.ones(3, dtype='i4')) + assert_(repr(a).find('dtype=int32') != -1) + + def test_0d_recarray_repr(self): + arr_0d = np.rec.array((1, 2.0, '2003'), dtype='<i4,<f8,<M8[Y]') + assert_equal(repr(arr_0d), textwrap.dedent("""\ + rec.array((1, 2., '2003'), + dtype=[('f0', '<i4'), ('f1', '<f8'), ('f2', '<M8[Y]')])""")) + + record = arr_0d[()] + assert_equal(repr(record), "(1, 2., '2003')") + # 1.13 converted to python scalars before the repr + try: + np.set_printoptions(legacy='1.13') + assert_equal(repr(record), '(1, 2.0, datetime.date(2003, 1, 1))') + finally: + np.set_printoptions(legacy=False) + + def test_recarray_from_repr(self): + a = np.array([(1,'ABC'), (2, "DEF")], + dtype=[('foo', int), ('bar', 'S4')]) + recordarr = np.rec.array(a) + recarr = a.view(np.recarray) + recordview = a.view(np.dtype((np.record, a.dtype))) + + recordarr_r = eval("numpy." + repr(recordarr), {'numpy': np}) + recarr_r = eval("numpy." + repr(recarr), {'numpy': np}) + recordview_r = eval("numpy." 
+ repr(recordview), {'numpy': np}) + + assert_equal(type(recordarr_r), np.recarray) + assert_equal(recordarr_r.dtype.type, np.record) + assert_equal(recordarr, recordarr_r) + + assert_equal(type(recarr_r), np.recarray) + assert_equal(recarr_r.dtype.type, np.record) + assert_equal(recarr, recarr_r) + + assert_equal(type(recordview_r), np.ndarray) + assert_equal(recordview.dtype.type, np.record) + assert_equal(recordview, recordview_r) + + def test_recarray_views(self): + a = np.array([(1,'ABC'), (2, "DEF")], + dtype=[('foo', int), ('bar', 'S4')]) + b = np.array([1,2,3,4,5], dtype=np.int64) + + #check that np.rec.array gives right dtypes + assert_equal(np.rec.array(a).dtype.type, np.record) + assert_equal(type(np.rec.array(a)), np.recarray) + assert_equal(np.rec.array(b).dtype.type, np.int64) + assert_equal(type(np.rec.array(b)), np.recarray) + + #check that viewing as recarray does the same + assert_equal(a.view(np.recarray).dtype.type, np.record) + assert_equal(type(a.view(np.recarray)), np.recarray) + assert_equal(b.view(np.recarray).dtype.type, np.int64) + assert_equal(type(b.view(np.recarray)), np.recarray) + + #check that view to non-structured dtype preserves type=np.recarray + r = np.rec.array(np.ones(4, dtype="f4,i4")) + rv = r.view('f8').view('f4,i4') + assert_equal(type(rv), np.recarray) + assert_equal(rv.dtype.type, np.record) + + #check that getitem also preserves np.recarray and np.record + r = np.rec.array(np.ones(4, dtype=[('a', 'i4'), ('b', 'i4'), + ('c', 'i4,i4')])) + assert_equal(r['c'].dtype.type, np.record) + assert_equal(type(r['c']), np.recarray) + + #and that it preserves subclasses (gh-6949) + class C(np.recarray): + pass + + c = r.view(C) + assert_equal(type(c['c']), C) + + # check that accessing nested structures keep record type, but + # not for subarrays, non-void structures, non-structured voids + test_dtype = [('a', 'f4,f4'), ('b', 'V8'), ('c', ('f4',2)), + ('d', ('i8', 'i4,i4'))] + r = np.rec.array([((1,1), b'11111111', [1,1], 1), + ((1,1), b'11111111', [1,1], 1)], dtype=test_dtype) + assert_equal(r.a.dtype.type, np.record) + assert_equal(r.b.dtype.type, np.void) + assert_equal(r.c.dtype.type, np.float32) + assert_equal(r.d.dtype.type, np.int64) + # check the same, but for views + r = np.rec.array(np.ones(4, dtype='i4,i4')) + assert_equal(r.view('f4,f4').dtype.type, np.record) + assert_equal(r.view(('i4',2)).dtype.type, np.int32) + assert_equal(r.view('V8').dtype.type, np.void) + assert_equal(r.view(('i8', 'i4,i4')).dtype.type, np.int64) + + #check that we can undo the view + arrs = [np.ones(4, dtype='f4,i4'), np.ones(4, dtype='f8')] + for arr in arrs: + rec = np.rec.array(arr) + # recommended way to view as an ndarray: + arr2 = rec.view(rec.dtype.fields or rec.dtype, np.ndarray) + assert_equal(arr2.dtype.type, arr.dtype.type) + assert_equal(type(arr2), type(arr)) + + def test_recarray_from_names(self): + ra = np.rec.array([ + (1, 'abc', 3.7000002861022949, 0), + (2, 'xy', 6.6999998092651367, 1), + (0, ' ', 0.40000000596046448, 0)], + names='c1, c2, c3, c4') + pa = np.rec.fromrecords([ + (1, 'abc', 3.7000002861022949, 0), + (2, 'xy', 6.6999998092651367, 1), + (0, ' ', 0.40000000596046448, 0)], + names='c1, c2, c3, c4') + assert_(ra.dtype == pa.dtype) + assert_(ra.shape == pa.shape) + for k in range(len(ra)): + assert_(ra[k].item() == pa[k].item()) + + def test_recarray_conflict_fields(self): + ra = np.rec.array([(1, 'abc', 2.3), (2, 'xyz', 4.2), + (3, 'wrs', 1.3)], + names='field, shape, mean') + ra.mean = [1.1, 2.2, 3.3] + 
assert_array_almost_equal(ra['mean'], [1.1, 2.2, 3.3]) + assert_(type(ra.mean) is type(ra.var)) + ra.shape = (1, 3) + assert_(ra.shape == (1, 3)) + ra.shape = ['A', 'B', 'C'] + assert_array_equal(ra['shape'], [['A', 'B', 'C']]) + ra.field = 5 + assert_array_equal(ra['field'], [[5, 5, 5]]) + assert_(isinstance(ra.field, collections_abc.Callable)) + + def test_fromrecords_with_explicit_dtype(self): + a = np.rec.fromrecords([(1, 'a'), (2, 'bbb')], + dtype=[('a', int), ('b', object)]) + assert_equal(a.a, [1, 2]) + assert_equal(a[0].a, 1) + assert_equal(a.b, ['a', 'bbb']) + assert_equal(a[-1].b, 'bbb') + # + ndtype = np.dtype([('a', int), ('b', object)]) + a = np.rec.fromrecords([(1, 'a'), (2, 'bbb')], dtype=ndtype) + assert_equal(a.a, [1, 2]) + assert_equal(a[0].a, 1) + assert_equal(a.b, ['a', 'bbb']) + assert_equal(a[-1].b, 'bbb') + + def test_recarray_stringtypes(self): + # Issue #3993 + a = np.array([('abc ', 1), ('abc', 2)], + dtype=[('foo', 'S4'), ('bar', int)]) + a = a.view(np.recarray) + assert_equal(a.foo[0] == a.foo[1], False) + + def test_recarray_returntypes(self): + qux_fields = {'C': (np.dtype('S5'), 0), 'D': (np.dtype('S5'), 6)} + a = np.rec.array([('abc ', (1,1), 1, ('abcde', 'fgehi')), + ('abc', (2,3), 1, ('abcde', 'jklmn'))], + dtype=[('foo', 'S4'), + ('bar', [('A', int), ('B', int)]), + ('baz', int), ('qux', qux_fields)]) + assert_equal(type(a.foo), np.ndarray) + assert_equal(type(a['foo']), np.ndarray) + assert_equal(type(a.bar), np.recarray) + assert_equal(type(a['bar']), np.recarray) + assert_equal(a.bar.dtype.type, np.record) + assert_equal(type(a['qux']), np.recarray) + assert_equal(a.qux.dtype.type, np.record) + assert_equal(dict(a.qux.dtype.fields), qux_fields) + assert_equal(type(a.baz), np.ndarray) + assert_equal(type(a['baz']), np.ndarray) + assert_equal(type(a[0].bar), np.record) + assert_equal(type(a[0]['bar']), np.record) + assert_equal(a[0].bar.A, 1) + assert_equal(a[0].bar['A'], 1) + assert_equal(a[0]['bar'].A, 1) + assert_equal(a[0]['bar']['A'], 1) + assert_equal(a[0].qux.D, b'fgehi') + assert_equal(a[0].qux['D'], b'fgehi') + assert_equal(a[0]['qux'].D, b'fgehi') + assert_equal(a[0]['qux']['D'], b'fgehi') + + def test_zero_width_strings(self): + # Test for #6430, based on the test case from #1901 + + cols = [['test'] * 3, [''] * 3] + rec = np.rec.fromarrays(cols) + assert_equal(rec['f0'], ['test', 'test', 'test']) + assert_equal(rec['f1'], ['', '', '']) + + dt = np.dtype([('f0', '|S4'), ('f1', '|S')]) + rec = np.rec.fromarrays(cols, dtype=dt) + assert_equal(rec.itemsize, 4) + assert_equal(rec['f0'], [b'test', b'test', b'test']) + assert_equal(rec['f1'], [b'', b'', b'']) + + +@pytest.mark.skipif(Path is None, reason="No pathlib.Path") +class TestPathUsage(object): + # Test that pathlib.Path can be used + def test_tofile_fromfile(self): + with temppath(suffix='.bin') as path: + path = Path(path) + np.random.seed(123) + a = np.random.rand(10).astype('f8,i4,a5') + a[5] = (0.5,10,'abcde') + with path.open("wb") as fd: + a.tofile(fd) + x = np.core.records.fromfile(path, + formats='f8,i4,a5', + shape=10) + assert_array_equal(x, a) + + +class TestRecord(object): + def setup(self): + self.data = np.rec.fromrecords([(1, 2, 3), (4, 5, 6)], + dtype=[("col1", "<i4"), + ("col2", "<i4"), + ("col3", "<i4")]) + + def test_assignment1(self): + a = self.data + assert_equal(a.col1[0], 1) + a[0].col1 = 0 + assert_equal(a.col1[0], 0) + + def test_assignment2(self): + a = self.data + assert_equal(a.col1[0], 1) + a.col1[0] = 0 + assert_equal(a.col1[0], 0) + + def 
test_invalid_assignment(self): + a = self.data + + def assign_invalid_column(x): + x[0].col5 = 1 + + assert_raises(AttributeError, assign_invalid_column, a) + + def test_nonwriteable_setfield(self): + # gh-8171 + r = np.rec.array([(0,), (1,)], dtype=[('f', 'i4')]) + r.flags.writeable = False + with assert_raises(ValueError): + r.f = [2, 3] + with assert_raises(ValueError): + r.setfield([2,3], *r.dtype.fields['f']) + + def test_out_of_order_fields(self): + # names in the same order, padding added to descr + x = self.data[['col1', 'col2']] + assert_equal(x.dtype.names, ('col1', 'col2')) + assert_equal(x.dtype.descr, + [('col1', '<i4'), ('col2', '<i4'), ('', '|V4')]) + + # names change order to match indexing, as of 1.14 - descr can't + # represent that + y = self.data[['col2', 'col1']] + assert_equal(y.dtype.names, ('col2', 'col1')) + assert_raises(ValueError, lambda: y.dtype.descr) + + def test_pickle_1(self): + # Issue #1529 + a = np.array([(1, [])], dtype=[('a', np.int32), ('b', np.int32, 0)]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto))) + assert_equal(a[0], pickle.loads(pickle.dumps(a[0], + protocol=proto))) + + def test_pickle_2(self): + a = self.data + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_equal(a, pickle.loads(pickle.dumps(a, protocol=proto))) + assert_equal(a[0], pickle.loads(pickle.dumps(a[0], + protocol=proto))) + + def test_pickle_3(self): + # Issue #7140 + a = self.data + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + pa = pickle.loads(pickle.dumps(a[0], protocol=proto)) + assert_(pa.flags.c_contiguous) + assert_(pa.flags.f_contiguous) + assert_(pa.flags.writeable) + assert_(pa.flags.aligned) + + def test_objview_record(self): + # https://github.com/numpy/numpy/issues/2599 + dt = np.dtype([('foo', 'i8'), ('bar', 'O')]) + r = np.zeros((1,3), dtype=dt).view(np.recarray) + r.foo = np.array([1, 2, 3]) # TypeError? + + # https://github.com/numpy/numpy/issues/3256 + ra = np.recarray((2,), dtype=[('x', object), ('y', float), ('z', int)]) + ra[['x','y']] # TypeError? 
+ + def test_record_scalar_setitem(self): + # https://github.com/numpy/numpy/issues/3561 + rec = np.recarray(1, dtype=[('x', float, 5)]) + rec[0].x = 1 + assert_equal(rec[0].x, np.ones(5)) + + def test_missing_field(self): + # https://github.com/numpy/numpy/issues/4806 + arr = np.zeros((3,), dtype=[('x', int), ('y', int)]) + assert_raises(ValueError, lambda: arr[['nofield']]) + + @pytest.mark.parametrize('nfields', [0, 1, 2]) + def test_assign_dtype_attribute(self, nfields): + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields]) + data = np.zeros(3, dt).view(np.recarray) + + # the original and resulting dtypes differ on whether they are records + assert data.dtype.type == np.record + assert dt.type != np.record + + # ensure that the dtype remains a record even when assigned + data.dtype = dt + assert data.dtype.type == np.record + + @pytest.mark.parametrize('nfields', [0, 1, 2]) + def test_nested_fields_are_records(self, nfields): + """ Test that nested structured types are treated as records too """ + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields]) + dt_outer = np.dtype([('inner', dt)]) + + data = np.zeros(3, dt_outer).view(np.recarray) + assert isinstance(data, np.recarray) + assert isinstance(data['inner'], np.recarray) + + data0 = data[0] + assert isinstance(data0, np.record) + assert isinstance(data0['inner'], np.record) + + def test_nested_dtype_padding(self): + """ test that trailing padding is preserved """ + # construct a dtype with padding at the end + dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)]) + dt_padded_end = np.dtype(dict( + names=['a', 'b'], + formats=[np.uint8, np.uint8], + offsets=[0, 1], + itemsize=3 + )) # dt[['a', 'b']], but that's not supported in 1.16 + assert dt_padded_end.itemsize == dt.itemsize + + dt_outer = np.dtype([('inner', dt_padded_end)]) + + data = np.zeros(3, dt_outer).view(np.recarray) + assert_equal(data['inner'].dtype, dt_padded_end) + + data0 = data[0] + assert_equal(data0['inner'].dtype, dt_padded_end) + + +def test_find_duplicate(): + l1 = [1, 2, 3, 4, 5, 6] + assert_(np.rec.find_duplicate(l1) == []) + + l2 = [1, 2, 1, 4, 5, 6] + assert_(np.rec.find_duplicate(l2) == [1]) + + l3 = [1, 2, 1, 4, 1, 6, 2, 3] + assert_(np.rec.find_duplicate(l3) == [1, 2]) + + l3 = [2, 2, 1, 4, 1, 6, 2, 3] + assert_(np.rec.find_duplicate(l3) == [2, 1]) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_regression.py b/contrib/python/numpy/py2/numpy/core/tests/test_regression.py new file mode 100644 index 0000000000..8d84b2c12d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_regression.py @@ -0,0 +1,2487 @@ +from __future__ import division, absolute_import, print_function + +import copy +import sys +import gc +import tempfile +import pytest +from os import path +from io import BytesIO +from itertools import chain + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, IS_PYPY, assert_almost_equal, + assert_array_equal, assert_array_almost_equal, assert_raises, + assert_raises_regex, assert_warns, suppress_warnings, + _assert_valid_refcount, HAS_REFCOUNT, + ) +from numpy.compat import asbytes, asunicode, long +from numpy.core.numeric import pickle + +try: + RecursionError +except NameError: + RecursionError = RuntimeError # python < 3.5 + +class TestRegression(object): + def test_invalid_round(self): + # Ticket #3 + v = 4.7599999999999998 + assert_array_equal(np.array([v]), np.array(v)) + + def test_mem_empty(self): + # Ticket #7 + np.empty((1,), 
dtype=[('x', np.int64)]) + + def test_pickle_transposed(self): + # Ticket #16 + a = np.transpose(np.array([[2, 9], [7, 0], [3, 8]])) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + f = BytesIO() + pickle.dump(a, f, protocol=proto) + f.seek(0) + b = pickle.load(f) + f.close() + assert_array_equal(a, b) + + def test_typeNA(self): + # Issue gh-515 + with suppress_warnings() as sup: + sup.filter(np.VisibleDeprecationWarning) + assert_equal(np.typeNA[np.int64], 'Int64') + assert_equal(np.typeNA[np.uint64], 'UInt64') + + def test_dtype_names(self): + # Ticket #35 + # Should succeed + np.dtype([(('name', 'label'), np.int32, 3)]) + + def test_reduce(self): + # Ticket #40 + assert_almost_equal(np.add.reduce([1., .5], dtype=None), 1.5) + + def test_zeros_order(self): + # Ticket #43 + np.zeros([3], int, 'C') + np.zeros([3], order='C') + np.zeros([3], int, order='C') + + def test_asarray_with_order(self): + # Check that nothing is done when order='F' and array C/F-contiguous + a = np.ones(2) + assert_(a is np.asarray(a, order='F')) + + def test_ravel_with_order(self): + # Check that ravel works when order='F' and array C/F-contiguous + a = np.ones(2) + assert_(not a.ravel('F').flags.owndata) + + def test_sort_bigendian(self): + # Ticket #47 + a = np.linspace(0, 10, 11) + c = a.astype(np.dtype('<f8')) + c.sort() + assert_array_almost_equal(c, a) + + def test_negative_nd_indexing(self): + # Ticket #49 + c = np.arange(125).reshape((5, 5, 5)) + origidx = np.array([-1, 0, 1]) + idx = np.array(origidx) + c[idx] + assert_array_equal(idx, origidx) + + def test_char_dump(self): + # Ticket #50 + ca = np.char.array(np.arange(1000, 1010), itemsize=4) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + f = BytesIO() + pickle.dump(ca, f, protocol=proto) + f.seek(0) + ca = np.load(f, allow_pickle=True) + f.close() + + def test_noncontiguous_fill(self): + # Ticket #58. + a = np.zeros((5, 3)) + b = a[:, :2,] + + def rs(): + b.shape = (10,) + + assert_raises(AttributeError, rs) + + def test_bool(self): + # Ticket #60 + np.bool_(1) # Should succeed + + def test_indexing1(self): + # Ticket #64 + descr = [('x', [('y', [('z', 'c16', (2,)),]),]),] + buffer = ((([6j, 4j],),),) + h = np.array(buffer, dtype=descr) + h['x']['y']['z'] + + def test_indexing2(self): + # Ticket #65 + descr = [('x', 'i4', (2,))] + buffer = ([3, 2],) + h = np.array(buffer, dtype=descr) + h['x'] + + def test_round(self): + # Ticket #67 + x = np.array([1+2j]) + assert_almost_equal(x**(-1), [1/(1+2j)]) + + def test_scalar_compare(self): + # Trac Ticket #72 + # https://github.com/numpy/numpy/issues/565 + a = np.array(['test', 'auto']) + assert_array_equal(a == 'auto', np.array([False, True])) + assert_(a[1] == 'auto') + assert_(a[0] != 'auto') + b = np.linspace(0, 10, 11) + # This should return true for now, but will eventually raise an error: + with suppress_warnings() as sup: + sup.filter(FutureWarning) + assert_(b != 'auto') + assert_(b[0] != 'auto') + + def test_unicode_swapping(self): + # Ticket #79 + ulen = 1 + ucs_value = u'\U0010FFFF' + ua = np.array([[[ucs_value*ulen]*2]*3]*4, dtype='U%s' % ulen) + ua.newbyteorder() # Should succeed. 
+ + def test_object_array_fill(self): + # Ticket #86 + x = np.zeros(1, 'O') + x.fill([]) + + def test_mem_dtype_align(self): + # Ticket #93 + assert_raises(TypeError, np.dtype, + {'names':['a'], 'formats':['foo']}, align=1) + + def test_endian_bool_indexing(self): + # Ticket #105 + a = np.arange(10., dtype='>f8') + b = np.arange(10., dtype='<f8') + xa = np.where((a > 2) & (a < 6)) + xb = np.where((b > 2) & (b < 6)) + ya = ((a > 2) & (a < 6)) + yb = ((b > 2) & (b < 6)) + assert_array_almost_equal(xa, ya.nonzero()) + assert_array_almost_equal(xb, yb.nonzero()) + assert_(np.all(a[ya] > 0.5)) + assert_(np.all(b[yb] > 0.5)) + + def test_endian_where(self): + # GitHub issue #369 + net = np.zeros(3, dtype='>f4') + net[1] = 0.00458849 + net[2] = 0.605202 + max_net = net.max() + test = np.where(net <= 0., max_net, net) + correct = np.array([ 0.60520202, 0.00458849, 0.60520202]) + assert_array_almost_equal(test, correct) + + def test_endian_recarray(self): + # Ticket #2185 + dt = np.dtype([ + ('head', '>u4'), + ('data', '>u4', 2), + ]) + buf = np.recarray(1, dtype=dt) + buf[0]['head'] = 1 + buf[0]['data'][:] = [1, 1] + + h = buf[0]['head'] + d = buf[0]['data'][0] + buf[0]['head'] = h + buf[0]['data'][0] = d + assert_(buf[0]['head'] == 1) + + def test_mem_dot(self): + # Ticket #106 + x = np.random.randn(0, 1) + y = np.random.randn(10, 1) + # Dummy array to detect bad memory access: + _z = np.ones(10) + _dummy = np.empty((0, 10)) + z = np.lib.stride_tricks.as_strided(_z, _dummy.shape, _dummy.strides) + np.dot(x, np.transpose(y), out=z) + assert_equal(_z, np.ones(10)) + # Do the same for the built-in dot: + np.core.multiarray.dot(x, np.transpose(y), out=z) + assert_equal(_z, np.ones(10)) + + def test_arange_endian(self): + # Ticket #111 + ref = np.arange(10) + x = np.arange(10, dtype='<f8') + assert_array_equal(ref, x) + x = np.arange(10, dtype='>f8') + assert_array_equal(ref, x) + + def test_arange_inf_step(self): + ref = np.arange(0, 1, 10) + x = np.arange(0, 1, np.inf) + assert_array_equal(ref, x) + + ref = np.arange(0, 1, -10) + x = np.arange(0, 1, -np.inf) + assert_array_equal(ref, x) + + ref = np.arange(0, -1, -10) + x = np.arange(0, -1, -np.inf) + assert_array_equal(ref, x) + + ref = np.arange(0, -1, 10) + x = np.arange(0, -1, np.inf) + assert_array_equal(ref, x) + + def test_arange_underflow_stop_and_step(self): + finfo = np.finfo(np.float64) + + ref = np.arange(0, finfo.eps, 2 * finfo.eps) + x = np.arange(0, finfo.eps, finfo.max) + assert_array_equal(ref, x) + + ref = np.arange(0, finfo.eps, -2 * finfo.eps) + x = np.arange(0, finfo.eps, -finfo.max) + assert_array_equal(ref, x) + + ref = np.arange(0, -finfo.eps, -2 * finfo.eps) + x = np.arange(0, -finfo.eps, -finfo.max) + assert_array_equal(ref, x) + + ref = np.arange(0, -finfo.eps, 2 * finfo.eps) + x = np.arange(0, -finfo.eps, finfo.max) + assert_array_equal(ref, x) + + def test_argmax(self): + # Ticket #119 + a = np.random.normal(0, 1, (4, 5, 6, 7, 8)) + for i in range(a.ndim): + a.argmax(i) # Should succeed + + def test_mem_divmod(self): + # Ticket #126 + for i in range(10): + divmod(np.array([i])[0], 10) + + def test_hstack_invalid_dims(self): + # Ticket #128 + x = np.arange(9).reshape((3, 3)) + y = np.array([0, 0, 0]) + assert_raises(ValueError, np.hstack, (x, y)) + + def test_squeeze_type(self): + # Ticket #133 + a = np.array([3]) + b = np.array(3) + assert_(type(a.squeeze()) is np.ndarray) + assert_(type(b.squeeze()) is np.ndarray) + + def test_add_identity(self): + # Ticket #143 + assert_equal(0, np.add.identity) + + def 
test_numpy_float_python_long_addition(self): + # Check that numpy float and python longs can be added correctly. + a = np.float_(23.) + 2**135 + assert_equal(a, 23. + 2**135) + + def test_binary_repr_0(self): + # Ticket #151 + assert_equal('0', np.binary_repr(0)) + + def test_rec_iterate(self): + # Ticket #160 + descr = np.dtype([('i', int), ('f', float), ('s', '|S3')]) + x = np.rec.array([(1, 1.1, '1.0'), + (2, 2.2, '2.0')], dtype=descr) + x[0].tolist() + [i for i in x[0]] + + def test_unicode_string_comparison(self): + # Ticket #190 + a = np.array('hello', np.unicode_) + b = np.array('world') + a == b + + def test_tobytes_FORTRANORDER_discontiguous(self): + # Fix in r2836 + # Create non-contiguous Fortran ordered array + x = np.array(np.random.rand(3, 3), order='F')[:, :2] + assert_array_almost_equal(x.ravel(), np.frombuffer(x.tobytes())) + + def test_flat_assignment(self): + # Correct behaviour of ticket #194 + x = np.empty((3, 1)) + x.flat = np.arange(3) + assert_array_almost_equal(x, [[0], [1], [2]]) + x.flat = np.arange(3, dtype=float) + assert_array_almost_equal(x, [[0], [1], [2]]) + + def test_broadcast_flat_assignment(self): + # Ticket #194 + x = np.empty((3, 1)) + + def bfa(): + x[:] = np.arange(3) + + def bfb(): + x[:] = np.arange(3, dtype=float) + + assert_raises(ValueError, bfa) + assert_raises(ValueError, bfb) + + def test_nonarray_assignment(self): + # See also Issue gh-2870, test for non-array assignment + # and equivalent unsafe casted array assignment + a = np.arange(10) + b = np.ones(10, dtype=bool) + r = np.arange(10) + + def assign(a, b, c): + a[b] = c + + assert_raises(ValueError, assign, a, b, np.nan) + a[b] = np.array(np.nan) # but not this. + assert_raises(ValueError, assign, a, r, np.nan) + a[r] = np.array(np.nan) + + def test_unpickle_dtype_with_object(self): + # Implemented in r2840 + dt = np.dtype([('x', int), ('y', np.object_), ('z', 'O')]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + f = BytesIO() + pickle.dump(dt, f, protocol=proto) + f.seek(0) + dt_ = pickle.load(f) + f.close() + assert_equal(dt, dt_) + + def test_mem_array_creation_invalid_specification(self): + # Ticket #196 + dt = np.dtype([('x', int), ('y', np.object_)]) + # Wrong way + assert_raises(ValueError, np.array, [1, 'object'], dt) + # Correct way + np.array([(1, 'object')], dt) + + def test_recarray_single_element(self): + # Ticket #202 + a = np.array([1, 2, 3], dtype=np.int32) + b = a.copy() + r = np.rec.array(a, shape=1, formats=['3i4'], names=['d']) + assert_array_equal(a, b) + assert_equal(a, r[0][0]) + + def test_zero_sized_array_indexing(self): + # Ticket #205 + tmp = np.array([]) + + def index_tmp(): + tmp[np.array(10)] + + assert_raises(IndexError, index_tmp) + + def test_chararray_rstrip(self): + # Ticket #222 + x = np.chararray((1,), 5) + x[0] = b'a ' + x = x.rstrip() + assert_equal(x[0], b'a') + + def test_object_array_shape(self): + # Ticket #239 + assert_equal(np.array([[1, 2], 3, 4], dtype=object).shape, (3,)) + assert_equal(np.array([[1, 2], [3, 4]], dtype=object).shape, (2, 2)) + assert_equal(np.array([(1, 2), (3, 4)], dtype=object).shape, (2, 2)) + assert_equal(np.array([], dtype=object).shape, (0,)) + assert_equal(np.array([[], [], []], dtype=object).shape, (3, 0)) + assert_equal(np.array([[3, 4], [5, 6], None], dtype=object).shape, (3,)) + + def test_mem_around(self): + # Ticket #243 + x = np.zeros((1,)) + y = [0] + decimal = 6 + np.around(abs(x-y), decimal) <= 10.0**(-decimal) + + def test_character_array_strip(self): + # Ticket #246 + x = np.char.array(("x", "x 
", "x ")) + for c in x: + assert_equal(c, "x") + + def test_lexsort(self): + # Lexsort memory error + v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + assert_equal(np.lexsort(v), 0) + + def test_lexsort_invalid_sequence(self): + # Issue gh-4123 + class BuggySequence(object): + def __len__(self): + return 4 + + def __getitem__(self, key): + raise KeyError + + assert_raises(KeyError, np.lexsort, BuggySequence()) + + def test_pickle_py2_bytes_encoding(self): + # Check that arrays and scalars pickled on Py2 are + # unpickleable on Py3 using encoding='bytes' + + test_data = [ + # (original, py2_pickle) + (np.unicode_('\u6f2c'), + b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n" + b"(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n" + b"I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n."), + + (np.array([9e123], dtype=np.float64), + b"cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n" + b"p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n" + b"p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n" + b"I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb."), + + (np.array([(9e123,)], dtype=[('name', float)]), + b"cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n" + b"(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n" + b"(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n" + b"(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n" + b"I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n" + b"bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb."), + ] + + if sys.version_info[:2] >= (3, 4): + # encoding='bytes' was added in Py3.4 + for original, data in test_data: + result = pickle.loads(data, encoding='bytes') + assert_equal(result, original) + + if isinstance(result, np.ndarray) and result.dtype.names is not None: + for name in result.dtype.names: + assert_(isinstance(name, str)) + + def test_pickle_dtype(self): + # Ticket #251 + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + pickle.dumps(float, protocol=proto) + + def test_swap_real(self): + # Ticket #265 + assert_equal(np.arange(4, dtype='>c8').imag.max(), 0.0) + assert_equal(np.arange(4, dtype='<c8').imag.max(), 0.0) + assert_equal(np.arange(4, dtype='>c8').real.max(), 3.0) + assert_equal(np.arange(4, dtype='<c8').real.max(), 3.0) + + def test_object_array_from_list(self): + # Ticket #270 + assert_(np.array([1, 'A', None]).shape == (3,)) + + def test_multiple_assign(self): + # Ticket #273 + a = np.zeros((3, 1), int) + a[[1, 2]] = 1 + + def test_empty_array_type(self): + assert_equal(np.array([]).dtype, np.zeros(0).dtype) + + def test_void_copyswap(self): + dt = np.dtype([('one', '<i4'), ('two', '<i4')]) + x = np.array((1, 2), dtype=dt) + x = x.byteswap() + assert_(x['one'] > 1 and x['two'] > 2) + + def test_method_args(self): + # Make sure methods and functions have same default axis + # keyword and arguments + funcs1 = ['argmax', 'argmin', 'sum', ('product', 'prod'), + ('sometrue', 'any'), + ('alltrue', 'all'), 'cumsum', ('cumproduct', 'cumprod'), + 'ptp', 'cumprod', 'prod', 'std', 'var', 'mean', + 'round', 'min', 'max', 'argsort', 'sort'] + funcs2 = ['compress', 'take', 'repeat'] + + for func in funcs1: + arr = np.random.rand(8, 7) + arr2 = arr.copy() + if isinstance(func, tuple): + func_meth = func[1] + func = func[0] + else: + func_meth = func + res1 = getattr(arr, func_meth)() + res2 = getattr(np, func)(arr2) + if res1 is None: + res1 = arr + + if res1.dtype.kind in 'uib': + assert_((res1 == res2).all(), func) + else: + 
assert_(abs(res1-res2).max() < 1e-8, func) + + for func in funcs2: + arr1 = np.random.rand(8, 7) + arr2 = np.random.rand(8, 7) + res1 = None + if func == 'compress': + arr1 = arr1.ravel() + res1 = getattr(arr2, func)(arr1) + else: + arr2 = (15*arr2).astype(int).ravel() + if res1 is None: + res1 = getattr(arr1, func)(arr2) + res2 = getattr(np, func)(arr1, arr2) + assert_(abs(res1-res2).max() < 1e-8, func) + + def test_mem_lexsort_strings(self): + # Ticket #298 + lst = ['abc', 'cde', 'fgh'] + np.lexsort((lst,)) + + def test_fancy_index(self): + # Ticket #302 + x = np.array([1, 2])[np.array([0])] + assert_equal(x.shape, (1,)) + + def test_recarray_copy(self): + # Ticket #312 + dt = [('x', np.int16), ('y', np.float64)] + ra = np.array([(1, 2.3)], dtype=dt) + rb = np.rec.array(ra, dtype=dt) + rb['x'] = 2. + assert_(ra['x'] != rb['x']) + + def test_rec_fromarray(self): + # Ticket #322 + x1 = np.array([[1, 2], [3, 4], [5, 6]]) + x2 = np.array(['a', 'dd', 'xyz']) + x3 = np.array([1.1, 2, 3]) + np.rec.fromarrays([x1, x2, x3], formats="(2,)i4,a3,f8") + + def test_object_array_assign(self): + x = np.empty((2, 2), object) + x.flat[2] = (1, 2, 3) + assert_equal(x.flat[2], (1, 2, 3)) + + def test_ndmin_float64(self): + # Ticket #324 + x = np.array([1, 2, 3], dtype=np.float64) + assert_equal(np.array(x, dtype=np.float32, ndmin=2).ndim, 2) + assert_equal(np.array(x, dtype=np.float64, ndmin=2).ndim, 2) + + def test_ndmin_order(self): + # Issue #465 and related checks + assert_(np.array([1, 2], order='C', ndmin=3).flags.c_contiguous) + assert_(np.array([1, 2], order='F', ndmin=3).flags.f_contiguous) + assert_(np.array(np.ones((2, 2), order='F'), ndmin=3).flags.f_contiguous) + assert_(np.array(np.ones((2, 2), order='C'), ndmin=3).flags.c_contiguous) + + def test_mem_axis_minimization(self): + # Ticket #327 + data = np.arange(5) + data = np.add.outer(data, data) + + def test_mem_float_imag(self): + # Ticket #330 + np.float64(1.0).imag + + def test_dtype_tuple(self): + # Ticket #334 + assert_(np.dtype('i4') == np.dtype(('i4', ()))) + + def test_dtype_posttuple(self): + # Ticket #335 + np.dtype([('col1', '()i4')]) + + def test_numeric_carray_compare(self): + # Ticket #341 + assert_equal(np.array(['X'], 'c'), b'X') + + def test_string_array_size(self): + # Ticket #342 + assert_raises(ValueError, + np.array, [['X'], ['X', 'X', 'X']], '|S1') + + def test_dtype_repr(self): + # Ticket #344 + dt1 = np.dtype(('uint32', 2)) + dt2 = np.dtype(('uint32', (2,))) + assert_equal(dt1.__repr__(), dt2.__repr__()) + + def test_reshape_order(self): + # Make sure reshape order works. + a = np.arange(6).reshape(2, 3, order='F') + assert_equal(a, [[0, 2, 4], [1, 3, 5]]) + a = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + b = a[:, 1] + assert_equal(b.reshape(2, 2, order='F'), [[2, 6], [4, 8]]) + + def test_reshape_zero_strides(self): + # Issue #380, test reshaping of zero strided arrays + a = np.ones(1) + a = np.lib.stride_tricks.as_strided(a, shape=(5,), strides=(0,)) + assert_(a.reshape(5, 1).strides[0] == 0) + + def test_reshape_zero_size(self): + # GitHub Issue #2700, setting shape failed for 0-sized arrays + a = np.ones((0, 2)) + a.shape = (-1, 2) + + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides. + # With NPY_RELAXED_STRIDES_CHECKING the test becomes superfluous. 
+ @pytest.mark.skipif(np.ones(1).strides[0] == np.iinfo(np.intp).max, + reason="Using relaxed stride checking") + def test_reshape_trailing_ones_strides(self): + # GitHub issue gh-2949, bad strides for trailing ones of new shape + a = np.zeros(12, dtype=np.int32)[::2] # not contiguous + strides_c = (16, 8, 8, 8) + strides_f = (8, 24, 48, 48) + assert_equal(a.reshape(3, 2, 1, 1).strides, strides_c) + assert_equal(a.reshape(3, 2, 1, 1, order='F').strides, strides_f) + assert_equal(np.array(0, dtype=np.int32).reshape(1, 1).strides, (4, 4)) + + def test_repeat_discont(self): + # Ticket #352 + a = np.arange(12).reshape(4, 3)[:, 2] + assert_equal(a.repeat(3), [2, 2, 2, 5, 5, 5, 8, 8, 8, 11, 11, 11]) + + def test_array_index(self): + # Make sure optimization is not called in this case. + a = np.array([1, 2, 3]) + a2 = np.array([[1, 2, 3]]) + assert_equal(a[np.where(a == 3)], a2[np.where(a2 == 3)]) + + def test_object_argmax(self): + a = np.array([1, 2, 3], dtype=object) + assert_(a.argmax() == 2) + + def test_recarray_fields(self): + # Ticket #372 + dt0 = np.dtype([('f0', 'i4'), ('f1', 'i4')]) + dt1 = np.dtype([('f0', 'i8'), ('f1', 'i8')]) + for a in [np.array([(1, 2), (3, 4)], "i4,i4"), + np.rec.array([(1, 2), (3, 4)], "i4,i4"), + np.rec.array([(1, 2), (3, 4)]), + np.rec.fromarrays([(1, 2), (3, 4)], "i4,i4"), + np.rec.fromarrays([(1, 2), (3, 4)])]: + assert_(a.dtype in [dt0, dt1]) + + def test_random_shuffle(self): + # Ticket #374 + a = np.arange(5).reshape((5, 1)) + b = a.copy() + np.random.shuffle(b) + assert_equal(np.sort(b, axis=0), a) + + def test_refcount_vdot(self): + # Changeset #3443 + _assert_valid_refcount(np.vdot) + + def test_startswith(self): + ca = np.char.array(['Hi', 'There']) + assert_equal(ca.startswith('H'), [True, False]) + + def test_noncommutative_reduce_accumulate(self): + # Ticket #413 + tosubtract = np.arange(5) + todivide = np.array([2.0, 0.5, 0.25]) + assert_equal(np.subtract.reduce(tosubtract), -10) + assert_equal(np.divide.reduce(todivide), 16.0) + assert_array_equal(np.subtract.accumulate(tosubtract), + np.array([0, -1, -3, -6, -10])) + assert_array_equal(np.divide.accumulate(todivide), + np.array([2., 4., 16.])) + + def test_convolve_empty(self): + # Convolve should raise an error for empty input array. + assert_raises(ValueError, np.convolve, [], [1]) + assert_raises(ValueError, np.convolve, [1], []) + + def test_multidim_byteswap(self): + # Ticket #449 + r = np.array([(1, (0, 1, 2))], dtype="i2,3i2") + assert_array_equal(r.byteswap(), + np.array([(256, (0, 256, 512))], r.dtype)) + + def test_string_NULL(self): + # Changeset 3557 + assert_equal(np.array("a\x00\x0b\x0c\x00").item(), + 'a\x00\x0b\x0c') + + def test_junk_in_string_fields_of_recarray(self): + # Ticket #483 + r = np.array([[b'abc']], dtype=[('var1', '|S20')]) + assert_(asbytes(r['var1'][0][0]) == b'abc') + + def test_take_output(self): + # Ensure that 'take' honours output parameter. + x = np.arange(12).reshape((3, 4)) + a = np.take(x, [0, 2], axis=1) + b = np.zeros_like(a) + np.take(x, [0, 2], axis=1, out=b) + assert_array_equal(a, b) + + def test_take_object_fail(self): + # Issue gh-3001 + d = 123. 
+ a = np.array([d, 1], dtype=object) + if HAS_REFCOUNT: + ref_d = sys.getrefcount(d) + try: + a.take([0, 100]) + except IndexError: + pass + if HAS_REFCOUNT: + assert_(ref_d == sys.getrefcount(d)) + + def test_array_str_64bit(self): + # Ticket #501 + s = np.array([1, np.nan], dtype=np.float64) + with np.errstate(all='raise'): + np.array_str(s) # Should succeed + + def test_frompyfunc_endian(self): + # Ticket #503 + from math import radians + uradians = np.frompyfunc(radians, 1, 1) + big_endian = np.array([83.4, 83.5], dtype='>f8') + little_endian = np.array([83.4, 83.5], dtype='<f8') + assert_almost_equal(uradians(big_endian).astype(float), + uradians(little_endian).astype(float)) + + def test_mem_string_arr(self): + # Ticket #514 + s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + t = [] + np.hstack((t, s)) + + def test_arr_transpose(self): + # Ticket #516 + x = np.random.rand(*(2,)*16) + x.transpose(list(range(16))) # Should succeed + + def test_string_mergesort(self): + # Ticket #540 + x = np.array(['a']*32) + assert_array_equal(x.argsort(kind='m'), np.arange(32)) + + def test_argmax_byteorder(self): + # Ticket #546 + a = np.arange(3, dtype='>f') + assert_(a[a.argmax()] == a.max()) + + def test_rand_seed(self): + # Ticket #555 + for l in np.arange(4): + np.random.seed(l) + + def test_mem_deallocation_leak(self): + # Ticket #562 + a = np.zeros(5, dtype=float) + b = np.array(a, dtype=float) + del a, b + + def test_mem_on_invalid_dtype(self): + "Ticket #583" + assert_raises(ValueError, np.fromiter, [['12', ''], ['13', '']], str) + + def test_dot_negative_stride(self): + # Ticket #588 + x = np.array([[1, 5, 25, 125., 625]]) + y = np.array([[20.], [160.], [640.], [1280.], [1024.]]) + z = y[::-1].copy() + y2 = y[::-1] + assert_equal(np.dot(x, z), np.dot(x, y2)) + + def test_object_casting(self): + # This used to trigger the object-type version of + # the bitwise_or operation, because float64 -> object + # casting succeeds + def rs(): + x = np.ones([484, 286]) + y = np.zeros([484, 286]) + x |= y + + assert_raises(TypeError, rs) + + def test_unicode_scalar(self): + # Ticket #600 + x = np.array(["DROND", "DROND1"], dtype="U6") + el = x[1] + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + new = pickle.loads(pickle.dumps(el, protocol=proto)) + assert_equal(new, el) + + def test_arange_non_native_dtype(self): + # Ticket #616 + for T in ('>f4', '<f4'): + dt = np.dtype(T) + assert_equal(np.arange(0, dtype=dt).dtype, dt) + assert_equal(np.arange(0.5, dtype=dt).dtype, dt) + assert_equal(np.arange(5, dtype=dt).dtype, dt) + + def test_bool_flat_indexing_invalid_nr_elements(self): + s = np.ones(10, dtype=float) + x = np.array((15,), dtype=float) + + def ia(x, s, v): + x[(s > 0)] = v + + assert_raises(IndexError, ia, x, s, np.zeros(9, dtype=float)) + assert_raises(IndexError, ia, x, s, np.zeros(11, dtype=float)) + + # Old special case (different code path): + assert_raises(ValueError, ia, x.flat, s, np.zeros(9, dtype=float)) + assert_raises(ValueError, ia, x.flat, s, np.zeros(11, dtype=float)) + + def test_mem_scalar_indexing(self): + # Ticket #603 + x = np.array([0], dtype=float) + index = np.array(0, dtype=np.int32) + x[index] + + def test_binary_repr_0_width(self): + assert_equal(np.binary_repr(0, width=3), '000') + + def test_fromstring(self): + assert_equal(np.fromstring("12:09:09", dtype=int, sep=":"), + [12, 9, 9]) + + def test_searchsorted_variable_length(self): + x = np.array(['a', 'aa', 'b']) + y = np.array(['d', 'e']) + assert_equal(x.searchsorted(y), [3, 3]) + + def 
test_string_argsort_with_zeros(self): + # Check argsort for strings containing zeros. + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") + assert_array_equal(x.argsort(kind='m'), np.array([1, 0])) + assert_array_equal(x.argsort(kind='q'), np.array([1, 0])) + + def test_string_sort_with_zeros(self): + # Check sort for strings containing zeros. + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") + y = np.frombuffer(b"\x00\x01\x00\x02", dtype="|S2") + assert_array_equal(np.sort(x, kind="q"), y) + + def test_copy_detection_zero_dim(self): + # Ticket #658 + np.indices((0, 3, 4)).T.reshape(-1, 3) + + def test_flat_byteorder(self): + # Ticket #657 + x = np.arange(10) + assert_array_equal(x.astype('>i4'), x.astype('<i4').flat[:]) + assert_array_equal(x.astype('>i4').flat[:], x.astype('<i4')) + + def test_sign_bit(self): + x = np.array([0, -0.0, 0]) + assert_equal(str(np.abs(x)), '[0. 0. 0.]') + + def test_flat_index_byteswap(self): + for dt in (np.dtype('<i4'), np.dtype('>i4')): + x = np.array([-1, 0, 1], dtype=dt) + assert_equal(x.flat[0].dtype, x[0].dtype) + + def test_copy_detection_corner_case(self): + # Ticket #658 + np.indices((0, 3, 4)).T.reshape(-1, 3) + + # Cannot test if NPY_RELAXED_STRIDES_CHECKING changes the strides. + # With NPY_RELAXED_STRIDES_CHECKING the test becomes superfluous, + # 0-sized reshape itself is tested elsewhere. + @pytest.mark.skipif(np.ones(1).strides[0] == np.iinfo(np.intp).max, + reason="Using relaxed stride checking") + def test_copy_detection_corner_case2(self): + # Ticket #771: strides are not set correctly when reshaping 0-sized + # arrays + b = np.indices((0, 3, 4)).T.reshape(-1, 3) + assert_equal(b.strides, (3 * b.itemsize, b.itemsize)) + + def test_object_array_refcounting(self): + # Ticket #633 + if not hasattr(sys, 'getrefcount'): + return + + # NB. this is probably CPython-specific + + cnt = sys.getrefcount + + a = object() + b = object() + c = object() + + cnt0_a = cnt(a) + cnt0_b = cnt(b) + cnt0_c = cnt(c) + + # -- 0d -> 1-d broadcast slice assignment + + arr = np.zeros(5, dtype=np.object_) + + arr[:] = a + assert_equal(cnt(a), cnt0_a + 5) + + arr[:] = b + assert_equal(cnt(a), cnt0_a) + assert_equal(cnt(b), cnt0_b + 5) + + arr[:2] = c + assert_equal(cnt(b), cnt0_b + 3) + assert_equal(cnt(c), cnt0_c + 2) + + del arr + + # -- 1-d -> 2-d broadcast slice assignment + + arr = np.zeros((5, 2), dtype=np.object_) + arr0 = np.zeros(2, dtype=np.object_) + + arr0[0] = a + assert_(cnt(a) == cnt0_a + 1) + arr0[1] = b + assert_(cnt(b) == cnt0_b + 1) + + arr[:, :] = arr0 + assert_(cnt(a) == cnt0_a + 6) + assert_(cnt(b) == cnt0_b + 6) + + arr[:, 0] = None + assert_(cnt(a) == cnt0_a + 1) + + del arr, arr0 + + # -- 2-d copying + flattening + + arr = np.zeros((5, 2), dtype=np.object_) + + arr[:, 0] = a + arr[:, 1] = b + assert_(cnt(a) == cnt0_a + 5) + assert_(cnt(b) == cnt0_b + 5) + + arr2 = arr.copy() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 10) + + arr2 = arr[:, 0].copy() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 5) + + arr2 = arr.flatten() + assert_(cnt(a) == cnt0_a + 10) + assert_(cnt(b) == cnt0_b + 10) + + del arr, arr2 + + # -- concatenate, repeat, take, choose + + arr1 = np.zeros((5, 1), dtype=np.object_) + arr2 = np.zeros((5, 1), dtype=np.object_) + + arr1[...] = a + arr2[...] 
= b + assert_(cnt(a) == cnt0_a + 5) + assert_(cnt(b) == cnt0_b + 5) + + tmp = np.concatenate((arr1, arr2)) + assert_(cnt(a) == cnt0_a + 5 + 5) + assert_(cnt(b) == cnt0_b + 5 + 5) + + tmp = arr1.repeat(3, axis=0) + assert_(cnt(a) == cnt0_a + 5 + 3*5) + + tmp = arr1.take([1, 2, 3], axis=0) + assert_(cnt(a) == cnt0_a + 5 + 3) + + x = np.array([[0], [1], [0], [1], [1]], int) + tmp = x.choose(arr1, arr2) + assert_(cnt(a) == cnt0_a + 5 + 2) + assert_(cnt(b) == cnt0_b + 5 + 3) + + del tmp # Avoid pyflakes unused variable warning + + def test_mem_custom_float_to_array(self): + # Ticket 702 + class MyFloat(object): + def __float__(self): + return 1.0 + + tmp = np.atleast_1d([MyFloat()]) + tmp.astype(float) # Should succeed + + def test_object_array_refcount_self_assign(self): + # Ticket #711 + class VictimObject(object): + deleted = False + + def __del__(self): + self.deleted = True + + d = VictimObject() + arr = np.zeros(5, dtype=np.object_) + arr[:] = d + del d + arr[:] = arr # refcount of 'd' might hit zero here + assert_(not arr[0].deleted) + arr[:] = arr # trying to induce a segfault by doing it again... + assert_(not arr[0].deleted) + + def test_mem_fromiter_invalid_dtype_string(self): + x = [1, 2, 3] + assert_raises(ValueError, + np.fromiter, [xi for xi in x], dtype='S') + + def test_reduce_big_object_array(self): + # Ticket #713 + oldsize = np.setbufsize(10*16) + a = np.array([None]*161, object) + assert_(not np.any(a)) + np.setbufsize(oldsize) + + def test_mem_0d_array_index(self): + # Ticket #714 + np.zeros(10)[np.array(0)] + + def test_nonnative_endian_fill(self): + # Non-native endian arrays were incorrectly filled with scalars + # before r5034. + if sys.byteorder == 'little': + dtype = np.dtype('>i4') + else: + dtype = np.dtype('<i4') + x = np.empty([1], dtype=dtype) + x.fill(1) + assert_equal(x, np.array([1], dtype=dtype)) + + def test_dot_alignment_sse2(self): + # Test for ticket #551, changeset r5140 + x = np.zeros((30, 40)) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + y = pickle.loads(pickle.dumps(x, protocol=proto)) + # y is now typically not aligned on a 8-byte boundary + z = np.ones((1, y.shape[0])) + # This shouldn't cause a segmentation fault: + np.dot(z, y) + + def test_astype_copy(self): + # Ticket #788, changeset r5155 + # The test data file was generated by scipy.io.savemat. + # The dtype is float64, but the isbuiltin attribute is 0. + data_dir = path.join(path.dirname(__file__), 'data') + filename = path.join(data_dir, "astype_copy.pkl") + if sys.version_info[0] >= 3: + f = open(filename, 'rb') + xp = pickle.load(f, encoding='latin1') + f.close() + else: + f = open(filename) + xp = pickle.load(f) + f.close() + xpd = xp.astype(np.float64) + assert_((xp.__array_interface__['data'][0] != + xpd.__array_interface__['data'][0])) + + def test_compress_small_type(self): + # Ticket #789, changeset 5217. 
+ # compress with out argument segfaulted if cannot cast safely + import numpy as np + a = np.array([[1, 2], [3, 4]]) + b = np.zeros((2, 1), dtype=np.single) + try: + a.compress([True, False], axis=1, out=b) + raise AssertionError("compress with an out which cannot be " + "safely casted should not return " + "successfully") + except TypeError: + pass + + def test_attributes(self): + # Ticket #791 + class TestArray(np.ndarray): + def __new__(cls, data, info): + result = np.array(data) + result = result.view(cls) + result.info = info + return result + + def __array_finalize__(self, obj): + self.info = getattr(obj, 'info', '') + + dat = TestArray([[1, 2, 3, 4], [5, 6, 7, 8]], 'jubba') + assert_(dat.info == 'jubba') + dat.resize((4, 2)) + assert_(dat.info == 'jubba') + dat.sort() + assert_(dat.info == 'jubba') + dat.fill(2) + assert_(dat.info == 'jubba') + dat.put([2, 3, 4], [6, 3, 4]) + assert_(dat.info == 'jubba') + dat.setfield(4, np.int32, 0) + assert_(dat.info == 'jubba') + dat.setflags() + assert_(dat.info == 'jubba') + assert_(dat.all(1).info == 'jubba') + assert_(dat.any(1).info == 'jubba') + assert_(dat.argmax(1).info == 'jubba') + assert_(dat.argmin(1).info == 'jubba') + assert_(dat.argsort(1).info == 'jubba') + assert_(dat.astype(TestArray).info == 'jubba') + assert_(dat.byteswap().info == 'jubba') + assert_(dat.clip(2, 7).info == 'jubba') + assert_(dat.compress([0, 1, 1]).info == 'jubba') + assert_(dat.conj().info == 'jubba') + assert_(dat.conjugate().info == 'jubba') + assert_(dat.copy().info == 'jubba') + dat2 = TestArray([2, 3, 1, 0], 'jubba') + choices = [[0, 1, 2, 3], [10, 11, 12, 13], + [20, 21, 22, 23], [30, 31, 32, 33]] + assert_(dat2.choose(choices).info == 'jubba') + assert_(dat.cumprod(1).info == 'jubba') + assert_(dat.cumsum(1).info == 'jubba') + assert_(dat.diagonal().info == 'jubba') + assert_(dat.flatten().info == 'jubba') + assert_(dat.getfield(np.int32, 0).info == 'jubba') + assert_(dat.imag.info == 'jubba') + assert_(dat.max(1).info == 'jubba') + assert_(dat.mean(1).info == 'jubba') + assert_(dat.min(1).info == 'jubba') + assert_(dat.newbyteorder().info == 'jubba') + assert_(dat.prod(1).info == 'jubba') + assert_(dat.ptp(1).info == 'jubba') + assert_(dat.ravel().info == 'jubba') + assert_(dat.real.info == 'jubba') + assert_(dat.repeat(2).info == 'jubba') + assert_(dat.reshape((2, 4)).info == 'jubba') + assert_(dat.round().info == 'jubba') + assert_(dat.squeeze().info == 'jubba') + assert_(dat.std(1).info == 'jubba') + assert_(dat.sum(1).info == 'jubba') + assert_(dat.swapaxes(0, 1).info == 'jubba') + assert_(dat.take([2, 3, 5]).info == 'jubba') + assert_(dat.transpose().info == 'jubba') + assert_(dat.T.info == 'jubba') + assert_(dat.var(1).info == 'jubba') + assert_(dat.view(TestArray).info == 'jubba') + # These methods do not preserve subclasses + assert_(type(dat.nonzero()[0]) is np.ndarray) + assert_(type(dat.nonzero()[1]) is np.ndarray) + + def test_recarray_tolist(self): + # Ticket #793, changeset r5215 + # Comparisons fail for NaN, so we can't use random memory + # for the test. 
+ buf = np.zeros(40, dtype=np.int8) + a = np.recarray(2, formats="i4,f8,f8", names="id,x,y", buf=buf) + b = a.tolist() + assert_( a[0].tolist() == b[0]) + assert_( a[1].tolist() == b[1]) + + def test_nonscalar_item_method(self): + # Make sure that .item() fails graciously when it should + a = np.arange(5) + assert_raises(ValueError, a.item) + + def test_char_array_creation(self): + a = np.array('123', dtype='c') + b = np.array([b'1', b'2', b'3']) + assert_equal(a, b) + + def test_unaligned_unicode_access(self): + # Ticket #825 + for i in range(1, 9): + msg = 'unicode offset: %d chars' % i + t = np.dtype([('a', 'S%d' % i), ('b', 'U2')]) + x = np.array([(b'a', u'b')], dtype=t) + if sys.version_info[0] >= 3: + assert_equal(str(x), "[(b'a', 'b')]", err_msg=msg) + else: + assert_equal(str(x), "[('a', u'b')]", err_msg=msg) + + def test_sign_for_complex_nan(self): + # Ticket 794. + with np.errstate(invalid='ignore'): + C = np.array([-np.inf, -2+1j, 0, 2-1j, np.inf, np.nan]) + have = np.sign(C) + want = np.array([-1+0j, -1+0j, 0+0j, 1+0j, 1+0j, np.nan]) + assert_equal(have, want) + + def test_for_equal_names(self): + # Ticket #674 + dt = np.dtype([('foo', float), ('bar', float)]) + a = np.zeros(10, dt) + b = list(a.dtype.names) + b[0] = "notfoo" + a.dtype.names = b + assert_(a.dtype.names[0] == "notfoo") + assert_(a.dtype.names[1] == "bar") + + def test_for_object_scalar_creation(self): + # Ticket #816 + a = np.object_() + b = np.object_(3) + b2 = np.object_(3.0) + c = np.object_([4, 5]) + d = np.object_([None, {}, []]) + assert_(a is None) + assert_(type(b) is int) + assert_(type(b2) is float) + assert_(type(c) is np.ndarray) + assert_(c.dtype == object) + assert_(d.dtype == object) + + def test_array_resize_method_system_error(self): + # Ticket #840 - order should be an invalid keyword. + x = np.array([[0, 1], [2, 3]]) + assert_raises(TypeError, x.resize, (2, 2), order='C') + + def test_for_zero_length_in_choose(self): + "Ticket #882" + a = np.array(1) + assert_raises(ValueError, lambda x: x.choose([]), a) + + def test_array_ndmin_overflow(self): + "Ticket #947." + assert_raises(ValueError, lambda: np.array([1], ndmin=33)) + + def test_void_scalar_with_titles(self): + # No ticket + data = [('john', 4), ('mary', 5)] + dtype1 = [(('source:yy', 'name'), 'O'), (('source:xx', 'id'), int)] + arr = np.array(data, dtype=dtype1) + assert_(arr[0][0] == 'john') + assert_(arr[0][1] == 4) + + def test_void_scalar_constructor(self): + #Issue #1550 + + #Create test string data, construct void scalar from data and assert + #that void scalar contains original data. + test_string = np.array("test") + test_string_void_scalar = np.core.multiarray.scalar( + np.dtype(("V", test_string.dtype.itemsize)), test_string.tobytes()) + + assert_(test_string_void_scalar.view(test_string.dtype) == test_string) + + #Create record scalar, construct from data and assert that + #reconstructed scalar is correct. 
+ test_record = np.ones((), "i,i") + test_record_void_scalar = np.core.multiarray.scalar( + test_record.dtype, test_record.tobytes()) + + assert_(test_record_void_scalar == test_record) + + # Test pickle and unpickle of void and record scalars + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_(pickle.loads( + pickle.dumps(test_string, protocol=proto)) == test_string) + assert_(pickle.loads( + pickle.dumps(test_record, protocol=proto)) == test_record) + + def test_blasdot_uninitialized_memory(self): + # Ticket #950 + for m in [0, 1, 2]: + for n in [0, 1, 2]: + for k in range(3): + # Try to ensure that x->data contains non-zero floats + x = np.array([123456789e199], dtype=np.float64) + if IS_PYPY: + x.resize((m, 0), refcheck=False) + else: + x.resize((m, 0)) + y = np.array([123456789e199], dtype=np.float64) + if IS_PYPY: + y.resize((0, n), refcheck=False) + else: + y.resize((0, n)) + + # `dot` should just return zero (m, n) matrix + z = np.dot(x, y) + assert_(np.all(z == 0)) + assert_(z.shape == (m, n)) + + def test_zeros(self): + # Regression test for #1061. + # Set a size which cannot fit into a 64 bits signed integer + sz = 2 ** 64 + with assert_raises_regex(ValueError, + 'Maximum allowed dimension exceeded'): + np.empty(sz) + + def test_huge_arange(self): + # Regression test for #1062. + # Set a size which cannot fit into a 64 bits signed integer + sz = 2 ** 64 + with assert_raises_regex(ValueError, + 'Maximum allowed size exceeded'): + np.arange(sz) + assert_(np.size == sz) + + def test_fromiter_bytes(self): + # Ticket #1058 + a = np.fromiter(list(range(10)), dtype='b') + b = np.fromiter(list(range(10)), dtype='B') + assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + + def test_array_from_sequence_scalar_array(self): + # Ticket #1078: segfaults when creating an array with a sequence of + # 0d arrays. + a = np.array((np.ones(2), np.array(2))) + assert_equal(a.shape, (2,)) + assert_equal(a.dtype, np.dtype(object)) + assert_equal(a[0], np.ones(2)) + assert_equal(a[1], np.array(2)) + + a = np.array(((1,), np.array(1))) + assert_equal(a.shape, (2,)) + assert_equal(a.dtype, np.dtype(object)) + assert_equal(a[0], (1,)) + assert_equal(a[1], np.array(1)) + + def test_array_from_sequence_scalar_array2(self): + # Ticket #1081: weird array with strange input... + t = np.array([np.array([]), np.array(0, object)]) + assert_equal(t.shape, (2,)) + assert_equal(t.dtype, np.dtype(object)) + + def test_array_too_big(self): + # Ticket #1080. + assert_raises(ValueError, np.zeros, [975]*7, np.int8) + assert_raises(ValueError, np.zeros, [26244]*5, np.int8) + + def test_dtype_keyerrors_(self): + # Ticket #1106. + dt = np.dtype([('f1', np.uint)]) + assert_raises(KeyError, dt.__getitem__, "f2") + assert_raises(IndexError, dt.__getitem__, 1) + assert_raises(TypeError, dt.__getitem__, 0.0) + + def test_lexsort_buffer_length(self): + # Ticket #1217, don't segfault. + a = np.ones(100, dtype=np.int8) + b = np.ones(100, dtype=np.int32) + i = np.lexsort((a[::-1], b)) + assert_equal(i, np.arange(100, dtype=int)) + + def test_object_array_to_fixed_string(self): + # Ticket #1235. 
+ a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_) + b = np.array(a, dtype=(np.str_, 8)) + assert_equal(a, b) + c = np.array(a, dtype=(np.str_, 5)) + assert_equal(c, np.array(['abcde', 'ijklm'])) + d = np.array(a, dtype=(np.str_, 12)) + assert_equal(a, d) + e = np.empty((2, ), dtype=(np.str_, 8)) + e[:] = a[:] + assert_equal(a, e) + + def test_unicode_to_string_cast(self): + # Ticket #1240. + a = np.array([[u'abc', u'\u03a3'], + [u'asdf', u'erw']], + dtype='U') + assert_raises(UnicodeEncodeError, np.array, a, 'S4') + + def test_mixed_string_unicode_array_creation(self): + a = np.array(['1234', u'123']) + assert_(a.itemsize == 16) + a = np.array([u'123', '1234']) + assert_(a.itemsize == 16) + a = np.array(['1234', u'123', '12345']) + assert_(a.itemsize == 20) + a = np.array([u'123', '1234', u'12345']) + assert_(a.itemsize == 20) + a = np.array([u'123', '1234', u'1234']) + assert_(a.itemsize == 16) + + def test_misaligned_objects_segfault(self): + # Ticket #1198 and #1267 + a1 = np.zeros((10,), dtype='O,c') + a2 = np.array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], 'S10') + a1['f0'] = a2 + repr(a1) + np.argmax(a1['f0']) + a1['f0'][1] = "FOO" + a1['f0'] = "FOO" + np.array(a1['f0'], dtype='S') + np.nonzero(a1['f0']) + a1.sort() + copy.deepcopy(a1) + + def test_misaligned_scalars_segfault(self): + # Ticket #1267 + s1 = np.array(('a', 'Foo'), dtype='c,O') + s2 = np.array(('b', 'Bar'), dtype='c,O') + s1['f1'] = s2['f1'] + s1['f1'] = 'Baz' + + def test_misaligned_dot_product_objects(self): + # Ticket #1267 + # This didn't require a fix, but it's worth testing anyway, because + # it may fail if .dot stops enforcing the arrays to be BEHAVED + a = np.array([[(1, 'a'), (0, 'a')], [(0, 'a'), (1, 'a')]], dtype='O,c') + b = np.array([[(4, 'a'), (1, 'a')], [(2, 'a'), (2, 'a')]], dtype='O,c') + np.dot(a['f0'], b['f0']) + + def test_byteswap_complex_scalar(self): + # Ticket #1259 and gh-441 + for dtype in [np.dtype('<'+t) for t in np.typecodes['Complex']]: + z = np.array([2.2-1.1j], dtype) + x = z[0] # always native-endian + y = x.byteswap() + if x.dtype.byteorder == z.dtype.byteorder: + # little-endian machine + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype.newbyteorder())) + else: + # big-endian machine + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype)) + # double check real and imaginary parts: + assert_equal(x.real, y.real.byteswap()) + assert_equal(x.imag, y.imag.byteswap()) + + def test_structured_arrays_with_objects1(self): + # Ticket #1299 + stra = 'aaaa' + strb = 'bbbb' + x = np.array([[(0, stra), (1, strb)]], 'i8,O') + x[x.nonzero()] = x.ravel()[:1] + assert_(x[0, 1] == x[0, 0]) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_structured_arrays_with_objects2(self): + # Ticket #1299 second test + stra = 'aaaa' + strb = 'bbbb' + numb = sys.getrefcount(strb) + numa = sys.getrefcount(stra) + x = np.array([[(0, stra), (1, strb)]], 'i8,O') + x[x.nonzero()] = x.ravel()[:1] + assert_(sys.getrefcount(strb) == numb) + assert_(sys.getrefcount(stra) == numa + 2) + + def test_duplicate_title_and_name(self): + # Ticket #1254 + dtspec = [(('a', 'a'), 'i'), ('b', 'i')] + assert_raises(ValueError, np.dtype, dtspec) + + def test_signed_integer_division_overflow(self): + # Ticket #1317. 
+ def test_type(t): + min = np.array([np.iinfo(t).min]) + min //= -1 + + with np.errstate(divide="ignore"): + for t in (np.int8, np.int16, np.int32, np.int64, int, np.long): + test_type(t) + + def test_buffer_hashlib(self): + try: + from hashlib import md5 + except ImportError: + from md5 import new as md5 + + x = np.array([1, 2, 3], dtype=np.dtype('<i4')) + assert_equal(md5(x).hexdigest(), '2a1dd1e1e59d0a384c26951e316cd7e6') + + def test_0d_string_scalar(self): + # Bug #1436; the following should succeed + np.asarray('x', '>c') + + def test_log1p_compiler_shenanigans(self): + # Check if log1p is behaving on 32 bit intel systems. + assert_(np.isfinite(np.log1p(np.exp2(-53)))) + + def test_fromiter_comparison(self): + a = np.fromiter(list(range(10)), dtype='b') + b = np.fromiter(list(range(10)), dtype='B') + assert_(np.alltrue(a == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + assert_(np.alltrue(b == np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))) + + def test_fromstring_crash(self): + # Ticket #1345: the following should not cause a crash + np.fromstring(b'aa, aa, 1.0', sep=',') + + def test_ticket_1539(self): + dtypes = [x for x in np.typeDict.values() + if (issubclass(x, np.number) + and not issubclass(x, np.timedelta64))] + a = np.array([], np.bool_) # not x[0] because it is unordered + failures = [] + + for x in dtypes: + b = a.astype(x) + for y in dtypes: + c = a.astype(y) + try: + np.dot(b, c) + except TypeError: + failures.append((x, y)) + if failures: + raise AssertionError("Failures: %r" % failures) + + def test_ticket_1538(self): + x = np.finfo(np.float32) + for name in 'eps epsneg max min resolution tiny'.split(): + assert_equal(type(getattr(x, name)), np.float32, + err_msg=name) + + def test_ticket_1434(self): + # Check that the out= argument in var and std has an effect + data = np.array(((1, 2, 3), (4, 5, 6), (7, 8, 9))) + out = np.zeros((3,)) + + ret = data.var(axis=1, out=out) + assert_(ret is out) + assert_array_equal(ret, data.var(axis=1)) + + ret = data.std(axis=1, out=out) + assert_(ret is out) + assert_array_equal(ret, data.std(axis=1)) + + def test_complex_nan_maximum(self): + cnan = complex(0, np.nan) + assert_equal(np.maximum(1, cnan), cnan) + + def test_subclass_int_tuple_assignment(self): + # ticket #1563 + class Subclass(np.ndarray): + def __new__(cls, i): + return np.ones((i,)).view(cls) + + x = Subclass(5) + x[(0,)] = 2 # shouldn't raise an exception + assert_equal(x[0], 2) + + def test_ufunc_no_unnecessary_views(self): + # ticket #1548 + class Subclass(np.ndarray): + pass + x = np.array([1, 2, 3]).view(Subclass) + y = np.add(x, x, x) + assert_equal(id(x), id(y)) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_take_refcount(self): + # ticket #939 + a = np.arange(16, dtype=float) + a.shape = (4, 4) + lut = np.ones((5 + 3, 4), float) + rgba = np.empty(shape=a.shape + (4,), dtype=lut.dtype) + c1 = sys.getrefcount(rgba) + try: + lut.take(a, axis=0, mode='clip', out=rgba) + except TypeError: + pass + c2 = sys.getrefcount(rgba) + assert_equal(c1, c2) + + def test_fromfile_tofile_seeks(self): + # On Python 3, tofile/fromfile used to get (#1610) the Python + # file handle out of sync + f0 = tempfile.NamedTemporaryFile() + f = f0.file + f.write(np.arange(255, dtype='u1').tobytes()) + + f.seek(20) + ret = np.fromfile(f, count=4, dtype='u1') + assert_equal(ret, np.array([20, 21, 22, 23], dtype='u1')) + assert_equal(f.tell(), 24) + + f.seek(40) + np.array([1, 2, 3], dtype='u1').tofile(f) + assert_equal(f.tell(), 43) + + f.seek(40) + data = 
f.read(3) + assert_equal(data, b"\x01\x02\x03") + + f.seek(80) + f.read(4) + data = np.fromfile(f, dtype='u1', count=4) + assert_equal(data, np.array([84, 85, 86, 87], dtype='u1')) + + f.close() + + def test_complex_scalar_warning(self): + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = tp(1+2j) + assert_warns(np.ComplexWarning, float, x) + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning) + assert_equal(float(x), float(x.real)) + + def test_complex_scalar_complex_cast(self): + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = tp(1+2j) + assert_equal(complex(x), 1+2j) + + def test_complex_boolean_cast(self): + # Ticket #2218 + for tp in [np.csingle, np.cdouble, np.clongdouble]: + x = np.array([0, 0+0.5j, 0.5+0j], dtype=tp) + assert_equal(x.astype(bool), np.array([0, 1, 1], dtype=bool)) + assert_(np.any(x)) + assert_(np.all(x[1:])) + + def test_uint_int_conversion(self): + x = 2**64 - 1 + assert_equal(int(np.uint64(x)), x) + + def test_duplicate_field_names_assign(self): + ra = np.fromiter(((i*3, i*2) for i in range(10)), dtype='i8,f8') + ra.dtype.names = ('f1', 'f2') + repr(ra) # should not cause a segmentation fault + assert_raises(ValueError, setattr, ra.dtype, 'names', ('f1', 'f1')) + + def test_eq_string_and_object_array(self): + # From e-mail thread "__eq__ with str and object" (Keith Goodman) + a1 = np.array(['a', 'b'], dtype=object) + a2 = np.array(['a', 'c']) + assert_array_equal(a1 == a2, [True, False]) + assert_array_equal(a2 == a1, [True, False]) + + def test_nonzero_byteswap(self): + a = np.array([0x80000000, 0x00000080, 0], dtype=np.uint32) + a.dtype = np.float32 + assert_equal(a.nonzero()[0], [1]) + a = a.byteswap().newbyteorder() + assert_equal(a.nonzero()[0], [1]) # [0] if nonzero() ignores swap + + def test_find_common_type_boolean(self): + # Ticket #1695 + assert_(np.find_common_type([], ['?', '?']) == '?') + + def test_empty_mul(self): + a = np.array([1.]) + a[1:1] *= 2 + assert_equal(a, [1.]) + + def test_array_side_effect(self): + # The second use of itemsize was throwing an exception because in + # ctors.c, discover_itemsize was calling PyObject_Length without + # checking the return code. This failed to get the length of the + # number 2, and the exception hung around until something checked + # PyErr_Occurred() and returned an error. 
+ assert_equal(np.dtype('S10').itemsize, 10) + np.array([['abc', 2], ['long ', '0123456789']], dtype=np.string_) + assert_equal(np.dtype('S10').itemsize, 10) + + def test_any_float(self): + # all and any for floats + a = np.array([0.1, 0.9]) + assert_(np.any(a)) + assert_(np.all(a)) + + def test_large_float_sum(self): + a = np.arange(10000, dtype='f') + assert_equal(a.sum(dtype='d'), a.astype('d').sum()) + + def test_ufunc_casting_out(self): + a = np.array(1.0, dtype=np.float32) + b = np.array(1.0, dtype=np.float64) + c = np.array(1.0, dtype=np.float32) + np.add(a, b, out=c) + assert_equal(c, 2.0) + + def test_array_scalar_contiguous(self): + # Array scalars are both C and Fortran contiguous + assert_(np.array(1.0).flags.c_contiguous) + assert_(np.array(1.0).flags.f_contiguous) + assert_(np.array(np.float32(1.0)).flags.c_contiguous) + assert_(np.array(np.float32(1.0)).flags.f_contiguous) + + def test_squeeze_contiguous(self): + # Similar to GitHub issue #387 + a = np.zeros((1, 2)).squeeze() + b = np.zeros((2, 2, 2), order='F')[:, :, ::2].squeeze() + assert_(a.flags.c_contiguous) + assert_(a.flags.f_contiguous) + assert_(b.flags.f_contiguous) + + def test_squeeze_axis_handling(self): + # Issue #10779 + # Ensure proper handling of objects + # that don't support axis specification + # when squeezing + + class OldSqueeze(np.ndarray): + + def __new__(cls, + input_array): + obj = np.asarray(input_array).view(cls) + return obj + + # it is perfectly reasonable that prior + # to numpy version 1.7.0 a subclass of ndarray + # might have been created that did not expect + # squeeze to have an axis argument + # NOTE: this example is somewhat artificial; + # it is designed to simulate an old API + # expectation to guard against regression + def squeeze(self): + return super(OldSqueeze, self).squeeze() + + oldsqueeze = OldSqueeze(np.array([[1],[2],[3]])) + + # if no axis argument is specified the old API + # expectation should give the correct result + assert_equal(np.squeeze(oldsqueeze), + np.array([1,2,3])) + + # likewise, axis=None should work perfectly well + # with the old API expectation + assert_equal(np.squeeze(oldsqueeze, axis=None), + np.array([1,2,3])) + + # however, specification of any particular axis + # should raise a TypeError in the context of the + # old API specification, even when using a valid + # axis specification like 1 for this array + with assert_raises(TypeError): + # this would silently succeed for array + # subclasses / objects that did not support + # squeeze axis argument handling before fixing + # Issue #10779 + np.squeeze(oldsqueeze, axis=1) + + # check for the same behavior when using an invalid + # axis specification -- in this case axis=0 does not + # have size 1, but the priority should be to raise + # a TypeError for the axis argument and NOT a + # ValueError for squeezing a non-empty dimension + with assert_raises(TypeError): + np.squeeze(oldsqueeze, axis=0) + + # the new API knows how to handle the axis + # argument and will return a ValueError if + # attempting to squeeze an axis that is not + # of length 1 + with assert_raises(ValueError): + np.squeeze(np.array([[1],[2],[3]]), axis=0) + + def test_reduce_contiguous(self): + # GitHub issue #387 + a = np.add.reduce(np.zeros((2, 1, 2)), (0, 1)) + b = np.add.reduce(np.zeros((2, 1, 2)), 1) + assert_(a.flags.c_contiguous) + assert_(a.flags.f_contiguous) + assert_(b.flags.c_contiguous) + + def test_object_array_self_reference(self): + # Object arrays with references to themselves can cause problems + a = np.array(0, 
dtype=object) + a[()] = a + assert_raises(RecursionError, int, a) + assert_raises(RecursionError, long, a) + assert_raises(RecursionError, float, a) + if sys.version_info.major == 2: + # in python 3, this falls back on operator.index, which fails on + # on dtype=object + assert_raises(RecursionError, oct, a) + assert_raises(RecursionError, hex, a) + a[()] = None + + def test_object_array_circular_reference(self): + # Test the same for a circular reference. + a = np.array(0, dtype=object) + b = np.array(0, dtype=object) + a[()] = b + b[()] = a + assert_raises(RecursionError, int, a) + # NumPy has no tp_traverse currently, so circular references + # cannot be detected. So resolve it: + a[()] = None + + # This was causing a to become like the above + a = np.array(0, dtype=object) + a[...] += 1 + assert_equal(a, 1) + + def test_object_array_nested(self): + # but is fine with a reference to a different array + a = np.array(0, dtype=object) + b = np.array(0, dtype=object) + a[()] = b + assert_equal(int(a), int(0)) + assert_equal(long(a), long(0)) + assert_equal(float(a), float(0)) + if sys.version_info.major == 2: + # in python 3, this falls back on operator.index, which fails on + # on dtype=object + assert_equal(oct(a), oct(0)) + assert_equal(hex(a), hex(0)) + + def test_object_array_self_copy(self): + # An object array being copied into itself DECREF'ed before INCREF'ing + # causing segmentation faults (gh-3787) + a = np.array(object(), dtype=object) + np.copyto(a, a) + if HAS_REFCOUNT: + assert_(sys.getrefcount(a[()]) == 2) + a[()].__class__ # will segfault if object was deleted + + def test_zerosize_accumulate(self): + "Ticket #1733" + x = np.array([[42, 0]], dtype=np.uint32) + assert_equal(np.add.accumulate(x[:-1, 0]), []) + + def test_objectarray_setfield(self): + # Setfield should not overwrite Object fields with non-Object data + x = np.array([1, 2, 3], dtype=object) + assert_raises(TypeError, x.setfield, 4, np.int32, 0) + + def test_setting_rank0_string(self): + "Ticket #1736" + s1 = b"hello1" + s2 = b"hello2" + a = np.zeros((), dtype="S10") + a[()] = s1 + assert_equal(a, np.array(s1)) + a[()] = np.array(s2) + assert_equal(a, np.array(s2)) + + a = np.zeros((), dtype='f4') + a[()] = 3 + assert_equal(a, np.array(3)) + a[()] = np.array(4) + assert_equal(a, np.array(4)) + + def test_string_astype(self): + "Ticket #1748" + s1 = b'black' + s2 = b'white' + s3 = b'other' + a = np.array([[s1], [s2], [s3]]) + assert_equal(a.dtype, np.dtype('S5')) + b = a.astype(np.dtype('S0')) + assert_equal(b.dtype, np.dtype('S5')) + + def test_ticket_1756(self): + # Ticket #1756 + s = b'0123456789abcdef' + a = np.array([s]*5) + for i in range(1, 17): + a1 = np.array(a, "|S%d" % i) + a2 = np.array([s[:i]]*5) + assert_equal(a1, a2) + + def test_fields_strides(self): + "gh-2355" + r = np.frombuffer(b'abcdefghijklmnop'*4*3, dtype='i4,(2,3)u2') + assert_equal(r[0:3:2]['f1'], r['f1'][0:3:2]) + assert_equal(r[0:3:2]['f1'][0], r[0:3:2][0]['f1']) + assert_equal(r[0:3:2]['f1'][0][()], r[0:3:2][0]['f1'][()]) + assert_equal(r[0:3:2]['f1'][0].strides, r[0:3:2][0]['f1'].strides) + + def test_alignment_update(self): + # Check that alignment flag is updated on stride setting + a = np.arange(10) + assert_(a.flags.aligned) + a.strides = 3 + assert_(not a.flags.aligned) + + def test_ticket_1770(self): + "Should not segfault on python 3k" + import numpy as np + try: + a = np.zeros((1,), dtype=[('f1', 'f')]) + a['f1'] = 1 + a['f2'] = 1 + except ValueError: + pass + except Exception: + raise AssertionError + + def 
test_ticket_1608(self): + "x.flat shouldn't modify data" + x = np.array([[1, 2], [3, 4]]).T + np.array(x.flat) + assert_equal(x, [[1, 3], [2, 4]]) + + def test_pickle_string_overwrite(self): + import re + + data = np.array([1], dtype='b') + blob = pickle.dumps(data, protocol=1) + data = pickle.loads(blob) + + # Check that loads does not clobber interned strings + s = re.sub("a(.)", "\x01\\1", "a_") + assert_equal(s[0], "\x01") + data[0] = 0xbb + s = re.sub("a(.)", "\x01\\1", "a_") + assert_equal(s[0], "\x01") + + def test_pickle_bytes_overwrite(self): + if sys.version_info[0] >= 3: + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + data = np.array([1], dtype='b') + data = pickle.loads(pickle.dumps(data, protocol=proto)) + data[0] = 0xdd + bytestring = "\x01 ".encode('ascii') + assert_equal(bytestring[0:1], '\x01'.encode('ascii')) + + def test_pickle_py2_array_latin1_hack(self): + # Check that unpickling hacks in Py3 that support + # encoding='latin1' work correctly. + + # Python2 output for pickle.dumps(numpy.array([129], dtype='b')) + data = (b"cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n(I0\n" + b"tp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n(S'i1'\np8\n" + b"I0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nNNNI-1\nI-1\nI0\ntp12\nbI00\nS'\\x81'\n" + b"p13\ntp14\nb.") + if sys.version_info[0] >= 3: + # This should work: + result = pickle.loads(data, encoding='latin1') + assert_array_equal(result, np.array([129], dtype='b')) + # Should not segfault: + assert_raises(Exception, pickle.loads, data, encoding='koi8-r') + + def test_pickle_py2_scalar_latin1_hack(self): + # Check that scalar unpickling hack in Py3 that supports + # encoding='latin1' work correctly. + + # Python2 output for pickle.dumps(...) + datas = [ + # (original, python2_pickle, koi8r_validity) + (np.unicode_('\u6bd2'), + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n" + b"(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n" + b"tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."), + 'invalid'), + + (np.float64(9e123), + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n" + b"p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n" + b"bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."), + 'invalid'), + + (np.bytes_(b'\x9c'), # different 8-bit code point in KOI8-R vs latin1 + (b"cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n" + b"I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n" + b"tp8\nRp9\n."), + 'different'), + ] + if sys.version_info[0] >= 3: + for original, data, koi8r_validity in datas: + result = pickle.loads(data, encoding='latin1') + assert_equal(result, original) + + # Decoding under non-latin1 encoding (e.g.) KOI8-R can + # produce bad results, but should not segfault. 
+ if koi8r_validity == 'different': + # Unicode code points happen to lie within latin1, + # but are different in koi8-r, resulting to silent + # bogus results + result = pickle.loads(data, encoding='koi8-r') + assert_(result != original) + elif koi8r_validity == 'invalid': + # Unicode code points outside latin1, so results + # to an encoding exception + assert_raises(ValueError, pickle.loads, data, encoding='koi8-r') + else: + raise ValueError(koi8r_validity) + + def test_structured_type_to_object(self): + a_rec = np.array([(0, 1), (3, 2)], dtype='i4,i8') + a_obj = np.empty((2,), dtype=object) + a_obj[0] = (0, 1) + a_obj[1] = (3, 2) + # astype records -> object + assert_equal(a_rec.astype(object), a_obj) + # '=' records -> object + b = np.empty_like(a_obj) + b[...] = a_rec + assert_equal(b, a_obj) + # '=' object -> records + b = np.empty_like(a_rec) + b[...] = a_obj + assert_equal(b, a_rec) + + def test_assign_obj_listoflists(self): + # Ticket # 1870 + # The inner list should get assigned to the object elements + a = np.zeros(4, dtype=object) + b = a.copy() + a[0] = [1] + a[1] = [2] + a[2] = [3] + a[3] = [4] + b[...] = [[1], [2], [3], [4]] + assert_equal(a, b) + # The first dimension should get broadcast + a = np.zeros((2, 2), dtype=object) + a[...] = [[1, 2]] + assert_equal(a, [[1, 2], [1, 2]]) + + def test_memoryleak(self): + # Ticket #1917 - ensure that array data doesn't leak + for i in range(1000): + # 100MB times 1000 would give 100GB of memory usage if it leaks + a = np.empty((100000000,), dtype='i1') + del a + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_ufunc_reduce_memoryleak(self): + a = np.arange(6) + acnt = sys.getrefcount(a) + np.add.reduce(a) + assert_equal(sys.getrefcount(a), acnt) + + def test_search_sorted_invalid_arguments(self): + # Ticket #2021, should not segfault. + x = np.arange(0, 4, dtype='datetime64[D]') + assert_raises(TypeError, x.searchsorted, 1) + + def test_string_truncation(self): + # Ticket #1990 - Data can be truncated in creation of an array from a + # mixed sequence of numeric values and strings + for val in [True, 1234, 123.4, complex(1, 234)]: + for tostr in [asunicode, asbytes]: + b = np.array([val, tostr('xx')]) + assert_equal(tostr(b[0]), tostr(val)) + b = np.array([tostr('xx'), val]) + assert_equal(tostr(b[1]), tostr(val)) + + # test also with longer strings + b = np.array([val, tostr('xxxxxxxxxx')]) + assert_equal(tostr(b[0]), tostr(val)) + b = np.array([tostr('xxxxxxxxxx'), val]) + assert_equal(tostr(b[1]), tostr(val)) + + def test_string_truncation_ucs2(self): + # Ticket #2081. Python compiled with two byte unicode + # can lead to truncation if itemsize is not properly + # adjusted for NumPy's four byte unicode. + if sys.version_info[0] >= 3: + a = np.array(['abcd']) + else: + a = np.array([u'abcd']) + assert_equal(a.dtype.itemsize, 16) + + def test_unique_stable(self): + # Ticket #2063 must always choose stable sort for argsort to + # get consistent results + v = np.array(([0]*5 + [1]*6 + [2]*6)*4) + res = np.unique(v, return_index=True) + tgt = (np.array([0, 1, 2]), np.array([ 0, 5, 11])) + assert_equal(res, tgt) + + def test_unicode_alloc_dealloc_match(self): + # Ticket #1578, the mismatch only showed up when running + # python-debug for python versions >= 2.7, and then as + # a core dump and error message. 
+ a = np.array(['abc'], dtype=np.unicode)[0] + del a + + def test_refcount_error_in_clip(self): + # Ticket #1588 + a = np.zeros((2,), dtype='>i2').clip(min=0) + x = a + a + # This used to segfault: + y = str(x) + # Check the final string: + assert_(y == "[0 0]") + + def test_searchsorted_wrong_dtype(self): + # Ticket #2189, it used to segfault, so we check that it raises the + # proper exception. + a = np.array([('a', 1)], dtype='S1, int') + assert_raises(TypeError, np.searchsorted, a, 1.2) + # Ticket #2066, similar problem: + dtype = np.format_parser(['i4', 'i4'], [], []) + a = np.recarray((2, ), dtype) + assert_raises(TypeError, np.searchsorted, a, 1) + + def test_complex64_alignment(self): + # Issue gh-2668 (trac 2076), segfault on sparc due to misalignment + dtt = np.complex64 + arr = np.arange(10, dtype=dtt) + # 2D array + arr2 = np.reshape(arr, (2, 5)) + # Fortran write followed by (C or F) read caused bus error + data_str = arr2.tobytes('F') + data_back = np.ndarray(arr2.shape, + arr2.dtype, + buffer=data_str, + order='F') + assert_array_equal(arr2, data_back) + + def test_structured_count_nonzero(self): + arr = np.array([0, 1]).astype('i4, (2)i4')[:1] + count = np.count_nonzero(arr) + assert_equal(count, 0) + + def test_copymodule_preserves_f_contiguity(self): + a = np.empty((2, 2), order='F') + b = copy.copy(a) + c = copy.deepcopy(a) + assert_(b.flags.fortran) + assert_(b.flags.f_contiguous) + assert_(c.flags.fortran) + assert_(c.flags.f_contiguous) + + def test_fortran_order_buffer(self): + import numpy as np + a = np.array([['Hello', 'Foob']], dtype='U5', order='F') + arr = np.ndarray(shape=[1, 2, 5], dtype='U1', buffer=a) + arr2 = np.array([[[u'H', u'e', u'l', u'l', u'o'], + [u'F', u'o', u'o', u'b', u'']]]) + assert_array_equal(arr, arr2) + + def test_assign_from_sequence_error(self): + # Ticket #4024. + arr = np.array([1, 2, 3]) + assert_raises(ValueError, arr.__setitem__, slice(None), [9, 9]) + arr.__setitem__(slice(None), [9]) + assert_equal(arr, [9, 9, 9]) + + def test_format_on_flex_array_element(self): + # Ticket #4369. + dt = np.dtype([('date', '<M8[D]'), ('val', '<f8')]) + arr = np.array([('2000-01-01', 1)], dt) + formatted = '{0}'.format(arr[0]) + assert_equal(formatted, str(arr[0])) + + def test_deepcopy_on_0d_array(self): + # Ticket #3311. + arr = np.array(3) + arr_cp = copy.deepcopy(arr) + + assert_equal(arr, arr_cp) + assert_equal(arr.shape, arr_cp.shape) + assert_equal(int(arr), int(arr_cp)) + assert_(arr is not arr_cp) + assert_(isinstance(arr_cp, type(arr))) + + def test_deepcopy_F_order_object_array(self): + # Ticket #6456. + a = {'a': 1} + b = {'b': 2} + arr = np.array([[a, b], [a, b]], order='F') + arr_cp = copy.deepcopy(arr) + + assert_equal(arr, arr_cp) + assert_(arr is not arr_cp) + # Ensure that we have actually copied the item. + assert_(arr[0, 1] is not arr_cp[1, 1]) + # Ensure we are allowed to have references to the same object. + assert_(arr[0, 1] is arr[1, 1]) + # Check the references hold for the copied objects. + assert_(arr_cp[0, 1] is arr_cp[1, 1]) + + def test_deepcopy_empty_object_array(self): + # Ticket #8536. 
+ # Deepcopy should succeed + a = np.array([], dtype=object) + b = copy.deepcopy(a) + assert_(a.shape == b.shape) + + def test_bool_subscript_crash(self): + # gh-4494 + c = np.rec.array([(1, 2, 3), (4, 5, 6)]) + masked = c[np.array([True, False])] + base = masked.base + del masked, c + base.dtype + + def test_richcompare_crash(self): + # gh-4613 + import operator as op + + # dummy class where __array__ throws exception + class Foo(object): + __array_priority__ = 1002 + + def __array__(self, *args, **kwargs): + raise Exception() + + rhs = Foo() + lhs = np.array(1) + for f in [op.lt, op.le, op.gt, op.ge]: + if sys.version_info[0] >= 3: + assert_raises(TypeError, f, lhs, rhs) + elif not sys.py3kwarning: + # With -3 switch in python 2, DeprecationWarning is raised + # which we are not interested in + f(lhs, rhs) + assert_(not op.eq(lhs, rhs)) + assert_(op.ne(lhs, rhs)) + + def test_richcompare_scalar_and_subclass(self): + # gh-4709 + class Foo(np.ndarray): + def __eq__(self, other): + return "OK" + + x = np.array([1, 2, 3]).view(Foo) + assert_equal(10 == x, "OK") + assert_equal(np.int32(10) == x, "OK") + assert_equal(np.array([10]) == x, "OK") + + def test_pickle_empty_string(self): + # gh-3926 + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + test_string = np.string_('') + assert_equal(pickle.loads( + pickle.dumps(test_string, protocol=proto)), test_string) + + def test_frompyfunc_many_args(self): + # gh-5672 + + def passer(*args): + pass + + assert_raises(ValueError, np.frompyfunc, passer, 32, 1) + + def test_repeat_broadcasting(self): + # gh-5743 + a = np.arange(60).reshape(3, 4, 5) + for axis in chain(range(-a.ndim, a.ndim), [None]): + assert_equal(a.repeat(2, axis=axis), a.repeat([2], axis=axis)) + + def test_frompyfunc_nout_0(self): + # gh-2014 + + def f(x): + x[0], x[-1] = x[-1], x[0] + + uf = np.frompyfunc(f, 1, 0) + a = np.array([[1, 2, 3], [4, 5], [6, 7, 8, 9]]) + assert_equal(uf(a), ()) + assert_array_equal(a, [[3, 2, 1], [5, 4], [9, 7, 8, 6]]) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_leak_in_structured_dtype_comparison(self): + # gh-6250 + recordtype = np.dtype([('a', np.float64), + ('b', np.int32), + ('d', (str, 5))]) + + # Simple case + a = np.zeros(2, dtype=recordtype) + for i in range(100): + a == a + assert_(sys.getrefcount(a) < 10) + + # The case in the bug report. + before = sys.getrefcount(a) + u, v = a[0], a[1] + u == v + del u, v + gc.collect() + after = sys.getrefcount(a) + assert_equal(before, after) + + def test_empty_percentile(self): + # gh-6530 / gh-6553 + assert_array_equal(np.percentile(np.arange(10), []), np.array([])) + + def test_void_compare_segfault(self): + # gh-6922. The following should not segfault + a = np.ones(3, dtype=[('object', 'O'), ('int', '<i2')]) + a.sort() + + def test_reshape_size_overflow(self): + # gh-7455 + a = np.ones(20)[::2] + if np.dtype(np.intp).itemsize == 8: + # 64 bit. The following are the prime factors of 2**63 + 5, + # plus a leading 2, so when multiplied together as int64, + # the result overflows to a total size of 10. + new_shape = (2, 13, 419, 691, 823, 2977518503) + else: + # 32 bit. The following are the prime factors of 2**31 + 5, + # plus a leading 2, so when multiplied together as int32, + # the result overflows to a total size of 10. 
+ new_shape = (2, 7, 7, 43826197) + assert_raises(ValueError, a.reshape, new_shape) + + def test_invalid_structured_dtypes(self): + # gh-2865 + # mapping python objects to other dtypes + assert_raises(ValueError, np.dtype, ('O', [('name', 'i8')])) + assert_raises(ValueError, np.dtype, ('i8', [('name', 'O')])) + assert_raises(ValueError, np.dtype, + ('i8', [('name', [('name', 'O')])])) + assert_raises(ValueError, np.dtype, ([('a', 'i4'), ('b', 'i4')], 'O')) + assert_raises(ValueError, np.dtype, ('i8', 'O')) + # wrong number/type of tuple elements in dict + assert_raises(ValueError, np.dtype, + ('i', {'name': ('i', 0, 'title', 'oops')})) + assert_raises(ValueError, np.dtype, + ('i', {'name': ('i', 'wrongtype', 'title')})) + # disallowed as of 1.13 + assert_raises(ValueError, np.dtype, + ([('a', 'O'), ('b', 'O')], [('c', 'O'), ('d', 'O')])) + # allowed as a special case due to existing use, see gh-2798 + a = np.ones(1, dtype=('O', [('name', 'O')])) + assert_equal(a[0], 1) + + def test_correct_hash_dict(self): + # gh-8887 - __hash__ would be None despite tp_hash being set + all_types = set(np.typeDict.values()) - {np.void} + for t in all_types: + val = t() + + try: + hash(val) + except TypeError as e: + assert_equal(t.__hash__, None) + else: + assert_(t.__hash__ != None) + + def test_scalar_copy(self): + scalar_types = set(np.sctypeDict.values()) + values = { + np.void: b"a", + np.bytes_: b"a", + np.unicode_: "a", + np.datetime64: "2017-08-25", + } + for sctype in scalar_types: + item = sctype(values.get(sctype, 1)) + item2 = copy.copy(item) + assert_equal(item, item2) + + def test_void_item_memview(self): + va = np.zeros(10, 'V4') + x = va[:1].item() + va[0] = b'\xff\xff\xff\xff' + del va + assert_equal(x, b'\x00\x00\x00\x00') + + def test_void_getitem(self): + # Test fix for gh-11668. 
+ assert_(np.array([b'a'], 'V1').astype('O') == b'a') + assert_(np.array([b'ab'], 'V2').astype('O') == b'ab') + assert_(np.array([b'abc'], 'V3').astype('O') == b'abc') + assert_(np.array([b'abcd'], 'V4').astype('O') == b'abcd') + + def test_structarray_title(self): + # The following used to segfault on pypy, due to NPY_TITLE_KEY + # not working properly and resulting to double-decref of the + # structured array field items: + # See: https://bitbucket.org/pypy/pypy/issues/2789 + for j in range(5): + structure = np.array([1], dtype=[(('x', 'X'), np.object_)]) + structure[0]['x'] = np.array([2]) + gc.collect() + + def test_dtype_scalar_squeeze(self): + # gh-11384 + values = { + 'S': b"a", + 'M': "2018-06-20", + } + for ch in np.typecodes['All']: + if ch in 'O': + continue + sctype = np.dtype(ch).type + scvalue = sctype(values.get(ch, 3)) + for axis in [None, ()]: + squeezed = scvalue.squeeze(axis=axis) + assert_equal(squeezed, scvalue) + assert_equal(type(squeezed), type(scvalue)) + + def test_field_access_by_title(self): + # gh-11507 + s = 'Some long field name' + if HAS_REFCOUNT: + base = sys.getrefcount(s) + t = np.dtype([((s, 'f1'), np.float64)]) + data = np.zeros(10, t) + for i in range(10): + str(data[['f1']]) + if HAS_REFCOUNT: + assert_(base <= sys.getrefcount(s)) + + @pytest.mark.parametrize('val', [ + # arrays and scalars + np.ones((10, 10), dtype='int32'), + np.uint64(10), + ]) + @pytest.mark.parametrize('protocol', + range(2, pickle.HIGHEST_PROTOCOL + 1) + ) + def test_pickle_module(self, protocol, val): + # gh-12837 + s = pickle.dumps(val, protocol) + assert b'_multiarray_umath' not in s + if protocol == 5 and len(val.shape) > 0: + # unpickling ndarray goes through _frombuffer for protocol 5 + assert b'numpy.core.numeric' in s + else: + assert b'numpy.core.multiarray' in s + + def test_object_casting_errors(self): + # gh-11993 + arr = np.array(['AAAAA', 18465886.0, 18465886.0], dtype=object) + assert_raises(TypeError, arr.astype, 'c8') + + def test_eff1d_casting(self): + # gh-12711 + x = np.array([1, 2, 4, 7, 0], dtype=np.int16) + res = np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) + assert_equal(res, [-99, 1, 2, 3, -7, 88, 99]) + assert_raises(ValueError, np.ediff1d, x, to_begin=(1<<20)) + assert_raises(ValueError, np.ediff1d, x, to_end=(1<<20)) + + def test_pickle_datetime64_array(self): + # gh-12745 (would fail with pickle5 installed) + d = np.datetime64('2015-07-04 12:59:59.50', 'ns') + arr = np.array([d]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(arr, protocol=proto) + assert_equal(pickle.loads(dumped), arr) + + def test_bad_array_interface(self): + class T(object): + __array_interface__ = {} + + np.array([T()]) + + def test_2d__array__shape(self): + class T(object): + def __array__(self): + return np.ndarray(shape=(0,0)) + + # Make sure __array__ is used instead of Sequence methods. 
+ def __iter__(self): + return iter([]) + + def __getitem__(self, idx): + raise AssertionError("__getitem__ was called") + + def __len__(self): + return 0 + + + t = T() + #gh-13659, would raise in broadcasting [x=t for x in result] + np.array([t]) + + @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python') + @pytest.mark.skipif(sys.platform == 'win32' and sys.version_info[:2] < (3, 8), + reason='overflows on windows, fixed in bpo-16865') + def test_to_ctypes(self): + #gh-14214 + arr = np.zeros((2 ** 31 + 1,), 'b') + assert arr.size * arr.itemsize > 2 ** 31 + c_arr = np.ctypeslib.as_ctypes(arr) + assert_equal(c_arr._length_, arr.size) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_scalar_ctors.py b/contrib/python/numpy/py2/numpy/core/tests/test_scalar_ctors.py new file mode 100644 index 0000000000..b21bc9dad0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_scalar_ctors.py @@ -0,0 +1,65 @@ +""" +Test the scalar constructors, which also do type-coercion +""" +from __future__ import division, absolute_import, print_function + +import sys +import platform +import pytest + +import numpy as np +from numpy.testing import ( + assert_equal, assert_almost_equal, assert_raises, assert_warns, + ) + +class TestFromString(object): + def test_floating(self): + # Ticket #640, floats from string + fsingle = np.single('1.234') + fdouble = np.double('1.234') + flongdouble = np.longdouble('1.234') + assert_almost_equal(fsingle, 1.234) + assert_almost_equal(fdouble, 1.234) + assert_almost_equal(flongdouble, 1.234) + + def test_floating_overflow(self): + """ Strings containing an unrepresentable float overflow """ + fhalf = np.half('1e10000') + assert_equal(fhalf, np.inf) + fsingle = np.single('1e10000') + assert_equal(fsingle, np.inf) + fdouble = np.double('1e10000') + assert_equal(fdouble, np.inf) + flongdouble = assert_warns(RuntimeWarning, np.longdouble, '1e10000') + assert_equal(flongdouble, np.inf) + + fhalf = np.half('-1e10000') + assert_equal(fhalf, -np.inf) + fsingle = np.single('-1e10000') + assert_equal(fsingle, -np.inf) + fdouble = np.double('-1e10000') + assert_equal(fdouble, -np.inf) + flongdouble = assert_warns(RuntimeWarning, np.longdouble, '-1e10000') + assert_equal(flongdouble, -np.inf) + + @pytest.mark.skipif((sys.version_info[0] >= 3) + or (sys.platform == "win32" + and platform.architecture()[0] == "64bit"), + reason="numpy.intp('0xff', 16) not supported on Py3 " + "or 64 bit Windows") + def test_intp(self): + # Ticket #99 + i_width = np.int_(0).nbytes*2 - 1 + np.intp('0x' + 'f'*i_width, 16) + assert_raises(OverflowError, np.intp, '0x' + 'f'*(i_width+1), 16) + assert_raises(ValueError, np.intp, '0x1', 32) + assert_equal(255, np.intp('0xFF', 16)) + + +class TestFromInt(object): + def test_intp(self): + # Ticket #99 + assert_equal(1024, np.intp(1024)) + + def test_uint64_from_negative(self): + assert_equal(np.uint64(-2), np.uint64(18446744073709551614)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_scalarbuffer.py b/contrib/python/numpy/py2/numpy/core/tests/test_scalarbuffer.py new file mode 100644 index 0000000000..cd520d99b6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_scalarbuffer.py @@ -0,0 +1,105 @@ +""" +Test scalar buffer interface adheres to PEP 3118 +""" +import sys +import numpy as np +import pytest + +from numpy.testing import assert_, assert_equal, assert_raises + +# PEP3118 format strings for native (standard alignment and byteorder) types +scalars_and_codes = [ + (np.bool_, '?'), + 
(np.byte, 'b'), + (np.short, 'h'), + (np.intc, 'i'), + (np.int_, 'l'), + (np.longlong, 'q'), + (np.ubyte, 'B'), + (np.ushort, 'H'), + (np.uintc, 'I'), + (np.uint, 'L'), + (np.ulonglong, 'Q'), + (np.half, 'e'), + (np.single, 'f'), + (np.double, 'd'), + (np.longdouble, 'g'), + (np.csingle, 'Zf'), + (np.cdouble, 'Zd'), + (np.clongdouble, 'Zg'), +] +scalars_only, codes_only = zip(*scalars_and_codes) + + +@pytest.mark.skipif(sys.version_info.major < 3, + reason="Python 2 scalars lack a buffer interface") +class TestScalarPEP3118(object): + + @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only) + def test_scalar_match_array(self, scalar): + x = scalar() + a = np.array([], dtype=np.dtype(scalar)) + mv_x = memoryview(x) + mv_a = memoryview(a) + assert_equal(mv_x.format, mv_a.format) + + @pytest.mark.parametrize('scalar', scalars_only, ids=codes_only) + def test_scalar_dim(self, scalar): + x = scalar() + mv_x = memoryview(x) + assert_equal(mv_x.itemsize, np.dtype(scalar).itemsize) + assert_equal(mv_x.ndim, 0) + assert_equal(mv_x.shape, ()) + assert_equal(mv_x.strides, ()) + assert_equal(mv_x.suboffsets, ()) + + @pytest.mark.parametrize('scalar, code', scalars_and_codes, ids=codes_only) + def test_scalar_known_code(self, scalar, code): + x = scalar() + mv_x = memoryview(x) + assert_equal(mv_x.format, code) + + def test_void_scalar_structured_data(self): + dt = np.dtype([('name', np.unicode_, 16), ('grades', np.float64, (2,))]) + x = np.array(('ndarray_scalar', (1.2, 3.0)), dtype=dt)[()] + assert_(isinstance(x, np.void)) + mv_x = memoryview(x) + expected_size = 16 * np.dtype((np.unicode_, 1)).itemsize + expected_size += 2 * np.dtype((np.float64, 1)).itemsize + assert_equal(mv_x.itemsize, expected_size) + assert_equal(mv_x.ndim, 0) + assert_equal(mv_x.shape, ()) + assert_equal(mv_x.strides, ()) + assert_equal(mv_x.suboffsets, ()) + + # check scalar format string against ndarray format string + a = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt) + assert_(isinstance(a, np.ndarray)) + mv_a = memoryview(a) + assert_equal(mv_x.itemsize, mv_a.itemsize) + assert_equal(mv_x.format, mv_a.format) + + def test_datetime_memoryview(self): + # gh-11656 + # Values verified with v1.13.3, shape is not () as in test_scalar_dim + def as_dict(m): + return dict(strides=m.strides, shape=m.shape, itemsize=m.itemsize, + ndim=m.ndim, format=m.format) + + dt1 = np.datetime64('2016-01-01') + dt2 = np.datetime64('2017-01-01') + expected = {'strides': (1,), 'itemsize': 1, 'ndim': 1, + 'shape': (8,), 'format': 'B'} + v = memoryview(dt1) + res = as_dict(v) + assert_equal(res, expected) + + v = memoryview(dt2 - dt1) + res = as_dict(v) + assert_equal(res, expected) + + dt = np.dtype([('a', 'uint16'), ('b', 'M8[s]')]) + a = np.empty(1, dt) + # Fails to create a PEP 3118 valid buffer + assert_raises((ValueError, BufferError), memoryview, a[0]) + diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_scalarinherit.py b/contrib/python/numpy/py2/numpy/core/tests/test_scalarinherit.py new file mode 100644 index 0000000000..9e32cf624d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_scalarinherit.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +""" Test printing of scalar types. 
+ +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_ + + +class A(object): + pass +class B(A, np.float64): + pass + +class C(B): + pass +class D(C, B): + pass + +class B0(np.float64, A): + pass +class C0(B0): + pass + +class TestInherit(object): + def test_init(self): + x = B(1.0) + assert_(str(x) == '1.0') + y = C(2.0) + assert_(str(y) == '2.0') + z = D(3.0) + assert_(str(z) == '3.0') + + def test_init2(self): + x = B0(1.0) + assert_(str(x) == '1.0') + y = C0(2.0) + assert_(str(y) == '2.0') + + +class TestCharacter(object): + def test_char_radd(self): + # GH issue 9620, reached gentype_add and raise TypeError + np_s = np.string_('abc') + np_u = np.unicode_('abc') + s = b'def' + u = u'def' + assert_(np_s.__radd__(np_s) is NotImplemented) + assert_(np_s.__radd__(np_u) is NotImplemented) + assert_(np_s.__radd__(s) is NotImplemented) + assert_(np_s.__radd__(u) is NotImplemented) + assert_(np_u.__radd__(np_s) is NotImplemented) + assert_(np_u.__radd__(np_u) is NotImplemented) + assert_(np_u.__radd__(s) is NotImplemented) + assert_(np_u.__radd__(u) is NotImplemented) + assert_(s + np_s == b'defabc') + assert_(u + np_u == u'defabc') + + + class Mystr(str, np.generic): + # would segfault + pass + + ret = s + Mystr('abc') + assert_(type(ret) is type(s)) + + def test_char_repeat(self): + np_s = np.string_('abc') + np_u = np.unicode_('abc') + np_i = np.int(5) + res_s = b'abc' * 5 + res_u = u'abc' * 5 + assert_(np_s * np_i == res_s) + assert_(np_u * np_i == res_u) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_scalarmath.py b/contrib/python/numpy/py2/numpy/core/tests/test_scalarmath.py new file mode 100644 index 0000000000..ebba457e3f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_scalarmath.py @@ -0,0 +1,666 @@ +from __future__ import division, absolute_import, print_function + +import sys +import warnings +import itertools +import operator +import platform +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_almost_equal, + assert_array_equal, IS_PYPY, suppress_warnings, _gen_alignment_data, + assert_warns + ) + +types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc, + np.int_, np.uint, np.longlong, np.ulonglong, + np.single, np.double, np.longdouble, np.csingle, + np.cdouble, np.clongdouble] + +floating_types = np.floating.__subclasses__() +complex_floating_types = np.complexfloating.__subclasses__() + + +# This compares scalarmath against ufuncs. + +class TestTypes(object): + def test_types(self): + for atype in types: + a = atype(1) + assert_(a == 1, "error with %r: got %r" % (atype, a)) + + def test_type_add(self): + # list of types + for k, atype in enumerate(types): + a_scalar = atype(3) + a_array = np.array([3], dtype=atype) + for l, btype in enumerate(types): + b_scalar = btype(1) + b_array = np.array([1], dtype=btype) + c_scalar = a_scalar + b_scalar + c_array = a_array + b_array + # It was comparing the type numbers, but the new ufunc + # function-finding mechanism finds the lowest function + # to which both inputs can be cast - which produces 'l' + # when you do 'q' + 'b'. The old function finding mechanism + # skipped ahead based on the first argument, but that + # does not produce properly symmetric results... 
+ assert_equal(c_scalar.dtype, c_array.dtype, + "error with types (%d/'%c' + %d/'%c')" % + (k, np.dtype(atype).char, l, np.dtype(btype).char)) + + def test_type_create(self): + for k, atype in enumerate(types): + a = np.array([1, 2, 3], atype) + b = atype([1, 2, 3]) + assert_equal(a, b) + + def test_leak(self): + # test leak of scalar objects + # a leak would show up in valgrind as still-reachable of ~2.6MB + for i in range(200000): + np.add(1, 1) + + +class TestBaseMath(object): + def test_blocked(self): + # test alignments offsets for simd instructions + # alignments for vz + 2 * (vs - 1) + 1 + for dt, sz in [(np.float32, 11), (np.float64, 7), (np.int32, 11)]: + for out, inp1, inp2, msg in _gen_alignment_data(dtype=dt, + type='binary', + max_size=sz): + exp1 = np.ones_like(inp1) + inp1[...] = np.ones_like(inp1) + inp2[...] = np.zeros_like(inp2) + assert_almost_equal(np.add(inp1, inp2), exp1, err_msg=msg) + assert_almost_equal(np.add(inp1, 2), exp1 + 2, err_msg=msg) + assert_almost_equal(np.add(1, inp2), exp1, err_msg=msg) + + np.add(inp1, inp2, out=out) + assert_almost_equal(out, exp1, err_msg=msg) + + inp2[...] += np.arange(inp2.size, dtype=dt) + 1 + assert_almost_equal(np.square(inp2), + np.multiply(inp2, inp2), err_msg=msg) + # skip true divide for ints + if dt != np.int32 or (sys.version_info.major < 3 and not sys.py3kwarning): + assert_almost_equal(np.reciprocal(inp2), + np.divide(1, inp2), err_msg=msg) + + inp1[...] = np.ones_like(inp1) + np.add(inp1, 2, out=out) + assert_almost_equal(out, exp1 + 2, err_msg=msg) + inp2[...] = np.ones_like(inp2) + np.add(2, inp2, out=out) + assert_almost_equal(out, exp1 + 2, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + o = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_almost_equal(d + d, d * 2) + np.add(d, d, out=o) + np.add(np.ones_like(d), d, out=o) + np.add(d, np.ones_like(d), out=o) + np.add(np.ones_like(d), d) + np.add(d, np.ones_like(d)) + + +class TestPower(object): + def test_small_types(self): + for t in [np.int8, np.int16, np.float16]: + a = t(3) + b = a ** 4 + assert_(b == 81, "error with %r: got %r" % (t, b)) + + def test_large_types(self): + for t in [np.int32, np.int64, np.float32, np.float64, np.longdouble]: + a = t(51) + b = a ** 4 + msg = "error with %r: got %r" % (t, b) + if np.issubdtype(t, np.integer): + assert_(b == 6765201, msg) + else: + assert_almost_equal(b, 6765201, err_msg=msg) + + def test_integers_to_negative_integer_power(self): + # Note that the combination of uint64 with a signed integer + # has common type np.float64. The other combinations should all + # raise a ValueError for integer ** negative integer. + exp = [np.array(-1, dt)[()] for dt in 'bhilq'] + + # 1 ** -1 possible special case + base = [np.array(1, dt)[()] for dt in 'bhilqBHILQ'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, 1.) + + # -1 ** -1 possible special case + base = [np.array(-1, dt)[()] for dt in 'bhilq'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, -1.) 
+ + # 2 ** -1 perhaps generic + base = [np.array(2, dt)[()] for dt in 'bhilqBHILQ'] + for i1, i2 in itertools.product(base, exp): + if i1.dtype != np.uint64: + assert_raises(ValueError, operator.pow, i1, i2) + else: + res = operator.pow(i1, i2) + assert_(res.dtype.type is np.float64) + assert_almost_equal(res, .5) + + def test_mixed_types(self): + typelist = [np.int8, np.int16, np.float16, + np.float32, np.float64, np.int8, + np.int16, np.int32, np.int64] + for t1 in typelist: + for t2 in typelist: + a = t1(3) + b = t2(2) + result = a**b + msg = ("error with %r and %r:" + "got %r, expected %r") % (t1, t2, result, 9) + if np.issubdtype(np.dtype(result), np.integer): + assert_(result == 9, msg) + else: + assert_almost_equal(result, 9, err_msg=msg) + + def test_modular_power(self): + # modular power is not implemented, so ensure it errors + a = 5 + b = 4 + c = 10 + expected = pow(a, b, c) # noqa: F841 + for t in (np.int32, np.float32, np.complex64): + # note that 3-operand power only dispatches on the first argument + assert_raises(TypeError, operator.pow, t(a), b, c) + assert_raises(TypeError, operator.pow, np.array(t(a)), b, c) + + +def floordiv_and_mod(x, y): + return (x // y, x % y) + + +def _signs(dt): + if dt in np.typecodes['UnsignedInteger']: + return (+1,) + else: + return (+1, -1) + + +class TestModulus(object): + + def test_modulus_basic(self): + dt = np.typecodes['AllInteger'] + np.typecodes['Float'] + for op in [floordiv_and_mod, divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product(_signs(dt1), _signs(dt2)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*71, dtype=dt1)[()] + b = np.array(sg2*19, dtype=dt2)[()] + div, rem = op(a, b) + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_modulus_exact(self): + # test that float results are exact for small integers. This also + # holds for the same integers scaled by powers of two. + nlst = list(range(-127, 0)) + plst = list(range(1, 128)) + dividend = nlst + [0] + plst + divisor = nlst + plst + arg = list(itertools.product(dividend, divisor)) + tgt = list(divmod(*t) for t in arg) + + a, b = np.array(arg, dtype=int).T + # convert exact integer results from Python to float so that + # signed zero can be used, it is checked. 
+ tgtdiv, tgtrem = np.array(tgt, dtype=float).T + tgtdiv = np.where((tgtdiv == 0.0) & ((b < 0) ^ (a < 0)), -0.0, tgtdiv) + tgtrem = np.where((tgtrem == 0.0) & (b < 0), -0.0, tgtrem) + + for op in [floordiv_and_mod, divmod]: + for dt in np.typecodes['Float']: + msg = 'op: %s, dtype: %s' % (op.__name__, dt) + fa = a.astype(dt) + fb = b.astype(dt) + # use list comprehension so a_ and b_ are scalars + div, rem = zip(*[op(a_, b_) for a_, b_ in zip(fa, fb)]) + assert_equal(div, tgtdiv, err_msg=msg) + assert_equal(rem, tgtrem, err_msg=msg) + + def test_float_modulus_roundoff(self): + # gh-6127 + dt = np.typecodes['Float'] + for op in [floordiv_and_mod, divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product((+1, -1), (+1, -1)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*78*6e-8, dtype=dt1)[()] + b = np.array(sg2*6e-8, dtype=dt2)[()] + div, rem = op(a, b) + # Equal assertion should hold when fmod is used + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_modulus_corner_cases(self): + # Check remainder magnitude. + for dt in np.typecodes['Float']: + b = np.array(1.0, dtype=dt) + a = np.nextafter(np.array(0.0, dtype=dt), -b) + rem = operator.mod(a, b) + assert_(rem <= b, 'dt: %s' % dt) + rem = operator.mod(-a, -b) + assert_(rem >= -b, 'dt: %s' % dt) + + # Check nans, inf + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in remainder") + for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + fnan = np.array(np.nan, dtype=dt) + rem = operator.mod(fone, fzer) + assert_(np.isnan(rem), 'dt: %s' % dt) + # MSVC 2008 returns NaN here, so disable the check. + #rem = operator.mod(fone, finf) + #assert_(rem == fone, 'dt: %s' % dt) + rem = operator.mod(fone, fnan) + assert_(np.isnan(rem), 'dt: %s' % dt) + rem = operator.mod(finf, fone) + assert_(np.isnan(rem), 'dt: %s' % dt) + + +class TestComplexDivision(object): + def test_zero_division(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + a = t(0.0) + b = t(1.0) + assert_(np.isinf(b/a)) + b = t(complex(np.inf, np.inf)) + assert_(np.isinf(b/a)) + b = t(complex(np.inf, np.nan)) + assert_(np.isinf(b/a)) + b = t(complex(np.nan, np.inf)) + assert_(np.isinf(b/a)) + b = t(complex(np.nan, np.nan)) + assert_(np.isnan(b/a)) + b = t(0.) 
+ assert_(np.isnan(b/a)) + + def test_signed_zeros(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + # tupled (numerator, denominator, expected) + # for testing as expected == numerator/denominator + data = ( + (( 0.0,-1.0), ( 0.0, 1.0), (-1.0,-0.0)), + (( 0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + (( 0.0,-1.0), (-0.0,-1.0), ( 1.0, 0.0)), + (( 0.0,-1.0), (-0.0, 1.0), (-1.0, 0.0)), + (( 0.0, 1.0), ( 0.0,-1.0), (-1.0, 0.0)), + (( 0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + ((-0.0,-1.0), ( 0.0,-1.0), ( 1.0,-0.0)), + ((-0.0, 1.0), ( 0.0,-1.0), (-1.0,-0.0)) + ) + for cases in data: + n = cases[0] + d = cases[1] + ex = cases[2] + result = t(complex(n[0], n[1])) / t(complex(d[0], d[1])) + # check real and imag parts separately to avoid comparison + # in array context, which does not account for signed zeros + assert_equal(result.real, ex[0]) + assert_equal(result.imag, ex[1]) + + def test_branches(self): + with np.errstate(all="ignore"): + for t in [np.complex64, np.complex128]: + # tupled (numerator, denominator, expected) + # for testing as expected == numerator/denominator + data = list() + + # trigger branch: real(fabs(denom)) > imag(fabs(denom)) + # followed by else condition as neither are == 0 + data.append((( 2.0, 1.0), ( 2.0, 1.0), (1.0, 0.0))) + + # trigger branch: real(fabs(denom)) > imag(fabs(denom)) + # followed by if condition as both are == 0 + # is performed in test_zero_division(), so this is skipped + + # trigger else if branch: real(fabs(denom)) < imag(fabs(denom)) + data.append((( 1.0, 2.0), ( 1.0, 2.0), (1.0, 0.0))) + + for cases in data: + n = cases[0] + d = cases[1] + ex = cases[2] + result = t(complex(n[0], n[1])) / t(complex(d[0], d[1])) + # check real and imag parts separately to avoid comparison + # in array context, which does not account for signed zeros + assert_equal(result.real, ex[0]) + assert_equal(result.imag, ex[1]) + + +class TestConversion(object): + def test_int_from_long(self): + l = [1e6, 1e12, 1e18, -1e6, -1e12, -1e18] + li = [10**6, 10**12, 10**18, -10**6, -10**12, -10**18] + for T in [None, np.float64, np.int64]: + a = np.array(l, dtype=T) + assert_equal([int(_m) for _m in a], li) + + a = np.array(l[:3], dtype=np.uint64) + assert_equal([int(_m) for _m in a], li[:3]) + + def test_iinfo_long_values(self): + for code in 'bBhH': + res = np.array(np.iinfo(code).max + 1, dtype=code) + tgt = np.iinfo(code).min + assert_(res == tgt) + + for code in np.typecodes['AllInteger']: + res = np.array(np.iinfo(code).max, dtype=code) + tgt = np.iinfo(code).max + assert_(res == tgt) + + for code in np.typecodes['AllInteger']: + res = np.typeDict[code](np.iinfo(code).max) + tgt = np.iinfo(code).max + assert_(res == tgt) + + def test_int_raise_behaviour(self): + def overflow_error_func(dtype): + np.typeDict[dtype](np.iinfo(dtype).max + 1) + + for code in 'lLqQ': + assert_raises(OverflowError, overflow_error_func, code) + + def test_int_from_infinite_longdouble(self): + # gh-627 + x = np.longdouble(np.inf) + assert_raises(OverflowError, int, x) + with suppress_warnings() as sup: + sup.record(np.ComplexWarning) + x = np.clongdouble(np.inf) + assert_raises(OverflowError, int, x) + assert_equal(len(sup.log), 1) + + @pytest.mark.skipif(not IS_PYPY, reason="Test is PyPy only (gh-9972)") + def test_int_from_infinite_longdouble___int__(self): + x = np.longdouble(np.inf) + assert_raises(OverflowError, x.__int__) + with suppress_warnings() as sup: + sup.record(np.ComplexWarning) + x = np.clongdouble(np.inf) + assert_raises(OverflowError, x.__int__) + 
assert_equal(len(sup.log), 1) + + @pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") + @pytest.mark.skipif(platform.machine().startswith("ppc"), + reason="IBM double double") + def test_int_from_huge_longdouble(self): + # Produce a longdouble that would overflow a double, + # use exponent that avoids bug in Darwin pow function. + exp = np.finfo(np.double).maxexp - 1 + huge_ld = 2 * 1234 * np.longdouble(2) ** exp + huge_i = 2 * 1234 * 2 ** exp + assert_(huge_ld != np.inf) + assert_equal(int(huge_ld), huge_i) + + def test_int_from_longdouble(self): + x = np.longdouble(1.5) + assert_equal(int(x), 1) + x = np.longdouble(-10.5) + assert_equal(int(x), -10) + + def test_numpy_scalar_relational_operators(self): + # All integer + for dt1 in np.typecodes['AllInteger']: + assert_(1 > np.array(0, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not 1 < np.array(0, dtype=dt1)[()], "type %s failed" % (dt1,)) + + for dt2 in np.typecodes['AllInteger']: + assert_(np.array(1, dtype=dt1)[()] > np.array(0, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(0, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + #Unsigned integers + for dt1 in 'BHILQP': + assert_(-1 < np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not -1 > np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(-1 != np.array(1, dtype=dt1)[()], "type %s failed" % (dt1,)) + + #unsigned vs signed + for dt2 in 'bhilqp': + assert_(np.array(1, dtype=dt1)[()] > np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(1, dtype=dt1)[()] != np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + #Signed integers and floats + for dt1 in 'bhlqp' + np.typecodes['Float']: + assert_(1 > np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(not 1 < np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + assert_(-1 == np.array(-1, dtype=dt1)[()], "type %s failed" % (dt1,)) + + for dt2 in 'bhlqp' + np.typecodes['Float']: + assert_(np.array(1, dtype=dt1)[()] > np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(not np.array(1, dtype=dt1)[()] < np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + assert_(np.array(-1, dtype=dt1)[()] == np.array(-1, dtype=dt2)[()], + "type %s and %s failed" % (dt1, dt2)) + + def test_scalar_comparison_to_none(self): + # Scalars should just return False and not give a warnings. + # The comparisons are flagged by pep8, ignore that. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', FutureWarning) + assert_(not np.float32(1) == None) + assert_(not np.str_('test') == None) + # This is dubious (see below): + assert_(not np.datetime64('NaT') == None) + + assert_(np.float32(1) != None) + assert_(np.str_('test') != None) + # This is dubious (see below): + assert_(np.datetime64('NaT') != None) + assert_(len(w) == 0) + + # For documentation purposes, this is why the datetime is dubious. + # At the time of deprecation this was no behaviour change, but + # it has to be considered when the deprecations are done. 
+ assert_(np.equal(np.datetime64('NaT'), None)) + + +#class TestRepr(object): +# def test_repr(self): +# for t in types: +# val = t(1197346475.0137341) +# val_repr = repr(val) +# val2 = eval(val_repr) +# assert_equal( val, val2 ) + + +class TestRepr(object): + def _test_type_repr(self, t): + finfo = np.finfo(t) + last_fraction_bit_idx = finfo.nexp + finfo.nmant + last_exponent_bit_idx = finfo.nexp + storage_bytes = np.dtype(t).itemsize*8 + # could add some more types to the list below + for which in ['small denorm', 'small norm']: + # Values from https://en.wikipedia.org/wiki/IEEE_754 + constr = np.array([0x00]*storage_bytes, dtype=np.uint8) + if which == 'small denorm': + byte = last_fraction_bit_idx // 8 + bytebit = 7-(last_fraction_bit_idx % 8) + constr[byte] = 1 << bytebit + elif which == 'small norm': + byte = last_exponent_bit_idx // 8 + bytebit = 7-(last_exponent_bit_idx % 8) + constr[byte] = 1 << bytebit + else: + raise ValueError('hmm') + val = constr.view(t)[0] + val_repr = repr(val) + val2 = t(eval(val_repr)) + if not (val2 == 0 and val < 1e-100): + assert_equal(val, val2) + + def test_float_repr(self): + # long double test cannot work, because eval goes through a python + # float + for t in [np.float32, np.float64]: + self._test_type_repr(t) + + +if not IS_PYPY: + # sys.getsizeof() is not valid on PyPy + class TestSizeOf(object): + + def test_equal_nbytes(self): + for type in types: + x = type(0) + assert_(sys.getsizeof(x) > x.nbytes) + + def test_error(self): + d = np.float32() + assert_raises(TypeError, d.__sizeof__, "a") + + +class TestMultiply(object): + def test_seq_repeat(self): + # Test that basic sequences get repeated when multiplied with + # numpy integers. And errors are raised when multiplied with others. + # Some of this behaviour may be controversial and could be open for + # change. + accepted_types = set(np.typecodes["AllInteger"]) + deprecated_types = {'?'} + forbidden_types = ( + set(np.typecodes["All"]) - accepted_types - deprecated_types) + forbidden_types -= {'V'} # can't default-construct void scalars + + for seq_type in (list, tuple): + seq = seq_type([1, 2, 3]) + for numpy_type in accepted_types: + i = np.dtype(numpy_type).type(2) + assert_equal(seq * i, seq * int(i)) + assert_equal(i * seq, int(i) * seq) + + for numpy_type in deprecated_types: + i = np.dtype(numpy_type).type() + assert_equal( + assert_warns(DeprecationWarning, operator.mul, seq, i), + seq * int(i)) + assert_equal( + assert_warns(DeprecationWarning, operator.mul, i, seq), + int(i) * seq) + + for numpy_type in forbidden_types: + i = np.dtype(numpy_type).type() + assert_raises(TypeError, operator.mul, seq, i) + assert_raises(TypeError, operator.mul, i, seq) + + def test_no_seq_repeat_basic_array_like(self): + # Test that an array-like which does not know how to be multiplied + # does not attempt sequence repeat (raise TypeError). + # See also gh-7428. + class ArrayLike(object): + def __init__(self, arr): + self.arr = arr + def __array__(self): + return self.arr + + # Test for simple ArrayLike above and memoryviews (original report) + for arr_like in (ArrayLike(np.ones(3)), memoryview(np.ones(3))): + assert_array_equal(arr_like * np.float32(3.), np.full(3, 3.)) + assert_array_equal(np.float32(3.) 
* arr_like, np.full(3, 3.)) + assert_array_equal(arr_like * np.int_(3), np.full(3, 3)) + assert_array_equal(np.int_(3) * arr_like, np.full(3, 3)) + + +class TestNegative(object): + def test_exceptions(self): + a = np.ones((), dtype=np.bool_)[()] + assert_raises(TypeError, operator.neg, a) + + def test_result(self): + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for dt in types: + a = np.ones((), dtype=dt)[()] + assert_equal(operator.neg(a) + a, 0) + + +class TestSubtract(object): + def test_exceptions(self): + a = np.ones((), dtype=np.bool_)[()] + assert_raises(TypeError, operator.sub, a, a) + + def test_result(self): + types = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for dt in types: + a = np.ones((), dtype=dt)[()] + assert_equal(operator.sub(a, a), 0) + + +class TestAbs(object): + def _test_abs_func(self, absfunc): + for tp in floating_types + complex_floating_types: + x = tp(-1.5) + assert_equal(absfunc(x), 1.5) + x = tp(0.0) + res = absfunc(x) + # assert_equal() checks zero signedness + assert_equal(res, 0.0) + x = tp(-0.0) + res = absfunc(x) + assert_equal(res, 0.0) + + x = tp(np.finfo(tp).max) + assert_equal(absfunc(x), x.real) + + x = tp(np.finfo(tp).tiny) + assert_equal(absfunc(x), x.real) + + x = tp(np.finfo(tp).min) + assert_equal(absfunc(x), -x.real) + + def test_builtin_abs(self): + self._test_abs_func(abs) + + def test_numpy_abs(self): + self._test_abs_func(np.abs) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_scalarprint.py b/contrib/python/numpy/py2/numpy/core/tests/test_scalarprint.py new file mode 100644 index 0000000000..cde1355aa1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_scalarprint.py @@ -0,0 +1,326 @@ +# -*- coding: utf-8 -*- +""" Test printing of scalar types. + +""" +from __future__ import division, absolute_import, print_function + +import code, sys +import platform +import pytest + +from tempfile import TemporaryFile +import numpy as np +from numpy.testing import assert_, assert_equal, suppress_warnings + +class TestRealScalars(object): + def test_str(self): + svals = [0.0, -0.0, 1, -1, np.inf, -np.inf, np.nan] + styps = [np.float16, np.float32, np.float64, np.longdouble] + wanted = [ + ['0.0', '0.0', '0.0', '0.0' ], + ['-0.0', '-0.0', '-0.0', '-0.0'], + ['1.0', '1.0', '1.0', '1.0' ], + ['-1.0', '-1.0', '-1.0', '-1.0'], + ['inf', 'inf', 'inf', 'inf' ], + ['-inf', '-inf', '-inf', '-inf'], + ['nan', 'nan', 'nan', 'nan']] + + for wants, val in zip(wanted, svals): + for want, styp in zip(wants, styps): + msg = 'for str({}({}))'.format(np.dtype(styp).name, repr(val)) + assert_equal(str(styp(val)), want, err_msg=msg) + + def test_scalar_cutoffs(self): + # test that both the str and repr of np.float64 behaves + # like python floats in python3. 
Note that in python2 + # the str has truncated digits, but we do not do this + def check(v): + # we compare str to repr, to avoid python2 truncation behavior + assert_equal(str(np.float64(v)), repr(v)) + assert_equal(repr(np.float64(v)), repr(v)) + + # check we use the same number of significant digits + check(1.12345678901234567890) + check(0.0112345678901234567890) + + # check switch from scientific output to positional and back + check(1e-5) + check(1e-4) + check(1e15) + check(1e16) + + def test_py2_float_print(self): + # gh-10753 + # In python2, the python float type implements an obsolte method + # tp_print, which overrides tp_repr and tp_str when using "print" to + # output to a "real file" (ie, not a StringIO). Make sure we don't + # inherit it. + x = np.double(0.1999999999999) + with TemporaryFile('r+t') as f: + print(x, file=f) + f.seek(0) + output = f.read() + assert_equal(output, str(x) + '\n') + # In python2 the value float('0.1999999999999') prints with reduced + # precision as '0.2', but we want numpy's np.double('0.1999999999999') + # to print the unique value, '0.1999999999999'. + + # gh-11031 + # Only in the python2 interactive shell and when stdout is a "real" + # file, the output of the last command is printed to stdout without + # Py_PRINT_RAW (unlike the print statement) so `>>> x` and `>>> print + # x` are potentially different. Make sure they are the same. The only + # way I found to get prompt-like output is using an actual prompt from + # the 'code' module. Again, must use tempfile to get a "real" file. + + # dummy user-input which enters one line and then ctrl-Ds. + def userinput(): + yield 'np.sqrt(2)' + raise EOFError + gen = userinput() + input_func = lambda prompt="": next(gen) + + with TemporaryFile('r+t') as fo, TemporaryFile('r+t') as fe: + orig_stdout, orig_stderr = sys.stdout, sys.stderr + sys.stdout, sys.stderr = fo, fe + + # py2 code.interact sends irrelevant internal DeprecationWarnings + with suppress_warnings() as sup: + sup.filter(DeprecationWarning) + code.interact(local={'np': np}, readfunc=input_func, banner='') + + sys.stdout, sys.stderr = orig_stdout, orig_stderr + + fo.seek(0) + capture = fo.read().strip() + + assert_equal(capture, repr(np.sqrt(2))) + + def test_dragon4(self): + # these tests are adapted from Ryan Juckett's dragon4 implementation, + # see dragon4.c for details. 
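+        # For orientation: the fpos32/fsci32 and fpos64/fsci64 helpers defined
+        # just below are thin wrappers around np.format_float_positional and
+        # np.format_float_scientific for float32/float64 inputs. Illustrative
+        # (not asserted) examples of that API in its default shortest-repr mode:
+        #   np.format_float_positional(np.float64('10.234'))   # '10.234'
+        #   np.format_float_scientific(np.float64('10.234'))   # '1.0234e+01'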
+ + fpos32 = lambda x, **k: np.format_float_positional(np.float32(x), **k) + fsci32 = lambda x, **k: np.format_float_scientific(np.float32(x), **k) + fpos64 = lambda x, **k: np.format_float_positional(np.float64(x), **k) + fsci64 = lambda x, **k: np.format_float_scientific(np.float64(x), **k) + + preckwd = lambda prec: {'unique': False, 'precision': prec} + + assert_equal(fpos32('1.0'), "1.") + assert_equal(fsci32('1.0'), "1.e+00") + assert_equal(fpos32('10.234'), "10.234") + assert_equal(fpos32('-10.234'), "-10.234") + assert_equal(fsci32('10.234'), "1.0234e+01") + assert_equal(fsci32('-10.234'), "-1.0234e+01") + assert_equal(fpos32('1000.0'), "1000.") + assert_equal(fpos32('1.0', precision=0), "1.") + assert_equal(fsci32('1.0', precision=0), "1.e+00") + assert_equal(fpos32('10.234', precision=0), "10.") + assert_equal(fpos32('-10.234', precision=0), "-10.") + assert_equal(fsci32('10.234', precision=0), "1.e+01") + assert_equal(fsci32('-10.234', precision=0), "-1.e+01") + assert_equal(fpos32('10.234', precision=2), "10.23") + assert_equal(fsci32('-10.234', precision=2), "-1.02e+01") + assert_equal(fsci64('9.9999999999999995e-08', **preckwd(16)), + '9.9999999999999995e-08') + assert_equal(fsci64('9.8813129168249309e-324', **preckwd(16)), + '9.8813129168249309e-324') + assert_equal(fsci64('9.9999999999999694e-311', **preckwd(16)), + '9.9999999999999694e-311') + + + # test rounding + # 3.1415927410 is closest float32 to np.pi + assert_equal(fpos32('3.14159265358979323846', **preckwd(10)), + "3.1415927410") + assert_equal(fsci32('3.14159265358979323846', **preckwd(10)), + "3.1415927410e+00") + assert_equal(fpos64('3.14159265358979323846', **preckwd(10)), + "3.1415926536") + assert_equal(fsci64('3.14159265358979323846', **preckwd(10)), + "3.1415926536e+00") + # 299792448 is closest float32 to 299792458 + assert_equal(fpos32('299792458.0', **preckwd(5)), "299792448.00000") + assert_equal(fsci32('299792458.0', **preckwd(5)), "2.99792e+08") + assert_equal(fpos64('299792458.0', **preckwd(5)), "299792458.00000") + assert_equal(fsci64('299792458.0', **preckwd(5)), "2.99792e+08") + + assert_equal(fpos32('3.14159265358979323846', **preckwd(25)), + "3.1415927410125732421875000") + assert_equal(fpos64('3.14159265358979323846', **preckwd(50)), + "3.14159265358979311599796346854418516159057617187500") + assert_equal(fpos64('3.14159265358979323846'), "3.141592653589793") + + + # smallest numbers + assert_equal(fpos32(0.5**(126 + 23), unique=False, precision=149), + "0.00000000000000000000000000000000000000000000140129846432" + "4817070923729583289916131280261941876515771757068283889791" + "08268586060148663818836212158203125") + assert_equal(fpos64(0.5**(1022 + 52), unique=False, precision=1074), + "0.00000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000049406564584124654417656" + "8792868221372365059802614324764425585682500675507270208751" + "8652998363616359923797965646954457177309266567103559397963" + "9877479601078187812630071319031140452784581716784898210368" + "8718636056998730723050006387409153564984387312473397273169" + "6151400317153853980741262385655911710266585566867681870395" + "6031062493194527159149245532930545654440112748012970999954" + "1931989409080416563324524757147869014726780159355238611550" + 
"1348035264934720193790268107107491703332226844753335720832" + "4319360923828934583680601060115061698097530783422773183292" + "4790498252473077637592724787465608477820373446969953364701" + "7972677717585125660551199131504891101451037862738167250955" + "8373897335989936648099411642057026370902792427675445652290" + "87538682506419718265533447265625") + + # largest numbers + assert_equal(fpos32(np.finfo(np.float32).max, **preckwd(0)), + "340282346638528859811704183484516925440.") + assert_equal(fpos64(np.finfo(np.float64).max, **preckwd(0)), + "1797693134862315708145274237317043567980705675258449965989" + "1747680315726078002853876058955863276687817154045895351438" + "2464234321326889464182768467546703537516986049910576551282" + "0762454900903893289440758685084551339423045832369032229481" + "6580855933212334827479782620414472316873817718091929988125" + "0404026184124858368.") + # Warning: In unique mode only the integer digits necessary for + # uniqueness are computed, the rest are 0. Should we change this? + assert_equal(fpos32(np.finfo(np.float32).max, precision=0), + "340282350000000000000000000000000000000.") + + # test trailing zeros + assert_equal(fpos32('1.0', unique=False, precision=3), "1.000") + assert_equal(fpos64('1.0', unique=False, precision=3), "1.000") + assert_equal(fsci32('1.0', unique=False, precision=3), "1.000e+00") + assert_equal(fsci64('1.0', unique=False, precision=3), "1.000e+00") + assert_equal(fpos32('1.5', unique=False, precision=3), "1.500") + assert_equal(fpos64('1.5', unique=False, precision=3), "1.500") + assert_equal(fsci32('1.5', unique=False, precision=3), "1.500e+00") + assert_equal(fsci64('1.5', unique=False, precision=3), "1.500e+00") + # gh-10713 + assert_equal(fpos64('324', unique=False, precision=5, fractional=False), "324.00") + + def test_dragon4_interface(self): + tps = [np.float16, np.float32, np.float64] + if hasattr(np, 'float128'): + tps.append(np.float128) + + fpos = np.format_float_positional + fsci = np.format_float_scientific + + for tp in tps: + # test padding + assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), " 1. ") + assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), " -1. ") + assert_equal(fpos(tp('-10.2'), + pad_left=4, pad_right=4), " -10.2 ") + + # test exp_digits + assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001") + + # test fixed (non-unique) mode + assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000") + assert_equal(fsci(tp('1.0'), unique=False, precision=4), + "1.0000e+00") + + # test trimming + # trim of 'k' or '.' only affects non-unique mode, since unique + # mode will not output trailing 0s. 
+ assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'), + "1.0000") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'), + "1.") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'), + "1.2" if tp != np.float16 else "1.2002") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'), + "1.0") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='0'), "1.0") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'), + "1") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='-'), "1") + + @pytest.mark.skipif(not platform.machine().startswith("ppc64"), + reason="only applies to ppc float128 values") + def test_ppc64_ibm_double_double128(self): + # check that the precision decreases once we get into the subnormal + # range. Unlike float64, this starts around 1e-292 instead of 1e-308, + # which happens when the first double is normal and the second is + # subnormal. + x = np.float128('2.123123123123123123123123123123123e-286') + got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)] + expected = [ + "1.06156156156156156156156156156157e-286", + "1.06156156156156156156156156156158e-287", + "1.06156156156156156156156156156159e-288", + "1.0615615615615615615615615615616e-289", + "1.06156156156156156156156156156157e-290", + "1.06156156156156156156156156156156e-291", + "1.0615615615615615615615615615616e-292", + "1.0615615615615615615615615615615e-293", + "1.061561561561561561561561561562e-294", + "1.06156156156156156156156156155e-295", + "1.0615615615615615615615615616e-296", + "1.06156156156156156156156156e-297", + "1.06156156156156156156156157e-298", + "1.0615615615615615615615616e-299", + "1.06156156156156156156156e-300", + "1.06156156156156156156155e-301", + "1.0615615615615615615616e-302", + "1.061561561561561561562e-303", + "1.06156156156156156156e-304", + "1.0615615615615615618e-305", + "1.06156156156156156e-306", + "1.06156156156156157e-307", + "1.0615615615615616e-308", + "1.06156156156156e-309", + "1.06156156156157e-310", + "1.0615615615616e-311", + "1.06156156156e-312", + "1.06156156154e-313", + "1.0615615616e-314", + "1.06156156e-315", + "1.06156155e-316", + "1.061562e-317", + "1.06156e-318", + "1.06155e-319", + "1.0617e-320", + "1.06e-321", + "1.04e-322", + "1e-323", + "0.0", + "0.0"] + assert_equal(got, expected) + + # Note: we follow glibc behavior, but it (or gcc) might not be right. 
+ # In particular we can get two values that print the same but are not + # equal: + a = np.float128('2')/np.float128('3') + b = np.float128(str(a)) + assert_equal(str(a), str(b)) + assert_(a != b) + + def float32_roundtrip(self): + # gh-9360 + x = np.float32(1024 - 2**-14) + y = np.float32(1024 - 2**-13) + assert_(repr(x) != repr(y)) + assert_equal(np.float32(repr(x)), x) + assert_equal(np.float32(repr(y)), y) + + def float64_vs_python(self): + # gh-2643, gh-6136, gh-6908 + assert_equal(repr(np.float64(0.1)), repr(0.1)) + assert_(repr(np.float64(0.20000000000000004)) != repr(0.2)) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_shape_base.py b/contrib/python/numpy/py2/numpy/core/tests/test_shape_base.py new file mode 100644 index 0000000000..b996321c2e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_shape_base.py @@ -0,0 +1,706 @@ +from __future__ import division, absolute_import, print_function + +import pytest +import sys +import numpy as np +from numpy.core import ( + array, arange, atleast_1d, atleast_2d, atleast_3d, block, vstack, hstack, + newaxis, concatenate, stack + ) +from numpy.core.shape_base import (_block_dispatcher, _block_setup, + _block_concatenate, _block_slicing) +from numpy.testing import ( + assert_, assert_raises, assert_array_equal, assert_equal, + assert_raises_regex, assert_warns + ) + +from numpy.compat import long + +class TestAtleast1d(object): + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_1d(a), atleast_1d(b)] + desired = [array([1]), array([2])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [array([1, 2]), array([2, 3])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_1d(a), atleast_1d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_r1array(self): + """ Test to make sure equivalent Travis O's r1array function + """ + assert_(atleast_1d(3).shape == (1,)) + assert_(atleast_1d(3j).shape == (1,)) + assert_(atleast_1d(long(3)).shape == (1,)) + assert_(atleast_1d(3.0).shape == (1,)) + assert_(atleast_1d([[2, 3], [4, 5]]).shape == (2, 2)) + + +class TestAtleast2d(object): + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_2d(a), atleast_2d(b)] + desired = [array([[1]]), array([[2]])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [array([[1, 2]]), array([[2, 3]])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_2d(a), atleast_2d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + def test_r2array(self): + """ Test to make sure equivalent Travis O's r2array function + """ + assert_(atleast_2d(3).shape == (1, 1)) + assert_(atleast_2d([3j, 1]).shape == (1, 2)) + assert_(atleast_2d([[[3, 1], [4, 5]], [[3, 5], [1, 2]]]).shape 
== (2, 2, 2)) + + +class TestAtleast3d(object): + def test_0D_array(self): + a = array(1) + b = array(2) + res = [atleast_3d(a), atleast_3d(b)] + desired = [array([[[1]]]), array([[[2]]])] + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1, 2]) + b = array([2, 3]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [array([[[1], [2]]]), array([[[2], [3]]])] + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [a[:,:, newaxis], b[:,:, newaxis]] + assert_array_equal(res, desired) + + def test_3D_array(self): + a = array([[1, 2], [1, 2]]) + b = array([[2, 3], [2, 3]]) + a = array([a, a]) + b = array([b, b]) + res = [atleast_3d(a), atleast_3d(b)] + desired = [a, b] + assert_array_equal(res, desired) + + +class TestHstack(object): + def test_non_iterable(self): + assert_raises(TypeError, hstack, 1) + + def test_empty_input(self): + assert_raises(ValueError, hstack, ()) + + def test_0D_array(self): + a = array(1) + b = array(2) + res = hstack([a, b]) + desired = array([1, 2]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1]) + b = array([2]) + res = hstack([a, b]) + desired = array([1, 2]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1], [2]]) + b = array([[1], [2]]) + res = hstack([a, b]) + desired = array([[1, 1], [2, 2]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + hstack((np.arange(3) for _ in range(2))) + if sys.version_info.major > 2: + # map returns a list on Python 2 + with assert_warns(FutureWarning): + hstack(map(lambda x: x, np.ones((3, 2)))) + + +class TestVstack(object): + def test_non_iterable(self): + assert_raises(TypeError, vstack, 1) + + def test_empty_input(self): + assert_raises(ValueError, vstack, ()) + + def test_0D_array(self): + a = array(1) + b = array(2) + res = vstack([a, b]) + desired = array([[1], [2]]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = array([1]) + b = array([2]) + res = vstack([a, b]) + desired = array([[1], [2]]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = array([[1], [2]]) + b = array([[1], [2]]) + res = vstack([a, b]) + desired = array([[1], [2], [1], [2]]) + assert_array_equal(res, desired) + + def test_2D_array2(self): + a = array([1, 2]) + b = array([1, 2]) + res = vstack([a, b]) + desired = array([[1, 2], [1, 2]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + vstack((np.arange(3) for _ in range(2))) + + +class TestConcatenate(object): + def test_returns_copy(self): + a = np.eye(3) + b = np.concatenate([a]) + b[0, 0] = 2 + assert b[0, 0] != a[0, 0] + + def test_exceptions(self): + # test axis must be in bounds + for ndim in [1, 2, 3]: + a = np.ones((1,)*ndim) + np.concatenate((a, a), axis=0) # OK + assert_raises(np.AxisError, np.concatenate, (a, a), axis=ndim) + assert_raises(np.AxisError, np.concatenate, (a, a), axis=-(ndim + 1)) + + # Scalars cannot be concatenated + assert_raises(ValueError, concatenate, (0,)) + assert_raises(ValueError, concatenate, (np.array(0),)) + + # test shapes must match except for concatenation axis + a = np.ones((1, 2, 3)) + b = np.ones((2, 2, 3)) + axis = list(range(3)) + for i in range(3): + np.concatenate((a, b), axis=axis[0]) # OK + assert_raises(ValueError, np.concatenate, (a, b), axis=axis[1]) + assert_raises(ValueError, np.concatenate, (a, 
b), axis=axis[2]) + a = np.moveaxis(a, -1, 0) + b = np.moveaxis(b, -1, 0) + axis.append(axis.pop(0)) + + # No arrays to concatenate raises ValueError + assert_raises(ValueError, concatenate, ()) + + def test_concatenate_axis_None(self): + a = np.arange(4, dtype=np.float64).reshape((2, 2)) + b = list(range(3)) + c = ['x'] + r = np.concatenate((a, a), axis=None) + assert_equal(r.dtype, a.dtype) + assert_equal(r.ndim, 1) + r = np.concatenate((a, b), axis=None) + assert_equal(r.size, a.size + len(b)) + assert_equal(r.dtype, a.dtype) + r = np.concatenate((a, b, c), axis=None) + d = array(['0.0', '1.0', '2.0', '3.0', + '0', '1', '2', 'x']) + assert_array_equal(r, d) + + out = np.zeros(a.size + len(b)) + r = np.concatenate((a, b), axis=None) + rout = np.concatenate((a, b), axis=None, out=out) + assert_(out is rout) + assert_equal(r, rout) + + def test_large_concatenate_axis_None(self): + # When no axis is given, concatenate uses flattened versions. + # This also had a bug with many arrays (see gh-5979). + x = np.arange(1, 100) + r = np.concatenate(x, None) + assert_array_equal(x, r) + + # This should probably be deprecated: + r = np.concatenate(x, 100) # axis is >= MAXDIMS + assert_array_equal(x, r) + + def test_concatenate(self): + # Test concatenate function + # One sequence returns unmodified (but as array) + r4 = list(range(4)) + assert_array_equal(concatenate((r4,)), r4) + # Any sequence + assert_array_equal(concatenate((tuple(r4),)), r4) + assert_array_equal(concatenate((array(r4),)), r4) + # 1D default concatenation + r3 = list(range(3)) + assert_array_equal(concatenate((r4, r3)), r4 + r3) + # Mixed sequence types + assert_array_equal(concatenate((tuple(r4), r3)), r4 + r3) + assert_array_equal(concatenate((array(r4), r3)), r4 + r3) + # Explicit axis specification + assert_array_equal(concatenate((r4, r3), 0), r4 + r3) + # Including negative + assert_array_equal(concatenate((r4, r3), -1), r4 + r3) + # 2D + a23 = array([[10, 11, 12], [13, 14, 15]]) + a13 = array([[0, 1, 2]]) + res = array([[10, 11, 12], [13, 14, 15], [0, 1, 2]]) + assert_array_equal(concatenate((a23, a13)), res) + assert_array_equal(concatenate((a23, a13), 0), res) + assert_array_equal(concatenate((a23.T, a13.T), 1), res.T) + assert_array_equal(concatenate((a23.T, a13.T), -1), res.T) + # Arrays much match shape + assert_raises(ValueError, concatenate, (a23.T, a13.T), 0) + # 3D + res = arange(2 * 3 * 7).reshape((2, 3, 7)) + a0 = res[..., :4] + a1 = res[..., 4:6] + a2 = res[..., 6:] + assert_array_equal(concatenate((a0, a1, a2), 2), res) + assert_array_equal(concatenate((a0, a1, a2), -1), res) + assert_array_equal(concatenate((a0.T, a1.T, a2.T), 0), res.T) + + out = res.copy() + rout = concatenate((a0, a1, a2), 2, out=out) + assert_(out is rout) + assert_equal(res, rout) + + def test_bad_out_shape(self): + a = array([1, 2]) + b = array([3, 4]) + + assert_raises(ValueError, concatenate, (a, b), out=np.empty(5)) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((4,1))) + assert_raises(ValueError, concatenate, (a, b), out=np.empty((1,4))) + concatenate((a, b), out=np.empty(4)) + + def test_out_dtype(self): + out = np.empty(4, np.float32) + res = concatenate((array([1, 2]), array([3, 4])), out=out) + assert_(out is res) + + out = np.empty(4, np.complex64) + res = concatenate((array([0.1, 0.2]), array([0.3, 0.4])), out=out) + assert_(out is res) + + # invalid cast + out = np.empty(4, np.int32) + assert_raises(TypeError, concatenate, + (array([0.1, 0.2]), array([0.3, 0.4])), out=out) + + +def test_stack(): + # 
non-iterable input + assert_raises(TypeError, stack, 1) + + # 0d input + for input_ in [(1, 2, 3), + [np.int32(1), np.int32(2), np.int32(3)], + [np.array(1), np.array(2), np.array(3)]]: + assert_array_equal(stack(input_), [1, 2, 3]) + # 1d input examples + a = np.array([1, 2, 3]) + b = np.array([4, 5, 6]) + r1 = array([[1, 2, 3], [4, 5, 6]]) + assert_array_equal(np.stack((a, b)), r1) + assert_array_equal(np.stack((a, b), axis=1), r1.T) + # all input types + assert_array_equal(np.stack(list([a, b])), r1) + assert_array_equal(np.stack(array([a, b])), r1) + # all shapes for 1d input + arrays = [np.random.randn(3) for _ in range(10)] + axes = [0, 1, -1, -2] + expected_shapes = [(10, 3), (3, 10), (3, 10), (10, 3)] + for axis, expected_shape in zip(axes, expected_shapes): + assert_equal(np.stack(arrays, axis).shape, expected_shape) + assert_raises_regex(np.AxisError, 'out of bounds', stack, arrays, axis=2) + assert_raises_regex(np.AxisError, 'out of bounds', stack, arrays, axis=-3) + # all shapes for 2d input + arrays = [np.random.randn(3, 4) for _ in range(10)] + axes = [0, 1, 2, -1, -2, -3] + expected_shapes = [(10, 3, 4), (3, 10, 4), (3, 4, 10), + (3, 4, 10), (3, 10, 4), (10, 3, 4)] + for axis, expected_shape in zip(axes, expected_shapes): + assert_equal(np.stack(arrays, axis).shape, expected_shape) + # empty arrays + assert_(stack([[], [], []]).shape == (3, 0)) + assert_(stack([[], [], []], axis=1).shape == (0, 3)) + # out + out = np.zeros_like(r1) + np.stack((a, b), out=out) + assert_array_equal(out, r1) + # edge cases + assert_raises_regex(ValueError, 'need at least one array', stack, []) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [1, np.arange(3)]) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(3), 1]) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(3), 1], axis=1) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.zeros((3, 3)), np.zeros(3)], axis=1) + assert_raises_regex(ValueError, 'must have the same shape', + stack, [np.arange(2), np.arange(3)]) + # generator is deprecated + with assert_warns(FutureWarning): + result = stack((x for x in range(3))) + assert_array_equal(result, np.array([0, 1, 2])) + + +class TestBlock(object): + @pytest.fixture(params=['block', 'force_concatenate', 'force_slicing']) + def block(self, request): + # blocking small arrays and large arrays go through different paths. + # the algorithm is triggered depending on the number of element + # copies required. + # We define a test fixture that forces most tests to go through + # both code paths. + # Ultimately, this should be removed if a single algorithm is found + # to be faster for both small and large arrays. + def _block_force_concatenate(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_concatenate(arrays, list_ndim, result_ndim) + + def _block_force_slicing(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_slicing(arrays, list_ndim, result_ndim) + + if request.param == 'force_concatenate': + return _block_force_concatenate + elif request.param == 'force_slicing': + return _block_force_slicing + elif request.param == 'block': + return block + else: + raise ValueError('Unknown blocking request. 
There is a typo in the tests.') + + def test_returns_copy(self, block): + a = np.eye(3) + b = block(a) + b[0, 0] = 2 + assert b[0, 0] != a[0, 0] + + def test_block_total_size_estimate(self, block): + _, _, _, total_size = _block_setup([1]) + assert total_size == 1 + + _, _, _, total_size = _block_setup([[1]]) + assert total_size == 1 + + _, _, _, total_size = _block_setup([[1, 1]]) + assert total_size == 2 + + _, _, _, total_size = _block_setup([[1], [1]]) + assert total_size == 2 + + _, _, _, total_size = _block_setup([[1, 2], [3, 4]]) + assert total_size == 4 + + def test_block_simple_row_wise(self, block): + a_2d = np.ones((2, 2)) + b_2d = 2 * a_2d + desired = np.array([[1, 1, 2, 2], + [1, 1, 2, 2]]) + result = block([a_2d, b_2d]) + assert_equal(desired, result) + + def test_block_simple_column_wise(self, block): + a_2d = np.ones((2, 2)) + b_2d = 2 * a_2d + expected = np.array([[1, 1], + [1, 1], + [2, 2], + [2, 2]]) + result = block([[a_2d], [b_2d]]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_row_wise(self, block): + # # # 1-D vectors are treated as row arrays + a = np.array([1, 2, 3]) + b = np.array([2, 3, 4]) + expected = np.array([1, 2, 3, 2, 3, 4]) + result = block([a, b]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_multiple_rows(self, block): + a = np.array([1, 2, 3]) + b = np.array([2, 3, 4]) + expected = np.array([[1, 2, 3, 2, 3, 4], + [1, 2, 3, 2, 3, 4]]) + result = block([[a, b], [a, b]]) + assert_equal(expected, result) + + def test_block_with_1d_arrays_column_wise(self, block): + # # # 1-D vectors are treated as row arrays + a_1d = np.array([1, 2, 3]) + b_1d = np.array([2, 3, 4]) + expected = np.array([[1, 2, 3], + [2, 3, 4]]) + result = block([[a_1d], [b_1d]]) + assert_equal(expected, result) + + def test_block_mixed_1d_and_2d(self, block): + a_2d = np.ones((2, 2)) + b_1d = np.array([2, 2]) + result = block([[a_2d], [b_1d]]) + expected = np.array([[1, 1], + [1, 1], + [2, 2]]) + assert_equal(expected, result) + + def test_block_complicated(self, block): + # a bit more complicated + one_2d = np.array([[1, 1, 1]]) + two_2d = np.array([[2, 2, 2]]) + three_2d = np.array([[3, 3, 3, 3, 3, 3]]) + four_1d = np.array([4, 4, 4, 4, 4, 4]) + five_0d = np.array(5) + six_1d = np.array([6, 6, 6, 6, 6]) + zero_2d = np.zeros((2, 6)) + + expected = np.array([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 3, 3, 3], + [4, 4, 4, 4, 4, 4], + [5, 6, 6, 6, 6, 6], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + result = block([[one_2d, two_2d], + [three_2d], + [four_1d], + [five_0d, six_1d], + [zero_2d]]) + assert_equal(result, expected) + + def test_nested(self, block): + one = np.array([1, 1, 1]) + two = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]]) + three = np.array([3, 3, 3]) + four = np.array([4, 4, 4]) + five = np.array(5) + six = np.array([6, 6, 6, 6, 6]) + zero = np.zeros((2, 6)) + + result = block([ + [ + block([ + [one], + [three], + [four] + ]), + two + ], + [five, six], + [zero] + ]) + expected = np.array([[1, 1, 1, 2, 2, 2], + [3, 3, 3, 2, 2, 2], + [4, 4, 4, 2, 2, 2], + [5, 6, 6, 6, 6, 6], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + assert_equal(result, expected) + + def test_3d(self, block): + a000 = np.ones((2, 2, 2), int) * 1 + + a100 = np.ones((3, 2, 2), int) * 2 + a010 = np.ones((2, 3, 2), int) * 3 + a001 = np.ones((2, 2, 3), int) * 4 + + a011 = np.ones((2, 3, 3), int) * 5 + a101 = np.ones((3, 2, 3), int) * 6 + a110 = np.ones((3, 3, 2), int) * 7 + + a111 = np.ones((3, 3, 3), int) * 8 + + result = block([ + [ + [a000, a001], + [a010, a011], + ], + 
[ + [a100, a101], + [a110, a111], + ] + ]) + expected = array([[[1, 1, 4, 4, 4], + [1, 1, 4, 4, 4], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5]], + + [[1, 1, 4, 4, 4], + [1, 1, 4, 4, 4], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5], + [3, 3, 5, 5, 5]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]], + + [[2, 2, 6, 6, 6], + [2, 2, 6, 6, 6], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8], + [7, 7, 8, 8, 8]]]) + + assert_array_equal(result, expected) + + def test_block_with_mismatched_shape(self, block): + a = np.array([0, 0]) + b = np.eye(2) + assert_raises(ValueError, block, [a, b]) + assert_raises(ValueError, block, [b, a]) + + to_block = [[np.ones((2,3)), np.ones((2,2))], + [np.ones((2,2)), np.ones((2,2))]] + assert_raises(ValueError, block, to_block) + def test_no_lists(self, block): + assert_equal(block(1), np.array(1)) + assert_equal(block(np.eye(3)), np.eye(3)) + + def test_invalid_nesting(self, block): + msg = 'depths are mismatched' + assert_raises_regex(ValueError, msg, block, [1, [2]]) + assert_raises_regex(ValueError, msg, block, [1, []]) + assert_raises_regex(ValueError, msg, block, [[1], 2]) + assert_raises_regex(ValueError, msg, block, [[], 2]) + assert_raises_regex(ValueError, msg, block, [ + [[1], [2]], + [[3, 4]], + [5] # missing brackets + ]) + + def test_empty_lists(self, block): + assert_raises_regex(ValueError, 'empty', block, []) + assert_raises_regex(ValueError, 'empty', block, [[]]) + assert_raises_regex(ValueError, 'empty', block, [[1], []]) + + def test_tuple(self, block): + assert_raises_regex(TypeError, 'tuple', block, ([1, 2], [3, 4])) + assert_raises_regex(TypeError, 'tuple', block, [(1, 2), (3, 4)]) + + def test_different_ndims(self, block): + a = 1. + b = 2 * np.ones((1, 2)) + c = 3 * np.ones((1, 1, 3)) + + result = block([a, b, c]) + expected = np.array([[[1., 2., 2., 3., 3., 3.]]]) + + assert_equal(result, expected) + + def test_different_ndims_depths(self, block): + a = 1. 
+ b = 2 * np.ones((1, 2)) + c = 3 * np.ones((1, 2, 3)) + + result = block([[a, b], [c]]) + expected = np.array([[[1., 2., 2.], + [3., 3., 3.], + [3., 3., 3.]]]) + + assert_equal(result, expected) + + def test_block_memory_order(self, block): + # 3D + arr_c = np.zeros((3,)*3, order='C') + arr_f = np.zeros((3,)*3, order='F') + + b_c = [[[arr_c, arr_c], + [arr_c, arr_c]], + [[arr_c, arr_c], + [arr_c, arr_c]]] + + b_f = [[[arr_f, arr_f], + [arr_f, arr_f]], + [[arr_f, arr_f], + [arr_f, arr_f]]] + + assert block(b_c).flags['C_CONTIGUOUS'] + assert block(b_f).flags['F_CONTIGUOUS'] + + arr_c = np.zeros((3, 3), order='C') + arr_f = np.zeros((3, 3), order='F') + # 2D + b_c = [[arr_c, arr_c], + [arr_c, arr_c]] + + b_f = [[arr_f, arr_f], + [arr_f, arr_f]] + + assert block(b_c).flags['C_CONTIGUOUS'] + assert block(b_f).flags['F_CONTIGUOUS'] + + +def test_block_dispatcher(): + class ArrayLike(object): + pass + a = ArrayLike() + b = ArrayLike() + c = ArrayLike() + assert_equal(list(_block_dispatcher(a)), [a]) + assert_equal(list(_block_dispatcher([a])), [a]) + assert_equal(list(_block_dispatcher([a, b])), [a, b]) + assert_equal(list(_block_dispatcher([[a], [b, [c]]])), [a, b, c]) + # don't recurse into non-lists + assert_equal(list(_block_dispatcher((a, b))), [(a, b)]) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_ufunc.py b/contrib/python/numpy/py2/numpy/core/tests/test_ufunc.py new file mode 100644 index 0000000000..b83b8ccffa --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_ufunc.py @@ -0,0 +1,1859 @@ +from __future__ import division, absolute_import, print_function + +import warnings +import itertools + +import numpy as np +import numpy.core._umath_tests as umt +import numpy.linalg._umath_linalg as uml +import numpy.core._operand_flag_tests as opflag_tests +import numpy.core._rational_tests as _rational_tests +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, assert_no_warnings, + assert_allclose, + ) +from numpy.core.numeric import pickle + + +class TestUfuncKwargs(object): + def test_kwarg_exact(self): + assert_raises(TypeError, np.add, 1, 2, castingx='safe') + assert_raises(TypeError, np.add, 1, 2, dtypex=int) + assert_raises(TypeError, np.add, 1, 2, extobjx=[4096]) + assert_raises(TypeError, np.add, 1, 2, outx=None) + assert_raises(TypeError, np.add, 1, 2, sigx='ii->i') + assert_raises(TypeError, np.add, 1, 2, signaturex='ii->i') + assert_raises(TypeError, np.add, 1, 2, subokx=False) + assert_raises(TypeError, np.add, 1, 2, wherex=[True]) + + def test_sig_signature(self): + assert_raises(ValueError, np.add, 1, 2, sig='ii->i', + signature='ii->i') + + def test_sig_dtype(self): + assert_raises(RuntimeError, np.add, 1, 2, sig='ii->i', + dtype=int) + assert_raises(RuntimeError, np.add, 1, 2, signature='ii->i', + dtype=int) + + def test_extobj_refcount(self): + # Should not segfault with USE_DEBUG. 
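+        # The bogus 'parrot' keyword below makes the call fail with TypeError
+        # while an extobj argument is present; the point is that the extobj
+        # reference is handled correctly on the error path (hence the
+        # USE_DEBUG note above).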
+ assert_raises(TypeError, np.add, 1, 2, extobj=[4096], parrot=True) + + +class TestUfunc(object): + def test_pickle(self): + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_(pickle.loads(pickle.dumps(np.sin, + protocol=proto)) is np.sin) + + # Check that ufunc not defined in the top level numpy namespace + # such as numpy.core._rational_tests.test_add can also be pickled + res = pickle.loads(pickle.dumps(_rational_tests.test_add, + protocol=proto)) + assert_(res is _rational_tests.test_add) + + def test_pickle_withstring(self): + astring = (b"cnumpy.core\n_ufunc_reconstruct\np0\n" + b"(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.") + assert_(pickle.loads(astring) is np.cos) + + def test_reduceat_shifting_sum(self): + L = 6 + x = np.arange(L) + idx = np.array(list(zip(np.arange(L - 2), np.arange(L - 2) + 2))).ravel() + assert_array_equal(np.add.reduceat(x, idx)[::2], [1, 3, 5, 7]) + + def test_generic_loops(self): + """Test generic loops. + + The loops to be tested are: + + PyUFunc_ff_f_As_dd_d + PyUFunc_ff_f + PyUFunc_dd_d + PyUFunc_gg_g + PyUFunc_FF_F_As_DD_D + PyUFunc_DD_D + PyUFunc_FF_F + PyUFunc_GG_G + PyUFunc_OO_O + PyUFunc_OO_O_method + PyUFunc_f_f_As_d_d + PyUFunc_d_d + PyUFunc_f_f + PyUFunc_g_g + PyUFunc_F_F_As_D_D + PyUFunc_F_F + PyUFunc_D_D + PyUFunc_G_G + PyUFunc_O_O + PyUFunc_O_O_method + PyUFunc_On_Om + + Where: + + f -- float + d -- double + g -- long double + F -- complex float + D -- complex double + G -- complex long double + O -- python object + + It is difficult to assure that each of these loops is entered from the + Python level as the special cased loops are a moving target and the + corresponding types are architecture dependent. We probably need to + define C level testing ufuncs to get at them. For the time being, I've + just looked at the signatures registered in the build directory to find + relevant functions. + + Fixme, currently untested: + + PyUFunc_ff_f_As_dd_d + PyUFunc_FF_F_As_DD_D + PyUFunc_f_f_As_d_d + PyUFunc_F_F_As_D_D + PyUFunc_On_Om + + """ + fone = np.exp + ftwo = lambda x, y: x**y + fone_val = 1 + ftwo_val = 1 + # check unary PyUFunc_f_f. + msg = "PyUFunc_f_f" + x = np.zeros(10, dtype=np.single)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + # check unary PyUFunc_d_d. + msg = "PyUFunc_d_d" + x = np.zeros(10, dtype=np.double)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + # check unary PyUFunc_g_g. + msg = "PyUFunc_g_g" + x = np.zeros(10, dtype=np.longdouble)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + # check unary PyUFunc_F_F. + msg = "PyUFunc_F_F" + x = np.zeros(10, dtype=np.csingle)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + # check unary PyUFunc_D_D. + msg = "PyUFunc_D_D" + x = np.zeros(10, dtype=np.cdouble)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + # check unary PyUFunc_G_G. + msg = "PyUFunc_G_G" + x = np.zeros(10, dtype=np.clongdouble)[0::2] + assert_almost_equal(fone(x), fone_val, err_msg=msg) + + # check binary PyUFunc_ff_f. + msg = "PyUFunc_ff_f" + x = np.ones(10, dtype=np.single)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + # check binary PyUFunc_dd_d. + msg = "PyUFunc_dd_d" + x = np.ones(10, dtype=np.double)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + # check binary PyUFunc_gg_g. + msg = "PyUFunc_gg_g" + x = np.ones(10, dtype=np.longdouble)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + # check binary PyUFunc_FF_F. 
+ msg = "PyUFunc_FF_F" + x = np.ones(10, dtype=np.csingle)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + # check binary PyUFunc_DD_D. + msg = "PyUFunc_DD_D" + x = np.ones(10, dtype=np.cdouble)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + # check binary PyUFunc_GG_G. + msg = "PyUFunc_GG_G" + x = np.ones(10, dtype=np.clongdouble)[0::2] + assert_almost_equal(ftwo(x, x), ftwo_val, err_msg=msg) + + # class to use in testing object method loops + class foo(object): + def conjugate(self): + return np.bool_(1) + + def logical_xor(self, obj): + return np.bool_(1) + + # check unary PyUFunc_O_O + msg = "PyUFunc_O_O" + x = np.ones(10, dtype=object)[0::2] + assert_(np.all(np.abs(x) == 1), msg) + # check unary PyUFunc_O_O_method + msg = "PyUFunc_O_O_method" + x = np.zeros(10, dtype=object)[0::2] + for i in range(len(x)): + x[i] = foo() + assert_(np.all(np.conjugate(x) == True), msg) + + # check binary PyUFunc_OO_O + msg = "PyUFunc_OO_O" + x = np.ones(10, dtype=object)[0::2] + assert_(np.all(np.add(x, x) == 2), msg) + # check binary PyUFunc_OO_O_method + msg = "PyUFunc_OO_O_method" + x = np.zeros(10, dtype=object)[0::2] + for i in range(len(x)): + x[i] = foo() + assert_(np.all(np.logical_xor(x, x)), msg) + + # check PyUFunc_On_Om + # fixme -- I don't know how to do this yet + + def test_all_ufunc(self): + """Try to check presence and results of all ufuncs. + + The list of ufuncs comes from generate_umath.py and is as follows: + + ===== ==== ============= =============== ======================== + done args function types notes + ===== ==== ============= =============== ======================== + n 1 conjugate nums + O + n 1 absolute nums + O complex -> real + n 1 negative nums + O + n 1 sign nums + O -> int + n 1 invert bool + ints + O flts raise an error + n 1 degrees real + M cmplx raise an error + n 1 radians real + M cmplx raise an error + n 1 arccos flts + M + n 1 arccosh flts + M + n 1 arcsin flts + M + n 1 arcsinh flts + M + n 1 arctan flts + M + n 1 arctanh flts + M + n 1 cos flts + M + n 1 sin flts + M + n 1 tan flts + M + n 1 cosh flts + M + n 1 sinh flts + M + n 1 tanh flts + M + n 1 exp flts + M + n 1 expm1 flts + M + n 1 log flts + M + n 1 log10 flts + M + n 1 log1p flts + M + n 1 sqrt flts + M real x < 0 raises error + n 1 ceil real + M + n 1 trunc real + M + n 1 floor real + M + n 1 fabs real + M + n 1 rint flts + M + n 1 isnan flts -> bool + n 1 isinf flts -> bool + n 1 isfinite flts -> bool + n 1 signbit real -> bool + n 1 modf real -> (frac, int) + n 1 logical_not bool + nums + M -> bool + n 2 left_shift ints + O flts raise an error + n 2 right_shift ints + O flts raise an error + n 2 add bool + nums + O boolean + is || + n 2 subtract bool + nums + O boolean - is ^ + n 2 multiply bool + nums + O boolean * is & + n 2 divide nums + O + n 2 floor_divide nums + O + n 2 true_divide nums + O bBhH -> f, iIlLqQ -> d + n 2 fmod nums + M + n 2 power nums + O + n 2 greater bool + nums + O -> bool + n 2 greater_equal bool + nums + O -> bool + n 2 less bool + nums + O -> bool + n 2 less_equal bool + nums + O -> bool + n 2 equal bool + nums + O -> bool + n 2 not_equal bool + nums + O -> bool + n 2 logical_and bool + nums + M -> bool + n 2 logical_or bool + nums + M -> bool + n 2 logical_xor bool + nums + M -> bool + n 2 maximum bool + nums + O + n 2 minimum bool + nums + O + n 2 bitwise_and bool + ints + O flts raise an error + n 2 bitwise_or bool + ints + O flts raise an error + n 2 bitwise_xor bool + ints + O flts raise an error + n 2 arctan2 real + M + n 2 
remainder ints + real + O + n 2 hypot real + M + ===== ==== ============= =============== ======================== + + Types other than those listed will be accepted, but they are cast to + the smallest compatible type for which the function is defined. The + casting rules are: + + bool -> int8 -> float32 + ints -> double + + """ + pass + + # from include/numpy/ufuncobject.h + size_inferred = 2 + can_ignore = 4 + def test_signature0(self): + # the arguments to test_signature are: nin, nout, core_signature + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(i),(i)->()") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 1, 0)) + assert_equal(ixs, (0, 0)) + assert_equal(flags, (self.size_inferred,)) + assert_equal(sizes, (-1,)) + + def test_signature1(self): + # empty core signature; treat as plain ufunc (with trivial core) + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(),()->()") + assert_equal(enabled, 0) + assert_equal(num_dims, (0, 0, 0)) + assert_equal(ixs, ()) + assert_equal(flags, ()) + assert_equal(sizes, ()) + + def test_signature2(self): + # more complicated names for variables + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(i1,i2),(J_1)->(_kAB)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 1, 1)) + assert_equal(ixs, (0, 1, 2, 3)) + assert_equal(flags, (self.size_inferred,)*4) + assert_equal(sizes, (-1, -1, -1, -1)) + + def test_signature3(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, u"(i1, i12), (J_1)->(i12, i2)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 1, 2)) + assert_equal(ixs, (0, 1, 2, 1, 3)) + assert_equal(flags, (self.size_inferred,)*4) + assert_equal(sizes, (-1, -1, -1, -1)) + + def test_signature4(self): + # matrix_multiply signature from _umath_tests + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(n,k),(k,m)->(n,m)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 2, 2)) + assert_equal(ixs, (0, 1, 1, 2, 0, 2)) + assert_equal(flags, (self.size_inferred,)*3) + assert_equal(sizes, (-1, -1, -1)) + + def test_signature5(self): + # matmul signature from _umath_tests + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 2, 1, "(n?,k),(k,m?)->(n?,m?)") + assert_equal(enabled, 1) + assert_equal(num_dims, (2, 2, 2)) + assert_equal(ixs, (0, 1, 1, 2, 0, 2)) + assert_equal(flags, (self.size_inferred | self.can_ignore, + self.size_inferred, + self.size_inferred | self.can_ignore)) + assert_equal(sizes, (-1, -1, -1)) + + def test_signature6(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 1, 1, "(3)->()") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 0)) + assert_equal(ixs, (0,)) + assert_equal(flags, (0,)) + assert_equal(sizes, (3,)) + + def test_signature7(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 3, 1, "(3),(03,3),(n)->(9)") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 2, 1, 1)) + assert_equal(ixs, (0, 0, 0, 1, 2)) + assert_equal(flags, (0, self.size_inferred, 0)) + assert_equal(sizes, (3, -1, 9)) + + def test_signature8(self): + enabled, num_dims, ixs, flags, sizes = umt.test_signature( + 3, 1, "(3?),(3?,3?),(n)->(9)") + assert_equal(enabled, 1) + assert_equal(num_dims, (1, 2, 1, 1)) + assert_equal(ixs, (0, 0, 0, 1, 2)) + assert_equal(flags, (self.can_ignore, self.size_inferred, 0)) + assert_equal(sizes, (3, -1, 9)) + + def test_signature_failure0(self): + # in the following calls, a ValueError should be raised because + # of error in core 
signature + # FIXME These should be using assert_raises + + # error: extra parenthesis + msg = "core_sig: extra parenthesis" + try: + ret = umt.test_signature(2, 1, "((i)),(i)->()") + assert_equal(ret, None, err_msg=msg) + except ValueError: + pass + + def test_signature_failure1(self): + # error: parenthesis matching + msg = "core_sig: parenthesis matching" + try: + ret = umt.test_signature(2, 1, "(i),)i(->()") + assert_equal(ret, None, err_msg=msg) + except ValueError: + pass + + def test_signature_failure2(self): + # error: incomplete signature. letters outside of parenthesis are ignored + msg = "core_sig: incomplete signature" + try: + ret = umt.test_signature(2, 1, "(i),->()") + assert_equal(ret, None, err_msg=msg) + except ValueError: + pass + + def test_signature_failure3(self): + # error: incomplete signature. 2 output arguments are specified + msg = "core_sig: incomplete signature" + try: + ret = umt.test_signature(2, 2, "(i),(i)->()") + assert_equal(ret, None, err_msg=msg) + except ValueError: + pass + + def test_get_signature(self): + assert_equal(umt.inner1d.signature, "(i),(i)->()") + + def test_forced_sig(self): + a = 0.5*np.arange(3, dtype='f8') + assert_equal(np.add(a, 0.5), [0.5, 1, 1.5]) + assert_equal(np.add(a, 0.5, sig='i', casting='unsafe'), [0, 0, 1]) + assert_equal(np.add(a, 0.5, sig='ii->i', casting='unsafe'), [0, 0, 1]) + assert_equal(np.add(a, 0.5, sig=('i4',), casting='unsafe'), [0, 0, 1]) + assert_equal(np.add(a, 0.5, sig=('i4', 'i4', 'i4'), + casting='unsafe'), [0, 0, 1]) + + b = np.zeros((3,), dtype='f8') + np.add(a, 0.5, out=b) + assert_equal(b, [0.5, 1, 1.5]) + b[:] = 0 + np.add(a, 0.5, sig='i', out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + np.add(a, 0.5, sig='ii->i', out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + np.add(a, 0.5, sig=('i4',), out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + b[:] = 0 + np.add(a, 0.5, sig=('i4', 'i4', 'i4'), out=b, casting='unsafe') + assert_equal(b, [0, 0, 1]) + + def test_true_divide(self): + a = np.array(10) + b = np.array(20) + tgt = np.array(0.5) + + for tc in 'bhilqBHILQefdgFDG': + dt = np.dtype(tc) + aa = a.astype(dt) + bb = b.astype(dt) + + # Check result value and dtype. + for x, y in itertools.product([aa, -aa], [bb, -bb]): + + # Check with no output type specified + if tc in 'FDG': + tgt = complex(x)/complex(y) + else: + tgt = float(x)/float(y) + + res = np.true_divide(x, y) + rtol = max(np.finfo(res).resolution, 1e-15) + assert_allclose(res, tgt, rtol=rtol) + + if tc in 'bhilqBHILQ': + assert_(res.dtype.name == 'float64') + else: + assert_(res.dtype.name == dt.name ) + + # Check with output type specified. This also checks for the + # incorrect casts in issue gh-3484 because the unary '-' does + # not change types, even for unsigned types, Hence casts in the + # ufunc from signed to unsigned and vice versa will lead to + # errors in the values. + for tcout in 'bhilqBHILQ': + dtout = np.dtype(tcout) + assert_raises(TypeError, np.true_divide, x, y, dtype=dtout) + + for tcout in 'efdg': + dtout = np.dtype(tcout) + if tc in 'FDG': + # Casting complex to float is not allowed + assert_raises(TypeError, np.true_divide, x, y, dtype=dtout) + else: + tgt = float(x)/float(y) + rtol = max(np.finfo(dtout).resolution, 1e-15) + atol = max(np.finfo(dtout).tiny, 3e-308) + # Some test values result in invalid for float16. 
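+                    # (one way this happens: negating an unsigned input wraps
+                    # it to a huge value, which becomes inf when converted to
+                    # float16, and inf/inf then raises 'invalid' -- hence the
+                    # errstate guard and the isfinite skip below)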
+ with np.errstate(invalid='ignore'): + res = np.true_divide(x, y, dtype=dtout) + if not np.isfinite(res) and tcout == 'e': + continue + assert_allclose(res, tgt, rtol=rtol, atol=atol) + assert_(res.dtype.name == dtout.name) + + for tcout in 'FDG': + dtout = np.dtype(tcout) + tgt = complex(x)/complex(y) + rtol = max(np.finfo(dtout).resolution, 1e-15) + atol = max(np.finfo(dtout).tiny, 3e-308) + res = np.true_divide(x, y, dtype=dtout) + if not np.isfinite(res): + continue + assert_allclose(res, tgt, rtol=rtol, atol=atol) + assert_(res.dtype.name == dtout.name) + + # Check booleans + a = np.ones((), dtype=np.bool_) + res = np.true_divide(a, a) + assert_(res == 1.0) + assert_(res.dtype.name == 'float64') + res = np.true_divide(~a, a) + assert_(res == 0.0) + assert_(res.dtype.name == 'float64') + + def test_sum_stability(self): + a = np.ones(500, dtype=np.float32) + assert_almost_equal((a / 10.).sum() - a.size / 10., 0, 4) + + a = np.ones(500, dtype=np.float64) + assert_almost_equal((a / 10.).sum() - a.size / 10., 0, 13) + + def test_sum(self): + for dt in (int, np.float16, np.float32, np.float64, np.longdouble): + for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127, + 128, 1024, 1235): + tgt = dt(v * (v + 1) / 2) + d = np.arange(1, v + 1, dtype=dt) + + # warning if sum overflows, which it does in float16 + overflow = not np.isfinite(tgt) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert_almost_equal(np.sum(d), tgt) + assert_equal(len(w), 1 * overflow) + + assert_almost_equal(np.sum(d[::-1]), tgt) + assert_equal(len(w), 2 * overflow) + + d = np.ones(500, dtype=dt) + assert_almost_equal(np.sum(d[::2]), 250.) + assert_almost_equal(np.sum(d[1::2]), 250.) + assert_almost_equal(np.sum(d[::3]), 167.) + assert_almost_equal(np.sum(d[1::3]), 167.) + assert_almost_equal(np.sum(d[::-2]), 250.) + assert_almost_equal(np.sum(d[-1::-2]), 250.) + assert_almost_equal(np.sum(d[::-3]), 167.) + assert_almost_equal(np.sum(d[-1::-3]), 167.) + # sum with first reduction entry != 0 + d = np.ones((1,), dtype=dt) + d += d + assert_almost_equal(d, 2.) + + def test_sum_complex(self): + for dt in (np.complex64, np.complex128, np.clongdouble): + for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127, + 128, 1024, 1235): + tgt = dt(v * (v + 1) / 2) - dt((v * (v + 1) / 2) * 1j) + d = np.empty(v, dtype=dt) + d.real = np.arange(1, v + 1) + d.imag = -np.arange(1, v + 1) + assert_almost_equal(np.sum(d), tgt) + assert_almost_equal(np.sum(d[::-1]), tgt) + + d = np.ones(500, dtype=dt) + 1j + assert_almost_equal(np.sum(d[::2]), 250. + 250j) + assert_almost_equal(np.sum(d[1::2]), 250. + 250j) + assert_almost_equal(np.sum(d[::3]), 167. + 167j) + assert_almost_equal(np.sum(d[1::3]), 167. + 167j) + assert_almost_equal(np.sum(d[::-2]), 250. + 250j) + assert_almost_equal(np.sum(d[-1::-2]), 250. + 250j) + assert_almost_equal(np.sum(d[::-3]), 167. + 167j) + assert_almost_equal(np.sum(d[-1::-3]), 167. + 167j) + # sum with first reduction entry != 0 + d = np.ones((1,), dtype=dt) + 1j + d += d + assert_almost_equal(d, 2. 
+ 2j) + + def test_sum_initial(self): + # Integer, single axis + assert_equal(np.sum([3], initial=2), 5) + + # Floating point + assert_almost_equal(np.sum([0.2], initial=0.1), 0.3) + + # Multiple non-adjacent axes + assert_equal(np.sum(np.ones((2, 3, 5), dtype=np.int64), axis=(0, 2), initial=2), + [12, 12, 12]) + + def test_inner1d(self): + a = np.arange(6).reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1)) + a = np.arange(6) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a)) + + def test_broadcast(self): + msg = "broadcast" + a = np.arange(4).reshape((2, 1, 2)) + b = np.arange(4).reshape((1, 2, 2)) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + msg = "extend & broadcast loop dimensions" + b = np.arange(4).reshape((2, 2)) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + # Broadcast in core dimensions should fail + a = np.arange(8).reshape((4, 2)) + b = np.arange(4).reshape((4, 1)) + assert_raises(ValueError, umt.inner1d, a, b) + # Extend core dimensions should fail + a = np.arange(8).reshape((4, 2)) + b = np.array(7) + assert_raises(ValueError, umt.inner1d, a, b) + # Broadcast should fail + a = np.arange(2).reshape((2, 1, 1)) + b = np.arange(3).reshape((3, 1, 1)) + assert_raises(ValueError, umt.inner1d, a, b) + + def test_type_cast(self): + msg = "type cast" + a = np.arange(6, dtype='short').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + msg = "type cast on one argument" + a = np.arange(6).reshape((2, 3)) + b = a + 0.1 + assert_array_almost_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), + err_msg=msg) + + def test_endian(self): + msg = "big endian" + a = np.arange(6, dtype='>i4').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + msg = "little endian" + a = np.arange(6, dtype='<i4').reshape((2, 3)) + assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1), + err_msg=msg) + + # Output should always be native-endian + Ba = np.arange(1, dtype='>f8') + La = np.arange(1, dtype='<f8') + assert_equal((Ba+Ba).dtype, np.dtype('f8')) + assert_equal((Ba+La).dtype, np.dtype('f8')) + assert_equal((La+Ba).dtype, np.dtype('f8')) + assert_equal((La+La).dtype, np.dtype('f8')) + + assert_equal(np.absolute(La).dtype, np.dtype('f8')) + assert_equal(np.absolute(Ba).dtype, np.dtype('f8')) + assert_equal(np.negative(La).dtype, np.dtype('f8')) + assert_equal(np.negative(Ba).dtype, np.dtype('f8')) + + def test_incontiguous_array(self): + msg = "incontiguous memory layout of array" + x = np.arange(64).reshape((2, 2, 2, 2, 2, 2)) + a = x[:, 0,:, 0,:, 0] + b = x[:, 1,:, 1,:, 1] + a[0, 0, 0] = -1 + msg2 = "make sure it references to the original array" + assert_equal(x[0, 0, 0, 0, 0, 0], -1, err_msg=msg2) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + x = np.arange(24).reshape(2, 3, 4) + a = x.T + b = x.T + a[0, 0, 0] = -1 + assert_equal(x[0, 0, 0], -1, err_msg=msg2) + assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) + + def test_output_argument(self): + msg = "output argument" + a = np.arange(12).reshape((2, 3, 2)) + b = np.arange(4).reshape((2, 1, 2)) + 1 + c = np.zeros((2, 3), dtype='int') + umt.inner1d(a, b, c) + assert_array_equal(c, np.sum(a*b, axis=-1), err_msg=msg) + c[:] = -1 + umt.inner1d(a, b, out=c) + assert_array_equal(c, np.sum(a*b, axis=-1), err_msg=msg) + + msg = "output argument with type cast" + c = np.zeros((2, 3), dtype='int16') + umt.inner1d(a, b, c) + 
assert_array_equal(c, np.sum(a*b, axis=-1), err_msg=msg) + c[:] = -1 + umt.inner1d(a, b, out=c) + assert_array_equal(c, np.sum(a*b, axis=-1), err_msg=msg) + + msg = "output argument with incontiguous layout" + c = np.zeros((2, 3, 4), dtype='int16') + umt.inner1d(a, b, c[..., 0]) + assert_array_equal(c[..., 0], np.sum(a*b, axis=-1), err_msg=msg) + c[:] = -1 + umt.inner1d(a, b, out=c[..., 0]) + assert_array_equal(c[..., 0], np.sum(a*b, axis=-1), err_msg=msg) + + def test_axes_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + # basic tests on inputs (outputs tested below with matrix_multiply). + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + # default + c = inner1d(a, b, axes=[(-1,), (-1,), ()]) + assert_array_equal(c, (a * b).sum(-1)) + # integers ok for single axis. + c = inner1d(a, b, axes=[-1, -1, ()]) + assert_array_equal(c, (a * b).sum(-1)) + # mix fine + c = inner1d(a, b, axes=[(-1,), -1, ()]) + assert_array_equal(c, (a * b).sum(-1)) + # can omit last axis. + c = inner1d(a, b, axes=[-1, -1]) + assert_array_equal(c, (a * b).sum(-1)) + # can pass in other types of integer (with __index__ protocol) + c = inner1d(a, b, axes=[np.int8(-1), np.array(-1, dtype=np.int32)]) + assert_array_equal(c, (a * b).sum(-1)) + # swap some axes + c = inner1d(a, b, axes=[0, 0]) + assert_array_equal(c, (a * b).sum(0)) + c = inner1d(a, b, axes=[0, 2]) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1)) + # Check errors for improperly constructed axes arguments. + # should have list. + assert_raises(TypeError, inner1d, a, b, axes=-1) + # needs enough elements + assert_raises(ValueError, inner1d, a, b, axes=[-1]) + # should pass in indices. + assert_raises(TypeError, inner1d, a, b, axes=[-1.0, -1.0]) + assert_raises(TypeError, inner1d, a, b, axes=[(-1.0,), -1]) + assert_raises(TypeError, inner1d, a, b, axes=[None, 1]) + # cannot pass an index unless there is only one dimension + # (output is wrong in this case) + assert_raises(TypeError, inner1d, a, b, axes=[-1, -1, -1]) + # or pass in generally the wrong number of axes + assert_raises(ValueError, inner1d, a, b, axes=[-1, -1, (-1,)]) + assert_raises(ValueError, inner1d, a, b, axes=[-1, (-2, -1), ()]) + # axes need to have same length. + assert_raises(ValueError, inner1d, a, b, axes=[0, 1]) + + # matrix_multiply signature: '(m,n),(n,p)->(m,p)' + mm = umt.matrix_multiply + a = np.arange(12).reshape((2, 3, 2)) + b = np.arange(8).reshape((2, 2, 2, 1)) + 1 + # Sanity check. + c = mm(a, b) + assert_array_equal(c, np.matmul(a, b)) + # Default axes. + c = mm(a, b, axes=[(-2, -1), (-2, -1), (-2, -1)]) + assert_array_equal(c, np.matmul(a, b)) + # Default with explicit axes. + c = mm(a, b, axes=[(1, 2), (2, 3), (2, 3)]) + assert_array_equal(c, np.matmul(a, b)) + # swap some axes. + c = mm(a, b, axes=[(0, -1), (1, 2), (-2, -1)]) + assert_array_equal(c, np.matmul(a.transpose(1, 0, 2), + b.transpose(0, 3, 1, 2))) + # Default with output array. + c = np.empty((2, 2, 3, 1)) + d = mm(a, b, out=c, axes=[(1, 2), (2, 3), (2, 3)]) + assert_(c is d) + assert_array_equal(c, np.matmul(a, b)) + # Transposed output array + c = np.empty((1, 2, 2, 3)) + d = mm(a, b, out=c, axes=[(-2, -1), (-2, -1), (3, 0)]) + assert_(c is d) + assert_array_equal(c, np.matmul(a, b).transpose(3, 0, 1, 2)) + # Check errors for improperly constructed axes arguments. 
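+        # A well-formed axes argument is a list with one entry per operand
+        # (outputs included); each entry is a tuple of that operand's core
+        # axes, and a bare integer is only accepted for operands with exactly
+        # one core dimension.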
+ # wrong argument + assert_raises(TypeError, mm, a, b, axis=1) + # axes should be list + assert_raises(TypeError, mm, a, b, axes=1) + assert_raises(TypeError, mm, a, b, axes=((-2, -1), (-2, -1), (-2, -1))) + # list needs to have right length + assert_raises(ValueError, mm, a, b, axes=[]) + assert_raises(ValueError, mm, a, b, axes=[(-2, -1)]) + # list should contain tuples for multiple axes + assert_raises(TypeError, mm, a, b, axes=[-1, -1, -1]) + assert_raises(TypeError, mm, a, b, axes=[(-2, -1), (-2, -1), -1]) + assert_raises(TypeError, + mm, a, b, axes=[[-2, -1], [-2, -1], [-2, -1]]) + assert_raises(TypeError, + mm, a, b, axes=[(-2, -1), (-2, -1), [-2, -1]]) + assert_raises(TypeError, mm, a, b, axes=[(-2, -1), (-2, -1), None]) + # tuples should not have duplicated values + assert_raises(ValueError, mm, a, b, axes=[(-2, -1), (-2, -1), (-2, -2)]) + # arrays should have enough axes. + z = np.zeros((2, 2)) + assert_raises(ValueError, mm, z, z[0]) + assert_raises(ValueError, mm, z, z, out=z[:, 0]) + assert_raises(ValueError, mm, z[1], z, axes=[0, 1]) + assert_raises(ValueError, mm, z, z, out=z[0], axes=[0, 1]) + # Regular ufuncs should not accept axes. + assert_raises(TypeError, np.add, 1., 1., axes=[0]) + # should be able to deal with bad unrelated kwargs. + assert_raises(TypeError, mm, z, z, axes=[0, 1], parrot=True) + + def test_axis_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, axis=-1) + assert_array_equal(c, (a * b).sum(-1)) + out = np.zeros_like(c) + d = inner1d(a, b, axis=-1, out=out) + assert_(d is out) + assert_array_equal(d, c) + c = inner1d(a, b, axis=0) + assert_array_equal(c, (a * b).sum(0)) + # Sanity checks on innerwt and cumsum. + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w, axis=0), + np.sum(a * b * w, axis=0)) + assert_array_equal(umt.cumsum(a, axis=0), np.cumsum(a, axis=0)) + assert_array_equal(umt.cumsum(a, axis=-1), np.cumsum(a, axis=-1)) + out = np.empty_like(a) + b = umt.cumsum(a, out=out, axis=0) + assert_(out is b) + assert_array_equal(b, np.cumsum(a, axis=0)) + b = umt.cumsum(a, out=out, axis=1) + assert_(out is b) + assert_array_equal(b, np.cumsum(a, axis=-1)) + # Check errors. + # Cannot pass in both axis and axes. + assert_raises(TypeError, inner1d, a, b, axis=0, axes=[0, 0]) + # Not an integer. + assert_raises(TypeError, inner1d, a, b, axis=[0]) + # more than 1 core dimensions. + mm = umt.matrix_multiply + assert_raises(TypeError, mm, a, b, axis=1) + # Output wrong size in axis. + out = np.empty((1, 2, 3), dtype=a.dtype) + assert_raises(ValueError, umt.cumsum, a, out=out, axis=0) + # Regular ufuncs should not accept axis. + assert_raises(TypeError, np.add, 1., 1., axis=0) + + def test_keepdims_argument(self): + # inner1d signature: '(i),(i)->()' + inner1d = umt.inner1d + a = np.arange(27.).reshape((3, 3, 3)) + b = np.arange(10., 19.).reshape((3, 1, 3)) + c = inner1d(a, b) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, keepdims=False) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + out = np.zeros_like(c) + d = inner1d(a, b, keepdims=True, out=out) + assert_(d is out) + assert_array_equal(d, c) + # Now combined with axis and axes. 
+ c = inner1d(a, b, axis=-1, keepdims=False) + assert_array_equal(c, (a * b).sum(-1, keepdims=False)) + c = inner1d(a, b, axis=-1, keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + c = inner1d(a, b, axis=0, keepdims=False) + assert_array_equal(c, (a * b).sum(0, keepdims=False)) + c = inner1d(a, b, axis=0, keepdims=True) + assert_array_equal(c, (a * b).sum(0, keepdims=True)) + c = inner1d(a, b, axes=[(-1,), (-1,), ()], keepdims=False) + assert_array_equal(c, (a * b).sum(-1)) + c = inner1d(a, b, axes=[(-1,), (-1,), (-1,)], keepdims=True) + assert_array_equal(c, (a * b).sum(-1, keepdims=True)) + c = inner1d(a, b, axes=[0, 0], keepdims=False) + assert_array_equal(c, (a * b).sum(0)) + c = inner1d(a, b, axes=[0, 0, 0], keepdims=True) + assert_array_equal(c, (a * b).sum(0, keepdims=True)) + c = inner1d(a, b, axes=[0, 2], keepdims=False) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1)) + c = inner1d(a, b, axes=[0, 2], keepdims=True) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1, + keepdims=True)) + c = inner1d(a, b, axes=[0, 2, 2], keepdims=True) + assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1, + keepdims=True)) + c = inner1d(a, b, axes=[0, 2, 0], keepdims=True) + assert_array_equal(c, (a * b.transpose(2, 0, 1)).sum(0, keepdims=True)) + # Hardly useful, but should work. + c = inner1d(a, b, axes=[0, 2, 1], keepdims=True) + assert_array_equal(c, (a.transpose(1, 0, 2) * b.transpose(0, 2, 1)) + .sum(1, keepdims=True)) + # Check with two core dimensions. + a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis] + expected = uml.det(a) + c = uml.det(a, keepdims=False) + assert_array_equal(c, expected) + c = uml.det(a, keepdims=True) + assert_array_equal(c, expected[:, np.newaxis, np.newaxis]) + a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis] + expected_s, expected_l = uml.slogdet(a) + cs, cl = uml.slogdet(a, keepdims=False) + assert_array_equal(cs, expected_s) + assert_array_equal(cl, expected_l) + cs, cl = uml.slogdet(a, keepdims=True) + assert_array_equal(cs, expected_s[:, np.newaxis, np.newaxis]) + assert_array_equal(cl, expected_l[:, np.newaxis, np.newaxis]) + # Sanity check on innerwt. + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w, keepdims=True), + np.sum(a * b * w, axis=-1, keepdims=True)) + assert_array_equal(umt.innerwt(a, b, w, axis=0, keepdims=True), + np.sum(a * b * w, axis=0, keepdims=True)) + # Check errors. + # Not a boolean + assert_raises(TypeError, inner1d, a, b, keepdims='true') + # More than 1 core dimension, and core output dimensions. + mm = umt.matrix_multiply + assert_raises(TypeError, mm, a, b, keepdims=True) + assert_raises(TypeError, mm, a, b, keepdims=False) + # Regular ufuncs should not accept keepdims. 
+ assert_raises(TypeError, np.add, 1., 1., keepdims=False) + + def test_innerwt(self): + a = np.arange(6).reshape((2, 3)) + b = np.arange(10, 16).reshape((2, 3)) + w = np.arange(20, 26).reshape((2, 3)) + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + a = np.arange(100, 124).reshape((2, 3, 4)) + b = np.arange(200, 224).reshape((2, 3, 4)) + w = np.arange(300, 324).reshape((2, 3, 4)) + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + + def test_innerwt_empty(self): + """Test generalized ufunc with zero-sized operands""" + a = np.array([], dtype='f8') + b = np.array([], dtype='f8') + w = np.array([], dtype='f8') + assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1)) + + def test_cross1d(self): + """Test with fixed-sized signature.""" + a = np.eye(3) + assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3))) + out = np.zeros((3, 3)) + result = umt.cross1d(a[0], a, out) + assert_(result is out) + assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1]))) + assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4)) + assert_raises(ValueError, umt.cross1d, a, np.arange(4.)) + assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4))) + + def test_can_ignore_signature(self): + # Comparing the effects of ? in signature: + # matrix_multiply: (m,n),(n,p)->(m,p) # all must be there. + # matmul: (m?,n),(n,p?)->(m?,p?) # allow missing m, p. + mat = np.arange(12).reshape((2, 3, 2)) + single_vec = np.arange(2) + col_vec = single_vec[:, np.newaxis] + col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1 + # matrix @ single column vector with proper dimension + mm_col_vec = umt.matrix_multiply(mat, col_vec) + # matmul does the same thing + matmul_col_vec = umt.matmul(mat, col_vec) + assert_array_equal(matmul_col_vec, mm_col_vec) + # matrix @ vector without dimension making it a column vector. + # matrix multiply fails -> missing core dim. + assert_raises(ValueError, umt.matrix_multiply, mat, single_vec) + # matmul mimicker passes, and returns a vector. + matmul_col = umt.matmul(mat, single_vec) + assert_array_equal(matmul_col, mm_col_vec.squeeze()) + # Now with a column array: same as for column vector, + # broadcasting sensibly. 
+ mm_col_vec = umt.matrix_multiply(mat, col_vec_array) + matmul_col_vec = umt.matmul(mat, col_vec_array) + assert_array_equal(matmul_col_vec, mm_col_vec) + # As above, but for row vector + single_vec = np.arange(3) + row_vec = single_vec[np.newaxis, :] + row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1 + # row vector @ matrix + mm_row_vec = umt.matrix_multiply(row_vec, mat) + matmul_row_vec = umt.matmul(row_vec, mat) + assert_array_equal(matmul_row_vec, mm_row_vec) + # single row vector @ matrix + assert_raises(ValueError, umt.matrix_multiply, single_vec, mat) + matmul_row = umt.matmul(single_vec, mat) + assert_array_equal(matmul_row, mm_row_vec.squeeze()) + # row vector array @ matrix + mm_row_vec = umt.matrix_multiply(row_vec_array, mat) + matmul_row_vec = umt.matmul(row_vec_array, mat) + assert_array_equal(matmul_row_vec, mm_row_vec) + # Now for vector combinations + # row vector @ column vector + col_vec = row_vec.T + col_vec_array = row_vec_array.swapaxes(-2, -1) + mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec) + matmul_row_col_vec = umt.matmul(row_vec, col_vec) + assert_array_equal(matmul_row_col_vec, mm_row_col_vec) + # single row vector @ single col vector + assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec) + matmul_row_col = umt.matmul(single_vec, single_vec) + assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze()) + # row vector array @ matrix + mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array) + matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array) + assert_array_equal(matmul_row_col_array, mm_row_col_array) + # Finally, check that things are *not* squeezed if one gives an + # output. + out = np.zeros_like(mm_row_col_array) + out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out) + assert_array_equal(out, mm_row_col_array) + out[:] = 0 + out = umt.matmul(row_vec_array, col_vec_array, out=out) + assert_array_equal(out, mm_row_col_array) + # And check one cannot put missing dimensions back. + out = np.zeros_like(mm_row_col_vec) + assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec, + out) + # But fine for matmul, since it is just a broadcast. 
+ out = umt.matmul(single_vec, single_vec, out) + assert_array_equal(out, mm_row_col_vec.squeeze()) + + def test_matrix_multiply(self): + self.compare_matrix_multiply_results(np.long) + self.compare_matrix_multiply_results(np.double) + + def test_matrix_multiply_umath_empty(self): + res = umt.matrix_multiply(np.ones((0, 10)), np.ones((10, 0))) + assert_array_equal(res, np.zeros((0, 0))) + res = umt.matrix_multiply(np.ones((10, 0)), np.ones((0, 10))) + assert_array_equal(res, np.zeros((10, 10))) + + def compare_matrix_multiply_results(self, tp): + d1 = np.array(np.random.rand(2, 3, 4), dtype=tp) + d2 = np.array(np.random.rand(2, 3, 4), dtype=tp) + msg = "matrix multiply on type %s" % d1.dtype.name + + def permute_n(n): + if n == 1: + return ([0],) + ret = () + base = permute_n(n-1) + for perm in base: + for i in range(n): + new = perm + [n-1] + new[n-1] = new[i] + new[i] = n-1 + ret += (new,) + return ret + + def slice_n(n): + if n == 0: + return ((),) + ret = () + base = slice_n(n-1) + for sl in base: + ret += (sl+(slice(None),),) + ret += (sl+(slice(0, 1),),) + return ret + + def broadcastable(s1, s2): + return s1 == s2 or s1 == 1 or s2 == 1 + + permute_3 = permute_n(3) + slice_3 = slice_n(3) + ((slice(None, None, -1),)*3,) + + ref = True + for p1 in permute_3: + for p2 in permute_3: + for s1 in slice_3: + for s2 in slice_3: + a1 = d1.transpose(p1)[s1] + a2 = d2.transpose(p2)[s2] + ref = ref and a1.base is not None + ref = ref and a2.base is not None + if (a1.shape[-1] == a2.shape[-2] and + broadcastable(a1.shape[0], a2.shape[0])): + assert_array_almost_equal( + umt.matrix_multiply(a1, a2), + np.sum(a2[..., np.newaxis].swapaxes(-3, -1) * + a1[..., np.newaxis,:], axis=-1), + err_msg=msg + ' %s %s' % (str(a1.shape), + str(a2.shape))) + + assert_equal(ref, True, err_msg="reference check") + + def test_euclidean_pdist(self): + a = np.arange(12, dtype=float).reshape(4, 3) + out = np.empty((a.shape[0] * (a.shape[0] - 1) // 2,), dtype=a.dtype) + umt.euclidean_pdist(a, out) + b = np.sqrt(np.sum((a[:, None] - a)**2, axis=-1)) + b = b[~np.tri(a.shape[0], dtype=bool)] + assert_almost_equal(out, b) + # An output array is required to determine p with signature (n,d)->(p) + assert_raises(ValueError, umt.euclidean_pdist, a) + + def test_cumsum(self): + a = np.arange(10) + result = umt.cumsum(a) + assert_array_equal(result, a.cumsum()) + + def test_object_logical(self): + a = np.array([3, None, True, False, "test", ""], dtype=object) + assert_equal(np.logical_or(a, None), + np.array([x or None for x in a], dtype=object)) + assert_equal(np.logical_or(a, True), + np.array([x or True for x in a], dtype=object)) + assert_equal(np.logical_or(a, 12), + np.array([x or 12 for x in a], dtype=object)) + assert_equal(np.logical_or(a, "blah"), + np.array([x or "blah" for x in a], dtype=object)) + + assert_equal(np.logical_and(a, None), + np.array([x and None for x in a], dtype=object)) + assert_equal(np.logical_and(a, True), + np.array([x and True for x in a], dtype=object)) + assert_equal(np.logical_and(a, 12), + np.array([x and 12 for x in a], dtype=object)) + assert_equal(np.logical_and(a, "blah"), + np.array([x and "blah" for x in a], dtype=object)) + + assert_equal(np.logical_not(a), + np.array([not x for x in a], dtype=object)) + + assert_equal(np.logical_or.reduce(a), 3) + assert_equal(np.logical_and.reduce(a), None) + + def test_object_comparison(self): + class HasComparisons(object): + def __eq__(self, other): + return '==' + + arr0d = np.array(HasComparisons()) + assert_equal(arr0d == arr0d, True) + 
assert_equal(np.equal(arr0d, arr0d), True) # normal behavior is a cast + assert_equal(np.equal(arr0d, arr0d, dtype=object), '==') + + arr1d = np.array([HasComparisons()]) + assert_equal(arr1d == arr1d, np.array([True])) + assert_equal(np.equal(arr1d, arr1d), np.array([True])) # normal behavior is a cast + assert_equal(np.equal(arr1d, arr1d, dtype=object), np.array(['=='])) + + def test_object_array_reduction(self): + # Reductions on object arrays + a = np.array(['a', 'b', 'c'], dtype=object) + assert_equal(np.sum(a), 'abc') + assert_equal(np.max(a), 'c') + assert_equal(np.min(a), 'a') + a = np.array([True, False, True], dtype=object) + assert_equal(np.sum(a), 2) + assert_equal(np.prod(a), 0) + assert_equal(np.any(a), True) + assert_equal(np.all(a), False) + assert_equal(np.max(a), True) + assert_equal(np.min(a), False) + assert_equal(np.array([[1]], dtype=object).sum(), 1) + assert_equal(np.array([[[1, 2]]], dtype=object).sum((0, 1)), [1, 2]) + assert_equal(np.array([1], dtype=object).sum(initial=1), 2) + + def test_object_array_accumulate_inplace(self): + # Checks that in-place accumulates work, see also gh-7402 + arr = np.ones(4, dtype=object) + arr[:] = [[1] for i in range(4)] + # Twice reproduced also for tuples: + np.add.accumulate(arr, out=arr) + np.add.accumulate(arr, out=arr) + assert_array_equal(arr, np.array([[1]*i for i in [1, 3, 6, 10]])) + + # And the same if the axis argument is used + arr = np.ones((2, 4), dtype=object) + arr[0, :] = [[2] for i in range(4)] + np.add.accumulate(arr, out=arr, axis=-1) + np.add.accumulate(arr, out=arr, axis=-1) + assert_array_equal(arr[0, :], np.array([[2]*i for i in [1, 3, 6, 10]])) + + def test_object_array_reduceat_inplace(self): + # Checks that in-place reduceats work, see also gh-7465 + arr = np.empty(4, dtype=object) + arr[:] = [[1] for i in range(4)] + out = np.empty(4, dtype=object) + out[:] = [[1] for i in range(4)] + np.add.reduceat(arr, np.arange(4), out=arr) + np.add.reduceat(arr, np.arange(4), out=arr) + assert_array_equal(arr, out) + + # And the same if the axis argument is used + arr = np.ones((2, 4), dtype=object) + arr[0, :] = [[2] for i in range(4)] + out = np.ones((2, 4), dtype=object) + out[0, :] = [[2] for i in range(4)] + np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) + np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) + assert_array_equal(arr, out) + + def test_zerosize_reduction(self): + # Test with default dtype and object dtype + for a in [[], np.array([], dtype=object)]: + assert_equal(np.sum(a), 0) + assert_equal(np.prod(a), 1) + assert_equal(np.any(a), False) + assert_equal(np.all(a), True) + assert_raises(ValueError, np.max, a) + assert_raises(ValueError, np.min, a) + + def test_axis_out_of_bounds(self): + a = np.array([False, False]) + assert_raises(np.AxisError, a.all, axis=1) + a = np.array([False, False]) + assert_raises(np.AxisError, a.all, axis=-2) + + a = np.array([False, False]) + assert_raises(np.AxisError, a.any, axis=1) + a = np.array([False, False]) + assert_raises(np.AxisError, a.any, axis=-2) + + def test_scalar_reduction(self): + # The functions 'sum', 'prod', etc allow specifying axis=0 + # even for scalars + assert_equal(np.sum(3, axis=0), 3) + assert_equal(np.prod(3.5, axis=0), 3.5) + assert_equal(np.any(True, axis=0), True) + assert_equal(np.all(False, axis=0), False) + assert_equal(np.max(3, axis=0), 3) + assert_equal(np.min(2.5, axis=0), 2.5) + + # Check scalar behaviour for ufuncs without an identity + assert_equal(np.power.reduce(3), 3) + + # Make sure that scalars are coming out 
from this operation + assert_(type(np.prod(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.sum(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.max(np.float32(2.5), axis=0)) is np.float32) + assert_(type(np.min(np.float32(2.5), axis=0)) is np.float32) + + # check if scalars/0-d arrays get cast + assert_(type(np.any(0, axis=0)) is np.bool_) + + # assert that 0-d arrays get wrapped + class MyArray(np.ndarray): + pass + a = np.array(1).view(MyArray) + assert_(type(np.any(a)) is MyArray) + + def test_casting_out_param(self): + # Test that it's possible to do casts on output + a = np.ones((200, 100), np.int64) + b = np.ones((200, 100), np.int64) + c = np.ones((200, 100), np.float64) + np.add(a, b, out=c) + assert_equal(c, 2) + + a = np.zeros(65536) + b = np.zeros(65536, dtype=np.float32) + np.subtract(a, 0, out=b) + assert_equal(b, 0) + + def test_where_param(self): + # Test that the where= ufunc parameter works with regular arrays + a = np.arange(7) + b = np.ones(7) + c = np.zeros(7) + np.add(a, b, out=c, where=(a % 2 == 1)) + assert_equal(c, [0, 2, 0, 4, 0, 6, 0]) + + a = np.arange(4).reshape(2, 2) + 2 + np.power(a, [2, 3], out=a, where=[[0, 1], [1, 0]]) + assert_equal(a, [[2, 27], [16, 5]]) + # Broadcasting the where= parameter + np.subtract(a, 2, out=a, where=[True, False]) + assert_equal(a, [[0, 27], [14, 5]]) + + def test_where_param_buffer_output(self): + # This test is temporarily skipped because it requires + # adding masking features to the nditer to work properly + + # With casting on output + a = np.ones(10, np.int64) + b = np.ones(10, np.int64) + c = 1.5 * np.ones(10, np.float64) + np.add(a, b, out=c, where=[1, 0, 0, 1, 0, 0, 1, 1, 1, 0]) + assert_equal(c, [2, 1.5, 1.5, 2, 1.5, 1.5, 2, 2, 2, 1.5]) + + def test_where_param_alloc(self): + # With casting and allocated output + a = np.array([1], dtype=np.int64) + m = np.array([True], dtype=bool) + assert_equal(np.sqrt(a, where=m), [1]) + + # No casting and allocated output + a = np.array([1], dtype=np.float64) + m = np.array([True], dtype=bool) + assert_equal(np.sqrt(a, where=m), [1]) + + def check_identityless_reduction(self, a): + # np.minimum.reduce is an identityless reduction + + # Verify that it sees the zero at various positions + a[...] = 1 + a[1, 0, 0] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [0, 1, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [1, 0]) + assert_equal(np.minimum.reduce(a, axis=0), + [[0, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[1, 1, 1, 1], [0, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[1, 1, 1], [0, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + a[...] = 1 + a[0, 1, 0] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [0, 1, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [1, 0, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [0, 1]) + assert_equal(np.minimum.reduce(a, axis=0), + [[1, 1, 1, 1], [0, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[0, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[1, 0, 1], [1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + a[...] 
= 1 + a[0, 0, 1] = 0 + assert_equal(np.minimum.reduce(a, axis=None), 0) + assert_equal(np.minimum.reduce(a, axis=(0, 1)), [1, 0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(0, 2)), [0, 1, 1]) + assert_equal(np.minimum.reduce(a, axis=(1, 2)), [0, 1]) + assert_equal(np.minimum.reduce(a, axis=0), + [[1, 0, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=1), + [[1, 0, 1, 1], [1, 1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=2), + [[0, 1, 1], [1, 1, 1]]) + assert_equal(np.minimum.reduce(a, axis=()), a) + + def test_identityless_reduction_corder(self): + a = np.empty((2, 3, 4), order='C') + self.check_identityless_reduction(a) + + def test_identityless_reduction_forder(self): + a = np.empty((2, 3, 4), order='F') + self.check_identityless_reduction(a) + + def test_identityless_reduction_otherorder(self): + a = np.empty((2, 4, 3), order='C').swapaxes(1, 2) + self.check_identityless_reduction(a) + + def test_identityless_reduction_noncontig(self): + a = np.empty((3, 5, 4), order='C').swapaxes(1, 2) + a = a[1:, 1:, 1:] + self.check_identityless_reduction(a) + + def test_identityless_reduction_noncontig_unaligned(self): + a = np.empty((3*4*5*8 + 1,), dtype='i1') + a = a[1:].view(dtype='f8') + a.shape = (3, 4, 5) + a = a[1:, 1:, 1:] + self.check_identityless_reduction(a) + + def test_initial_reduction(self): + # np.minimum.reduce is an identityless reduction + + # For cases like np.maximum(np.abs(...), initial=0) + # More generally, a supremum over non-negative numbers. + assert_equal(np.maximum.reduce([], initial=0), 0) + + # For cases like reduction of an empty array over the reals. + assert_equal(np.minimum.reduce([], initial=np.inf), np.inf) + assert_equal(np.maximum.reduce([], initial=-np.inf), -np.inf) + + # Random tests + assert_equal(np.minimum.reduce([5], initial=4), 4) + assert_equal(np.maximum.reduce([4], initial=5), 5) + assert_equal(np.maximum.reduce([5], initial=4), 5) + assert_equal(np.minimum.reduce([4], initial=5), 4) + + # Check initial=None raises ValueError for both types of ufunc reductions + assert_raises(ValueError, np.minimum.reduce, [], initial=None) + assert_raises(ValueError, np.add.reduce, [], initial=None) + + # Check that np._NoValue gives default behavior. + assert_equal(np.add.reduce([], initial=np._NoValue), 0) + + # Check that initial kwarg behaves as intended for dtype=object + a = np.array([10], dtype=object) + res = np.add.reduce(a, initial=5) + assert_equal(res, 15) + + def test_identityless_reduction_nonreorderable(self): + a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]]) + + res = np.divide.reduce(a, axis=0) + assert_equal(res, [8.0, 4.0, 8.0]) + + res = np.divide.reduce(a, axis=1) + assert_equal(res, [2.0, 8.0]) + + res = np.divide.reduce(a, axis=()) + assert_equal(res, a) + + assert_raises(ValueError, np.divide.reduce, a, axis=(0, 1)) + + def test_reduce_zero_axis(self): + # If we have a n x m array and do a reduction with axis=1, then we are + # doing n reductions, and each reduction takes an m-element array. 
For + # a reduction operation without an identity, then: + # n > 0, m > 0: fine + # n = 0, m > 0: fine, doing 0 reductions of m-element arrays + # n > 0, m = 0: can't reduce a 0-element array, ValueError + # n = 0, m = 0: can't reduce a 0-element array, ValueError (for + # consistency with the above case) + # This test doesn't actually look at return values, it just checks to + # make sure that error we get an error in exactly those cases where we + # expect one, and assumes the calculations themselves are done + # correctly. + + def ok(f, *args, **kwargs): + f(*args, **kwargs) + + def err(f, *args, **kwargs): + assert_raises(ValueError, f, *args, **kwargs) + + def t(expect, func, n, m): + expect(func, np.zeros((n, m)), axis=1) + expect(func, np.zeros((m, n)), axis=0) + expect(func, np.zeros((n // 2, n // 2, m)), axis=2) + expect(func, np.zeros((n // 2, m, n // 2)), axis=1) + expect(func, np.zeros((n, m // 2, m // 2)), axis=(1, 2)) + expect(func, np.zeros((m // 2, n, m // 2)), axis=(0, 2)) + expect(func, np.zeros((m // 3, m // 3, m // 3, + n // 2, n // 2)), + axis=(0, 1, 2)) + # Check what happens if the inner (resp. outer) dimensions are a + # mix of zero and non-zero: + expect(func, np.zeros((10, m, n)), axis=(0, 1)) + expect(func, np.zeros((10, n, m)), axis=(0, 2)) + expect(func, np.zeros((m, 10, n)), axis=0) + expect(func, np.zeros((10, m, n)), axis=1) + expect(func, np.zeros((10, n, m)), axis=2) + + # np.maximum is just an arbitrary ufunc with no reduction identity + assert_equal(np.maximum.identity, None) + t(ok, np.maximum.reduce, 30, 30) + t(ok, np.maximum.reduce, 0, 30) + t(err, np.maximum.reduce, 30, 0) + t(err, np.maximum.reduce, 0, 0) + err(np.maximum.reduce, []) + np.maximum.reduce(np.zeros((0, 0)), axis=()) + + # all of the combinations are fine for a reduction that has an + # identity + t(ok, np.add.reduce, 30, 30) + t(ok, np.add.reduce, 0, 30) + t(ok, np.add.reduce, 30, 0) + t(ok, np.add.reduce, 0, 0) + np.add.reduce([]) + np.add.reduce(np.zeros((0, 0)), axis=()) + + # OTOH, accumulate always makes sense for any combination of n and m, + # because it maps an m-element array to an m-element array. These + # tests are simpler because accumulate doesn't accept multiple axes. + for uf in (np.maximum, np.add): + uf.accumulate(np.zeros((30, 0)), axis=0) + uf.accumulate(np.zeros((0, 30)), axis=0) + uf.accumulate(np.zeros((30, 30)), axis=0) + uf.accumulate(np.zeros((0, 0)), axis=0) + + def test_safe_casting(self): + # In old versions of numpy, in-place operations used the 'unsafe' + # casting rules. In versions >= 1.10, 'same_kind' is the + # default and an exception is raised instead of a warning. + # when 'same_kind' is not satisfied. 
+ a = np.array([1, 2, 3], dtype=int) + # Non-in-place addition is fine + assert_array_equal(assert_no_warnings(np.add, a, 1.1), + [2.1, 3.1, 4.1]) + assert_raises(TypeError, np.add, a, 1.1, out=a) + + def add_inplace(a, b): + a += b + + assert_raises(TypeError, add_inplace, a, 1.1) + # Make sure that explicitly overriding the exception is allowed: + assert_no_warnings(np.add, a, 1.1, out=a, casting="unsafe") + assert_array_equal(a, [2, 3, 4]) + + def test_ufunc_custom_out(self): + # Test ufunc with built in input types and custom output type + + a = np.array([0, 1, 2], dtype='i8') + b = np.array([0, 1, 2], dtype='i8') + c = np.empty(3, dtype=_rational_tests.rational) + + # Output must be specified so numpy knows what + # ufunc signature to look for + result = _rational_tests.test_add(a, b, c) + target = np.array([0, 2, 4], dtype=_rational_tests.rational) + assert_equal(result, target) + + # no output type should raise TypeError + with assert_raises(TypeError): + _rational_tests.test_add(a, b) + + def test_operand_flags(self): + a = np.arange(16, dtype='l').reshape(4, 4) + b = np.arange(9, dtype='l').reshape(3, 3) + opflag_tests.inplace_add(a[:-1, :-1], b) + assert_equal(a, np.array([[0, 2, 4, 3], [7, 9, 11, 7], + [14, 16, 18, 11], [12, 13, 14, 15]], dtype='l')) + + a = np.array(0) + opflag_tests.inplace_add(a, 3) + assert_equal(a, 3) + opflag_tests.inplace_add(a, [3, 4]) + assert_equal(a, 10) + + def test_struct_ufunc(self): + import numpy.core._struct_ufunc_tests as struct_ufunc + + a = np.array([(1, 2, 3)], dtype='u8,u8,u8') + b = np.array([(1, 2, 3)], dtype='u8,u8,u8') + + result = struct_ufunc.add_triplet(a, b) + assert_equal(result, np.array([(2, 4, 6)], dtype='u8,u8,u8')) + + def test_custom_ufunc(self): + a = np.array( + [_rational_tests.rational(1, 2), + _rational_tests.rational(1, 3), + _rational_tests.rational(1, 4)], + dtype=_rational_tests.rational) + b = np.array( + [_rational_tests.rational(1, 2), + _rational_tests.rational(1, 3), + _rational_tests.rational(1, 4)], + dtype=_rational_tests.rational) + + result = _rational_tests.test_add_rationals(a, b) + expected = np.array( + [_rational_tests.rational(1), + _rational_tests.rational(2, 3), + _rational_tests.rational(1, 2)], + dtype=_rational_tests.rational) + assert_equal(result, expected) + + def test_custom_ufunc_forced_sig(self): + # gh-9351 - looking for a non-first userloop would previously hang + with assert_raises(TypeError): + np.multiply(_rational_tests.rational(1), 1, + signature=(_rational_tests.rational, int, None)) + + def test_custom_array_like(self): + + class MyThing(object): + __array_priority__ = 1000 + + rmul_count = 0 + getitem_count = 0 + + def __init__(self, shape): + self.shape = shape + + def __len__(self): + return self.shape[0] + + def __getitem__(self, i): + MyThing.getitem_count += 1 + if not isinstance(i, tuple): + i = (i,) + if len(i) > self.ndim: + raise IndexError("boo") + + return MyThing(self.shape[len(i):]) + + def __rmul__(self, other): + MyThing.rmul_count += 1 + return self + + np.float64(5)*MyThing((3, 3)) + assert_(MyThing.rmul_count == 1, MyThing.rmul_count) + assert_(MyThing.getitem_count <= 2, MyThing.getitem_count) + + def test_inplace_fancy_indexing(self): + + a = np.arange(10) + np.add.at(a, [2, 5, 2], 1) + assert_equal(a, [0, 1, 4, 3, 4, 6, 6, 7, 8, 9]) + + a = np.arange(10) + b = np.array([100, 100, 100]) + np.add.at(a, [2, 5, 2], b) + assert_equal(a, [0, 1, 202, 3, 4, 105, 6, 7, 8, 9]) + + a = np.arange(9).reshape(3, 3) + b = np.array([[100, 100, 100], [200, 200, 200], [300, 
300, 300]]) + np.add.at(a, (slice(None), [1, 2, 1]), b) + assert_equal(a, [[0, 201, 102], [3, 404, 205], [6, 607, 308]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), slice(None), [1, 2, 1]), b) + assert_equal(a, + [[[0, 401, 202], + [3, 404, 205], + [6, 407, 208]], + + [[9, 410, 211], + [12, 413, 214], + [15, 416, 217]], + + [[18, 419, 220], + [21, 422, 223], + [24, 425, 226]]]) + + a = np.arange(9).reshape(3, 3) + b = np.array([[100, 100, 100], [200, 200, 200], [300, 300, 300]]) + np.add.at(a, ([1, 2, 1], slice(None)), b) + assert_equal(a, [[0, 1, 2], [403, 404, 405], [206, 207, 208]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), [1, 2, 1], slice(None)), b) + assert_equal(a, + [[[0, 1, 2], + [203, 404, 605], + [106, 207, 308]], + + [[9, 10, 11], + [212, 413, 614], + [115, 216, 317]], + + [[18, 19, 20], + [221, 422, 623], + [124, 225, 326]]]) + + a = np.arange(9).reshape(3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (0, [1, 2, 1]), b) + assert_equal(a, [[0, 401, 202], [3, 4, 5], [6, 7, 8]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, ([1, 2, 1], 0, slice(None)), b) + assert_equal(a, + [[[0, 1, 2], + [3, 4, 5], + [6, 7, 8]], + + [[209, 410, 611], + [12, 13, 14], + [15, 16, 17]], + + [[118, 219, 320], + [21, 22, 23], + [24, 25, 26]]]) + + a = np.arange(27).reshape(3, 3, 3) + b = np.array([100, 200, 300]) + np.add.at(a, (slice(None), slice(None), slice(None)), b) + assert_equal(a, + [[[100, 201, 302], + [103, 204, 305], + [106, 207, 308]], + + [[109, 210, 311], + [112, 213, 314], + [115, 216, 317]], + + [[118, 219, 320], + [121, 222, 323], + [124, 225, 326]]]) + + a = np.arange(10) + np.negative.at(a, [2, 5, 2]) + assert_equal(a, [0, 1, 2, 3, 4, -5, 6, 7, 8, 9]) + + # Test 0-dim array + a = np.array(0) + np.add.at(a, (), 1) + assert_equal(a, 1) + + assert_raises(IndexError, np.add.at, a, 0, 1) + assert_raises(IndexError, np.add.at, a, [], 1) + + # Test mixed dtypes + a = np.arange(10) + np.power.at(a, [1, 2, 3, 2], 3.5) + assert_equal(a, np.array([0, 1, 4414, 46, 4, 5, 6, 7, 8, 9])) + + # Test boolean indexing and boolean ufuncs + a = np.arange(10) + index = a % 2 == 0 + np.equal.at(a, index, [0, 2, 4, 6, 8]) + assert_equal(a, [1, 1, 1, 3, 1, 5, 1, 7, 1, 9]) + + # Test unary operator + a = np.arange(10, dtype='u4') + np.invert.at(a, [2, 5, 2]) + assert_equal(a, [0, 1, 2, 3, 4, 5 ^ 0xffffffff, 6, 7, 8, 9]) + + # Test empty subspace + orig = np.arange(4) + a = orig[:, None][:, 0:0] + np.add.at(a, [0, 1], 3) + assert_array_equal(orig, np.arange(4)) + + # Test with swapped byte order + index = np.array([1, 2, 1], np.dtype('i').newbyteorder()) + values = np.array([1, 2, 3, 4], np.dtype('f').newbyteorder()) + np.add.at(values, index, 3) + assert_array_equal(values, [1, 8, 6, 4]) + + # Test exception thrown + values = np.array(['a', 1], dtype=object) + assert_raises(TypeError, np.add.at, values, [0, 1], 1) + assert_array_equal(values, np.array(['a', 1], dtype=object)) + + # Test multiple output ufuncs raise error, gh-5665 + assert_raises(ValueError, np.modf.at, np.arange(10), [1]) + + def test_reduce_arguments(self): + f = np.add.reduce + d = np.ones((5,2), dtype=int) + o = np.ones((2,), dtype=d.dtype) + r = o * 5 + assert_equal(f(d), r) + # a, axis=0, dtype=None, out=None, keepdims=False + assert_equal(f(d, axis=0), r) + assert_equal(f(d, 0), r) + assert_equal(f(d, 0, dtype=None), r) + assert_equal(f(d, 0, dtype='i'), r) + assert_equal(f(d, 
0, 'i'), r) + assert_equal(f(d, 0, None), r) + assert_equal(f(d, 0, None, out=None), r) + assert_equal(f(d, 0, None, out=o), r) + assert_equal(f(d, 0, None, o), r) + assert_equal(f(d, 0, None, None), r) + assert_equal(f(d, 0, None, None, keepdims=False), r) + assert_equal(f(d, 0, None, None, True), r.reshape((1,) + r.shape)) + assert_equal(f(d, 0, None, None, False, 0), r) + assert_equal(f(d, 0, None, None, False, initial=0), r) + # multiple keywords + assert_equal(f(d, axis=0, dtype=None, out=None, keepdims=False), r) + assert_equal(f(d, 0, dtype=None, out=None, keepdims=False), r) + assert_equal(f(d, 0, None, out=None, keepdims=False), r) + assert_equal(f(d, 0, None, out=None, keepdims=False, initial=0), r) + + # too little + assert_raises(TypeError, f) + # too much + assert_raises(TypeError, f, d, 0, None, None, False, 0, 1) + # invalid axis + assert_raises(TypeError, f, d, "invalid") + assert_raises(TypeError, f, d, axis="invalid") + assert_raises(TypeError, f, d, axis="invalid", dtype=None, + keepdims=True) + # invalid dtype + assert_raises(TypeError, f, d, 0, "invalid") + assert_raises(TypeError, f, d, dtype="invalid") + assert_raises(TypeError, f, d, dtype="invalid", out=None) + # invalid out + assert_raises(TypeError, f, d, 0, None, "invalid") + assert_raises(TypeError, f, d, out="invalid") + assert_raises(TypeError, f, d, out="invalid", dtype=None) + # keepdims boolean, no invalid value + # assert_raises(TypeError, f, d, 0, None, None, "invalid") + # assert_raises(TypeError, f, d, keepdims="invalid", axis=0, dtype=None) + # invalid mix + assert_raises(TypeError, f, d, 0, keepdims="invalid", dtype="invalid", + out=None) + + # invalid keyord + assert_raises(TypeError, f, d, axis=0, dtype=None, invalid=0) + assert_raises(TypeError, f, d, invalid=0) + assert_raises(TypeError, f, d, 0, keepdims=True, invalid="invalid", + out=None) + assert_raises(TypeError, f, d, axis=0, dtype=None, keepdims=True, + out=None, invalid=0) + assert_raises(TypeError, f, d, axis=0, dtype=None, + out=None, invalid=0) + + def test_structured_equal(self): + # https://github.com/numpy/numpy/issues/4855 + + class MyA(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return getattr(ufunc, method)(*(input.view(np.ndarray) + for input in inputs), **kwargs) + a = np.arange(12.).reshape(4,3) + ra = a.view(dtype=('f8,f8,f8')).squeeze() + mra = ra.view(MyA) + + target = np.array([ True, False, False, False], dtype=bool) + assert_equal(np.all(target == (mra == ra[0])), True) + + def test_scalar_equal(self): + # Scalar comparisons should always work, without deprecation warnings. + # even when the ufunc fails. + a = np.array(0.) + b = np.array('a') + assert_(a != b) + assert_(b != a) + assert_(not (a == b)) + assert_(not (b == a)) + + def test_NotImplemented_not_returned(self): + # See gh-5964 and gh-2091. Some of these functions are not operator + # related and were fixed for other reasons in the past. 
+ binary_funcs = [ + np.power, np.add, np.subtract, np.multiply, np.divide, + np.true_divide, np.floor_divide, np.bitwise_and, np.bitwise_or, + np.bitwise_xor, np.left_shift, np.right_shift, np.fmax, + np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2, + np.logical_and, np.logical_or, np.logical_xor, np.maximum, + np.minimum, np.mod, + np.greater, np.greater_equal, np.less, np.less_equal, + np.equal, np.not_equal] + + a = np.array('1') + b = 1 + c = np.array([1., 2.]) + for f in binary_funcs: + assert_raises(TypeError, f, a, b) + assert_raises(TypeError, f, c, a) + + def test_reduce_noncontig_output(self): + # Check that reduction deals with non-contiguous output arrays + # appropriately. + # + # gh-8036 + + x = np.arange(7*13*8, dtype=np.int16).reshape(7, 13, 8) + x = x[4:6,1:11:6,1:5].transpose(1, 2, 0) + y_base = np.arange(4*4, dtype=np.int16).reshape(4, 4) + y = y_base[::2,:] + + y_base_copy = y_base.copy() + + r0 = np.add.reduce(x, out=y.copy(), axis=2) + r1 = np.add.reduce(x, out=y, axis=2) + + # The results should match, and y_base shouldn't get clobbered + assert_equal(r0, r1) + assert_equal(y_base[1,:], y_base_copy[1,:]) + assert_equal(y_base[3,:], y_base_copy[3,:]) + + def test_no_doc_string(self): + # gh-9337 + assert_('\n' not in umt.inner1d_no_doc.__doc__) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_umath.py b/contrib/python/numpy/py2/numpy/core/tests/test_umath.py new file mode 100644 index 0000000000..eb6a67fa3f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_umath.py @@ -0,0 +1,2920 @@ +from __future__ import division, absolute_import, print_function + +import platform +import warnings +import fnmatch +import itertools +import pytest + +import numpy.core.umath as ncu +from numpy.core import _umath_tests as ncu_tests +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex, + assert_array_equal, assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_no_warnings, suppress_warnings, + _gen_alignment_data + ) + + +def on_powerpc(): + """ True if we are running on a Power PC platform.""" + return platform.processor() == 'powerpc' or \ + platform.machine().startswith('ppc') + + +class _FilterInvalids(object): + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + +class TestConstants(object): + def test_pi(self): + assert_allclose(ncu.pi, 3.141592653589793, 1e-15) + + def test_e(self): + assert_allclose(ncu.e, 2.718281828459045, 1e-15) + + def test_euler_gamma(self): + assert_allclose(ncu.euler_gamma, 0.5772156649015329, 1e-15) + + +class TestOut(object): + def test_out_subok(self): + for subok in (True, False): + a = np.array(0.5) + o = np.empty(()) + + r = np.add(a, 2, o, subok=subok) + assert_(r is o) + r = np.add(a, 2, out=o, subok=subok) + assert_(r is o) + r = np.add(a, 2, out=(o,), subok=subok) + assert_(r is o) + + d = np.array(5.7) + o1 = np.empty(()) + o2 = np.empty((), dtype=np.int32) + + r1, r2 = np.frexp(d, o1, None, subok=subok) + assert_(r1 is o1) + r1, r2 = np.frexp(d, None, o2, subok=subok) + assert_(r2 is o2) + r1, r2 = np.frexp(d, o1, o2, subok=subok) + assert_(r1 is o1) + assert_(r2 is o2) + + r1, r2 = np.frexp(d, out=(o1, None), subok=subok) + assert_(r1 is o1) + r1, r2 = np.frexp(d, out=(None, o2), subok=subok) + assert_(r2 is o2) + r1, r2 = np.frexp(d, out=(o1, o2), subok=subok) + assert_(r1 is o1) + assert_(r2 is o2) + + with warnings.catch_warnings(record=True) as w: + 
warnings.filterwarnings('always', '', DeprecationWarning) + r1, r2 = np.frexp(d, out=o1, subok=subok) + assert_(r1 is o1) + assert_(w[0].category is DeprecationWarning) + + assert_raises(ValueError, np.add, a, 2, o, o, subok=subok) + assert_raises(ValueError, np.add, a, 2, o, out=o, subok=subok) + assert_raises(ValueError, np.add, a, 2, None, out=o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(o, o), subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(), subok=subok) + assert_raises(TypeError, np.add, a, 2, [], subok=subok) + assert_raises(TypeError, np.add, a, 2, out=[], subok=subok) + assert_raises(TypeError, np.add, a, 2, out=([],), subok=subok) + o.flags.writeable = False + assert_raises(ValueError, np.add, a, 2, o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=o, subok=subok) + assert_raises(ValueError, np.add, a, 2, out=(o,), subok=subok) + + def test_out_wrap_subok(self): + class ArrayWrap(np.ndarray): + __array_priority__ = 10 + + def __new__(cls, arr): + return np.asarray(arr).view(cls).copy() + + def __array_wrap__(self, arr, context): + return arr.view(type(self)) + + for subok in (True, False): + a = ArrayWrap([0.5]) + + r = np.add(a, 2, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, None, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, out=None, subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + r = np.add(a, 2, out=(None,), subok=subok) + if subok: + assert_(isinstance(r, ArrayWrap)) + else: + assert_(type(r) == np.ndarray) + + d = ArrayWrap([5.7]) + o1 = np.empty((1,)) + o2 = np.empty((1,), dtype=np.int32) + + r1, r2 = np.frexp(d, o1, subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, r2 = np.frexp(d, o1, None, subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, r2 = np.frexp(d, None, o2, subok=subok) + if subok: + assert_(isinstance(r1, ArrayWrap)) + else: + assert_(type(r1) == np.ndarray) + + r1, r2 = np.frexp(d, out=(o1, None), subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + + r1, r2 = np.frexp(d, out=(None, o2), subok=subok) + if subok: + assert_(isinstance(r1, ArrayWrap)) + else: + assert_(type(r1) == np.ndarray) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', DeprecationWarning) + r1, r2 = np.frexp(d, out=o1, subok=subok) + if subok: + assert_(isinstance(r2, ArrayWrap)) + else: + assert_(type(r2) == np.ndarray) + assert_(w[0].category is DeprecationWarning) + + +class TestComparisons(object): + def test_ignore_object_identity_in_equal(self): + # Check error raised when comparing identical objects whose comparison + # is not a simple boolean, e.g., arrays that are compared elementwise. + a = np.array([np.array([1, 2, 3]), None], dtype=object) + assert_raises(ValueError, np.equal, a, a) + + # Check error raised when comparing identical non-comparable objects. + class FunkyType(object): + def __eq__(self, other): + raise TypeError("I won't compare") + + a = np.array([FunkyType()]) + assert_raises(TypeError, np.equal, a, a) + + # Check identity doesn't override comparison mismatch. 
+ a = np.array([np.nan], dtype=object) + assert_equal(np.equal(a, a), [False]) + + def test_ignore_object_identity_in_not_equal(self): + # Check error raised when comparing identical objects whose comparison + # is not a simple boolean, e.g., arrays that are compared elementwise. + a = np.array([np.array([1, 2, 3]), None], dtype=object) + assert_raises(ValueError, np.not_equal, a, a) + + # Check error raised when comparing identical non-comparable objects. + class FunkyType(object): + def __ne__(self, other): + raise TypeError("I won't compare") + + a = np.array([FunkyType()]) + assert_raises(TypeError, np.not_equal, a, a) + + # Check identity doesn't override comparison mismatch. + a = np.array([np.nan], dtype=object) + assert_equal(np.not_equal(a, a), [True]) + + +class TestAdd(object): + def test_reduce_alignment(self): + # gh-9876 + # make sure arrays with weird strides work with the optimizations in + # pairwise_sum_@TYPE@. On x86, the 'b' field will count as aligned at a + # 4 byte offset, even though its itemsize is 8. + a = np.zeros(2, dtype=[('a', np.int32), ('b', np.float64)]) + a['a'] = -1 + assert_equal(a['b'].sum(), 0) + + +class TestDivision(object): + def test_division_int(self): + # int division should follow Python + x = np.array([5, 10, 90, 100, -5, -10, -90, -100, -120]) + if 5 / 10 == 0.5: + assert_equal(x / 100, [0.05, 0.1, 0.9, 1, + -0.05, -0.1, -0.9, -1, -1.2]) + else: + assert_equal(x / 100, [0, 0, 0, 1, -1, -1, -1, -1, -2]) + assert_equal(x // 100, [0, 0, 0, 1, -1, -1, -1, -1, -2]) + assert_equal(x % 100, [5, 10, 90, 0, 95, 90, 10, 0, 80]) + + def test_division_complex(self): + # check that implementation is correct + msg = "Complex division implementation check" + x = np.array([1. + 1.*1j, 1. + .5*1j, 1. + 2.*1j], dtype=np.complex128) + assert_almost_equal(x**2/x, x, err_msg=msg) + # check overflow, underflow + msg = "Complex division overflow/underflow check" + x = np.array([1.e+110, 1.e-110], dtype=np.complex128) + y = x**2/x + assert_almost_equal(y/x, [1, 1], err_msg=msg) + + def test_zero_division_complex(self): + with np.errstate(invalid="ignore", divide="ignore"): + x = np.array([0.0], dtype=np.complex128) + y = 1.0/x + assert_(np.isinf(y)[0]) + y = complex(np.inf, np.nan)/x + assert_(np.isinf(y)[0]) + y = complex(np.nan, np.inf)/x + assert_(np.isinf(y)[0]) + y = complex(np.inf, np.inf)/x + assert_(np.isinf(y)[0]) + y = 0.0/x + assert_(np.isnan(y)[0]) + + def test_floor_division_complex(self): + # check that implementation is correct + msg = "Complex floor division implementation check" + x = np.array([.9 + 1j, -.1 + 1j, .9 + .5*1j, .9 + 2.*1j], dtype=np.complex128) + y = np.array([0., -1., 0., 0.], dtype=np.complex128) + assert_equal(np.floor_divide(x**2, x), y, err_msg=msg) + # check overflow, underflow + msg = "Complex floor division overflow/underflow check" + x = np.array([1.e+110, 1.e-110], dtype=np.complex128) + y = np.floor_divide(x**2, x) + assert_equal(y, [1.e+110, 0], err_msg=msg) + + def test_floor_division_signed_zero(self): + # Check that the sign bit is correctly set when dividing positive and + # negative zero by one. 
+ x = np.zeros(10) + assert_equal(np.signbit(x//1), 0) + assert_equal(np.signbit((-x)//1), 1) + +def floor_divide_and_remainder(x, y): + return (np.floor_divide(x, y), np.remainder(x, y)) + + +def _signs(dt): + if dt in np.typecodes['UnsignedInteger']: + return (+1,) + else: + return (+1, -1) + + +class TestRemainder(object): + + def test_remainder_basic(self): + dt = np.typecodes['AllInteger'] + np.typecodes['Float'] + for op in [floor_divide_and_remainder, np.divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product(_signs(dt1), _signs(dt2)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*71, dtype=dt1) + b = np.array(sg2*19, dtype=dt2) + div, rem = op(a, b) + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_remainder_exact(self): + # test that float results are exact for small integers. This also + # holds for the same integers scaled by powers of two. + nlst = list(range(-127, 0)) + plst = list(range(1, 128)) + dividend = nlst + [0] + plst + divisor = nlst + plst + arg = list(itertools.product(dividend, divisor)) + tgt = list(divmod(*t) for t in arg) + + a, b = np.array(arg, dtype=int).T + # convert exact integer results from Python to float so that + # signed zero can be used, it is checked. + tgtdiv, tgtrem = np.array(tgt, dtype=float).T + tgtdiv = np.where((tgtdiv == 0.0) & ((b < 0) ^ (a < 0)), -0.0, tgtdiv) + tgtrem = np.where((tgtrem == 0.0) & (b < 0), -0.0, tgtrem) + + for op in [floor_divide_and_remainder, np.divmod]: + for dt in np.typecodes['Float']: + msg = 'op: %s, dtype: %s' % (op.__name__, dt) + fa = a.astype(dt) + fb = b.astype(dt) + div, rem = op(fa, fb) + assert_equal(div, tgtdiv, err_msg=msg) + assert_equal(rem, tgtrem, err_msg=msg) + + def test_float_remainder_roundoff(self): + # gh-6127 + dt = np.typecodes['Float'] + for op in [floor_divide_and_remainder, np.divmod]: + for dt1, dt2 in itertools.product(dt, dt): + for sg1, sg2 in itertools.product((+1, -1), (+1, -1)): + fmt = 'op: %s, dt1: %s, dt2: %s, sg1: %s, sg2: %s' + msg = fmt % (op.__name__, dt1, dt2, sg1, sg2) + a = np.array(sg1*78*6e-8, dtype=dt1) + b = np.array(sg2*6e-8, dtype=dt2) + div, rem = op(a, b) + # Equal assertion should hold when fmod is used + assert_equal(div*b + rem, a, err_msg=msg) + if sg2 == -1: + assert_(b < rem <= 0, msg) + else: + assert_(b > rem >= 0, msg) + + def test_float_remainder_corner_cases(self): + # Check remainder magnitude. + for dt in np.typecodes['Float']: + b = np.array(1.0, dtype=dt) + a = np.nextafter(np.array(0.0, dtype=dt), -b) + rem = np.remainder(a, b) + assert_(rem <= b, 'dt: %s' % dt) + rem = np.remainder(-a, -b) + assert_(rem >= -b, 'dt: %s' % dt) + + # Check nans, inf + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in remainder") + for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + fnan = np.array(np.nan, dtype=dt) + rem = np.remainder(fone, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + # MSVC 2008 returns NaN here, so disable the check. 
+ #rem = np.remainder(fone, finf) + #assert_(rem == fone, 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(fone, fnan) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, fone) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + + +class TestCbrt(object): + def test_cbrt_scalar(self): + assert_almost_equal((np.cbrt(np.float32(-2.5)**3)), -2.5) + + def test_cbrt(self): + x = np.array([1., 2., -3., np.inf, -np.inf]) + assert_almost_equal(np.cbrt(x**3), x) + + assert_(np.isnan(np.cbrt(np.nan))) + assert_equal(np.cbrt(np.inf), np.inf) + assert_equal(np.cbrt(-np.inf), -np.inf) + + +class TestPower(object): + def test_power_float(self): + x = np.array([1., 2., 3.]) + assert_equal(x**0, [1., 1., 1.]) + assert_equal(x**1, x) + assert_equal(x**2, [1., 4., 9.]) + y = x.copy() + y **= 2 + assert_equal(y, [1., 4., 9.]) + assert_almost_equal(x**(-1), [1., 0.5, 1./3]) + assert_almost_equal(x**(0.5), [1., ncu.sqrt(2), ncu.sqrt(3)]) + + for out, inp, msg in _gen_alignment_data(dtype=np.float32, + type='unary', + max_size=11): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + for out, inp, msg in _gen_alignment_data(dtype=np.float64, + type='unary', + max_size=7): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + def test_power_complex(self): + x = np.array([1+2j, 2+3j, 3+4j]) + assert_equal(x**0, [1., 1., 1.]) + assert_equal(x**1, x) + assert_almost_equal(x**2, [-3+4j, -5+12j, -7+24j]) + assert_almost_equal(x**3, [(1+2j)**3, (2+3j)**3, (3+4j)**3]) + assert_almost_equal(x**4, [(1+2j)**4, (2+3j)**4, (3+4j)**4]) + assert_almost_equal(x**(-1), [1/(1+2j), 1/(2+3j), 1/(3+4j)]) + assert_almost_equal(x**(-2), [1/(1+2j)**2, 1/(2+3j)**2, 1/(3+4j)**2]) + assert_almost_equal(x**(-3), [(-11+2j)/125, (-46-9j)/2197, + (-117-44j)/15625]) + assert_almost_equal(x**(0.5), [ncu.sqrt(1+2j), ncu.sqrt(2+3j), + ncu.sqrt(3+4j)]) + norm = 1./((x**14)[0]) + assert_almost_equal(x**14 * norm, + [i * norm for i in [-76443+16124j, 23161315+58317492j, + 5583548873 + 2465133864j]]) + + # Ticket #836 + def assert_complex_equal(x, y): + assert_array_equal(x.real, y.real) + assert_array_equal(x.imag, y.imag) + + for z in [complex(0, np.inf), complex(1, np.inf)]: + z = np.array([z], dtype=np.complex_) + with np.errstate(invalid="ignore"): + assert_complex_equal(z**1, z) + assert_complex_equal(z**2, z*z) + assert_complex_equal(z**3, z*z*z) + + def test_power_zero(self): + # ticket #1271 + zero = np.array([0j]) + one = np.array([1+0j]) + cnan = np.array([complex(np.nan, np.nan)]) + # FIXME cinf not tested. 
+ #cinf = np.array([complex(np.inf, 0)]) + + def assert_complex_equal(x, y): + x, y = np.asarray(x), np.asarray(y) + assert_array_equal(x.real, y.real) + assert_array_equal(x.imag, y.imag) + + # positive powers + for p in [0.33, 0.5, 1, 1.5, 2, 3, 4, 5, 6.6]: + assert_complex_equal(np.power(zero, p), zero) + + # zero power + assert_complex_equal(np.power(zero, 0), one) + with np.errstate(invalid="ignore"): + assert_complex_equal(np.power(zero, 0+1j), cnan) + + # negative power + for p in [0.33, 0.5, 1, 1.5, 2, 3, 4, 5, 6.6]: + assert_complex_equal(np.power(zero, -p), cnan) + assert_complex_equal(np.power(zero, -1+0.2j), cnan) + + def test_fast_power(self): + x = np.array([1, 2, 3], np.int16) + res = x**2.0 + assert_((x**2.00001).dtype is res.dtype) + assert_array_equal(res, [1, 4, 9]) + # check the inplace operation on the casted copy doesn't mess with x + assert_(not np.may_share_memory(res, x)) + assert_array_equal(x, [1, 2, 3]) + + # Check that the fast path ignores 1-element not 0-d arrays + res = x ** np.array([[[2]]]) + assert_equal(res.shape, (1, 1, 3)) + + def test_integer_power(self): + a = np.array([15, 15], 'i8') + b = np.power(a, a) + assert_equal(b, [437893890380859375, 437893890380859375]) + + def test_integer_power_with_integer_zero_exponent(self): + dtypes = np.typecodes['Integer'] + for dt in dtypes: + arr = np.arange(-10, 10, dtype=dt) + assert_equal(np.power(arr, 0), np.ones_like(arr)) + + dtypes = np.typecodes['UnsignedInteger'] + for dt in dtypes: + arr = np.arange(10, dtype=dt) + assert_equal(np.power(arr, 0), np.ones_like(arr)) + + def test_integer_power_of_1(self): + dtypes = np.typecodes['AllInteger'] + for dt in dtypes: + arr = np.arange(10, dtype=dt) + assert_equal(np.power(1, arr), np.ones_like(arr)) + + def test_integer_power_of_zero(self): + dtypes = np.typecodes['AllInteger'] + for dt in dtypes: + arr = np.arange(1, 10, dtype=dt) + assert_equal(np.power(0, arr), np.zeros_like(arr)) + + def test_integer_to_negative_power(self): + dtypes = np.typecodes['Integer'] + for dt in dtypes: + a = np.array([0, 1, 2, 3], dtype=dt) + b = np.array([0, 1, 2, -3], dtype=dt) + one = np.array(1, dtype=dt) + minusone = np.array(-1, dtype=dt) + assert_raises(ValueError, np.power, a, b) + assert_raises(ValueError, np.power, a, minusone) + assert_raises(ValueError, np.power, one, b) + assert_raises(ValueError, np.power, one, minusone) + + +class TestFloat_power(object): + def test_type_conversion(self): + arg_type = '?bhilBHILefdgFDG' + res_type = 'ddddddddddddgDDG' + for dtin, dtout in zip(arg_type, res_type): + msg = "dtin: %s, dtout: %s" % (dtin, dtout) + arg = np.ones(1, dtype=dtin) + res = np.float_power(arg, arg) + assert_(res.dtype.name == np.dtype(dtout).name, msg) + + +class TestLog2(object): + def test_log2_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_almost_equal(np.log2(xf), yf) + + def test_log2_ints(self): + # a good log2 implementation should provide this, + # might fail on OS with bad libm + for i in range(1, 65): + v = np.log2(2.**i) + assert_equal(v, float(i), err_msg='at exponent %d' % i) + + def test_log2_special(self): + assert_equal(np.log2(1.), 0.) 
+ assert_equal(np.log2(np.inf), np.inf) + assert_(np.isnan(np.log2(np.nan))) + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(np.log2(-1.))) + assert_(np.isnan(np.log2(-np.inf))) + assert_equal(np.log2(0.), -np.inf) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + assert_(w[2].category is RuntimeWarning) + + +class TestExp2(object): + def test_exp2_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt) + assert_almost_equal(np.exp2(yf), xf) + + +class TestLogAddExp2(_FilterInvalids): + # Need test for intermediate precisions + def test_logaddexp2_values(self): + x = [1, 2, 3, 4, 5] + y = [5, 4, 3, 2, 1] + z = [6, 6, 6, 6, 6] + for dt, dec_ in zip(['f', 'd', 'g'], [6, 15, 15]): + xf = np.log2(np.array(x, dtype=dt)) + yf = np.log2(np.array(y, dtype=dt)) + zf = np.log2(np.array(z, dtype=dt)) + assert_almost_equal(np.logaddexp2(xf, yf), zf, decimal=dec_) + + def test_logaddexp2_range(self): + x = [1000000, -1000000, 1000200, -1000200] + y = [1000200, -1000200, 1000000, -1000000] + z = [1000200, -1000000, 1000200, -1000000] + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_almost_equal(np.logaddexp2(logxf, logyf), logzf) + + def test_inf(self): + inf = np.inf + x = [inf, -inf, inf, -inf, inf, 1, -inf, 1] + y = [inf, inf, -inf, -inf, 1, inf, 1, -inf] + z = [inf, inf, inf, -inf, inf, inf, 1, 1] + with np.errstate(invalid='raise'): + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_equal(np.logaddexp2(logxf, logyf), logzf) + + def test_nan(self): + assert_(np.isnan(np.logaddexp2(np.nan, np.inf))) + assert_(np.isnan(np.logaddexp2(np.inf, np.nan))) + assert_(np.isnan(np.logaddexp2(np.nan, 0))) + assert_(np.isnan(np.logaddexp2(0, np.nan))) + assert_(np.isnan(np.logaddexp2(np.nan, np.nan))) + + +class TestLog(object): + def test_log_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + log2_ = 0.69314718055994530943 + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt)*log2_ + assert_almost_equal(np.log(xf), yf) + + +class TestExp(object): + def test_exp_values(self): + x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + for dt in ['f', 'd', 'g']: + log2_ = 0.69314718055994530943 + xf = np.array(x, dtype=dt) + yf = np.array(y, dtype=dt)*log2_ + assert_almost_equal(np.exp(yf), xf) + + +class TestLogAddExp(_FilterInvalids): + def test_logaddexp_values(self): + x = [1, 2, 3, 4, 5] + y = [5, 4, 3, 2, 1] + z = [6, 6, 6, 6, 6] + for dt, dec_ in zip(['f', 'd', 'g'], [6, 15, 15]): + xf = np.log(np.array(x, dtype=dt)) + yf = np.log(np.array(y, dtype=dt)) + zf = np.log(np.array(z, dtype=dt)) + assert_almost_equal(np.logaddexp(xf, yf), zf, decimal=dec_) + + def test_logaddexp_range(self): + x = [1000000, -1000000, 1000200, -1000200] + y = [1000200, -1000200, 1000000, -1000000] + z = [1000200, -1000000, 1000200, -1000000] + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_almost_equal(np.logaddexp(logxf, logyf), logzf) + + def test_inf(self): + inf = np.inf + x = [inf, -inf, inf, -inf, inf, 
1, -inf, 1] + y = [inf, inf, -inf, -inf, 1, inf, 1, -inf] + z = [inf, inf, inf, -inf, inf, inf, 1, 1] + with np.errstate(invalid='raise'): + for dt in ['f', 'd', 'g']: + logxf = np.array(x, dtype=dt) + logyf = np.array(y, dtype=dt) + logzf = np.array(z, dtype=dt) + assert_equal(np.logaddexp(logxf, logyf), logzf) + + def test_nan(self): + assert_(np.isnan(np.logaddexp(np.nan, np.inf))) + assert_(np.isnan(np.logaddexp(np.inf, np.nan))) + assert_(np.isnan(np.logaddexp(np.nan, 0))) + assert_(np.isnan(np.logaddexp(0, np.nan))) + assert_(np.isnan(np.logaddexp(np.nan, np.nan))) + + def test_reduce(self): + assert_equal(np.logaddexp.identity, -np.inf) + assert_equal(np.logaddexp.reduce([]), -np.inf) + + +class TestLog1p(object): + def test_log1p(self): + assert_almost_equal(ncu.log1p(0.2), ncu.log(1.2)) + assert_almost_equal(ncu.log1p(1e-6), ncu.log(1+1e-6)) + + def test_special(self): + with np.errstate(invalid="ignore", divide="ignore"): + assert_equal(ncu.log1p(np.nan), np.nan) + assert_equal(ncu.log1p(np.inf), np.inf) + assert_equal(ncu.log1p(-1.), -np.inf) + assert_equal(ncu.log1p(-2.), np.nan) + assert_equal(ncu.log1p(-np.inf), np.nan) + + +class TestExpm1(object): + def test_expm1(self): + assert_almost_equal(ncu.expm1(0.2), ncu.exp(0.2)-1) + assert_almost_equal(ncu.expm1(1e-6), ncu.exp(1e-6)-1) + + def test_special(self): + assert_equal(ncu.expm1(np.inf), np.inf) + assert_equal(ncu.expm1(0.), 0.) + assert_equal(ncu.expm1(-0.), -0.) + assert_equal(ncu.expm1(np.inf), np.inf) + assert_equal(ncu.expm1(-np.inf), -1.) + + +class TestHypot(object): + def test_simple(self): + assert_almost_equal(ncu.hypot(1, 1), ncu.sqrt(2)) + assert_almost_equal(ncu.hypot(0, 0), 0) + + def test_reduce(self): + assert_almost_equal(ncu.hypot.reduce([3.0, 4.0]), 5.0) + assert_almost_equal(ncu.hypot.reduce([3.0, 4.0, 0]), 5.0) + assert_almost_equal(ncu.hypot.reduce([9.0, 12.0, 20.0]), 25.0) + assert_equal(ncu.hypot.reduce([]), 0.0) + + +def assert_hypot_isnan(x, y): + with np.errstate(invalid='ignore'): + assert_(np.isnan(ncu.hypot(x, y)), + "hypot(%s, %s) is %s, not nan" % (x, y, ncu.hypot(x, y))) + + +def assert_hypot_isinf(x, y): + with np.errstate(invalid='ignore'): + assert_(np.isinf(ncu.hypot(x, y)), + "hypot(%s, %s) is %s, not inf" % (x, y, ncu.hypot(x, y))) + + +class TestHypotSpecialValues(object): + def test_nan_outputs(self): + assert_hypot_isnan(np.nan, np.nan) + assert_hypot_isnan(np.nan, 1) + + def test_nan_outputs2(self): + assert_hypot_isinf(np.nan, np.inf) + assert_hypot_isinf(np.inf, np.nan) + assert_hypot_isinf(np.inf, 0) + assert_hypot_isinf(0, np.inf) + assert_hypot_isinf(np.inf, np.inf) + assert_hypot_isinf(np.inf, 23.0) + + def test_no_fpe(self): + assert_no_warnings(ncu.hypot, np.inf, 0) + + +def assert_arctan2_isnan(x, y): + assert_(np.isnan(ncu.arctan2(x, y)), "arctan(%s, %s) is %s, not nan" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_ispinf(x, y): + assert_((np.isinf(ncu.arctan2(x, y)) and ncu.arctan2(x, y) > 0), "arctan(%s, %s) is %s, not +inf" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_isninf(x, y): + assert_((np.isinf(ncu.arctan2(x, y)) and ncu.arctan2(x, y) < 0), "arctan(%s, %s) is %s, not -inf" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_ispzero(x, y): + assert_((ncu.arctan2(x, y) == 0 and not np.signbit(ncu.arctan2(x, y))), "arctan(%s, %s) is %s, not +0" % (x, y, ncu.arctan2(x, y))) + + +def assert_arctan2_isnzero(x, y): + assert_((ncu.arctan2(x, y) == 0 and np.signbit(ncu.arctan2(x, y))), "arctan(%s, %s) is %s, not -0" % (x, y, ncu.arctan2(x, y))) + + 
+class TestArctan2SpecialValues(object): + def test_one_one(self): + # atan2(1, 1) returns pi/4. + assert_almost_equal(ncu.arctan2(1, 1), 0.25 * np.pi) + assert_almost_equal(ncu.arctan2(-1, 1), -0.25 * np.pi) + assert_almost_equal(ncu.arctan2(1, -1), 0.75 * np.pi) + + def test_zero_nzero(self): + # atan2(+-0, -0) returns +-pi. + assert_almost_equal(ncu.arctan2(np.PZERO, np.NZERO), np.pi) + assert_almost_equal(ncu.arctan2(np.NZERO, np.NZERO), -np.pi) + + def test_zero_pzero(self): + # atan2(+-0, +0) returns +-0. + assert_arctan2_ispzero(np.PZERO, np.PZERO) + assert_arctan2_isnzero(np.NZERO, np.PZERO) + + def test_zero_negative(self): + # atan2(+-0, x) returns +-pi for x < 0. + assert_almost_equal(ncu.arctan2(np.PZERO, -1), np.pi) + assert_almost_equal(ncu.arctan2(np.NZERO, -1), -np.pi) + + def test_zero_positive(self): + # atan2(+-0, x) returns +-0 for x > 0. + assert_arctan2_ispzero(np.PZERO, 1) + assert_arctan2_isnzero(np.NZERO, 1) + + def test_positive_zero(self): + # atan2(y, +-0) returns +pi/2 for y > 0. + assert_almost_equal(ncu.arctan2(1, np.PZERO), 0.5 * np.pi) + assert_almost_equal(ncu.arctan2(1, np.NZERO), 0.5 * np.pi) + + def test_negative_zero(self): + # atan2(y, +-0) returns -pi/2 for y < 0. + assert_almost_equal(ncu.arctan2(-1, np.PZERO), -0.5 * np.pi) + assert_almost_equal(ncu.arctan2(-1, np.NZERO), -0.5 * np.pi) + + def test_any_ninf(self): + # atan2(+-y, -infinity) returns +-pi for finite y > 0. + assert_almost_equal(ncu.arctan2(1, np.NINF), np.pi) + assert_almost_equal(ncu.arctan2(-1, np.NINF), -np.pi) + + def test_any_pinf(self): + # atan2(+-y, +infinity) returns +-0 for finite y > 0. + assert_arctan2_ispzero(1, np.inf) + assert_arctan2_isnzero(-1, np.inf) + + def test_inf_any(self): + # atan2(+-infinity, x) returns +-pi/2 for finite x. + assert_almost_equal(ncu.arctan2( np.inf, 1), 0.5 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, 1), -0.5 * np.pi) + + def test_inf_ninf(self): + # atan2(+-infinity, -infinity) returns +-3*pi/4. + assert_almost_equal(ncu.arctan2( np.inf, -np.inf), 0.75 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, -np.inf), -0.75 * np.pi) + + def test_inf_pinf(self): + # atan2(+-infinity, +infinity) returns +-pi/4. + assert_almost_equal(ncu.arctan2( np.inf, np.inf), 0.25 * np.pi) + assert_almost_equal(ncu.arctan2(-np.inf, np.inf), -0.25 * np.pi) + + def test_nan_any(self): + # atan2(nan, x) returns nan for any x, including inf + assert_arctan2_isnan(np.nan, np.inf) + assert_arctan2_isnan(np.inf, np.nan) + assert_arctan2_isnan(np.nan, np.nan) + + +class TestLdexp(object): + def _check_ldexp(self, tp): + assert_almost_equal(ncu.ldexp(np.array(2., np.float32), + np.array(3, tp)), 16.) + assert_almost_equal(ncu.ldexp(np.array(2., np.float64), + np.array(3, tp)), 16.) + assert_almost_equal(ncu.ldexp(np.array(2., np.longdouble), + np.array(3, tp)), 16.) + + def test_ldexp(self): + # The default Python int type should work + assert_almost_equal(ncu.ldexp(2., 3), 16.) 
+ # The following int types should all be accepted + self._check_ldexp(np.int8) + self._check_ldexp(np.int16) + self._check_ldexp(np.int32) + self._check_ldexp('i') + self._check_ldexp('l') + + def test_ldexp_overflow(self): + # silence warning emitted on overflow + with np.errstate(over="ignore"): + imax = np.iinfo(np.dtype('l')).max + imin = np.iinfo(np.dtype('l')).min + assert_equal(ncu.ldexp(2., imax), np.inf) + assert_equal(ncu.ldexp(2., imin), 0) + + +class TestMaximum(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.maximum.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), np.nan) + assert_equal(func(tmp2), np.nan) + + def test_reduce_complex(self): + assert_equal(np.maximum.reduce([1, 2j]), 1) + assert_equal(np.maximum.reduce([1+3j, 2j]), 1+3j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([nan, nan, nan]) + assert_equal(np.maximum(arg1, arg2), out) + + def test_object_nans(self): + # Multiple checks to give this a chance to + # fail if cmp is used instead of rich compare. + # Failure cannot be guaranteed. + for i in range(1): + x = np.array(float('nan'), object) + y = 1.0 + z = np.array(float('nan'), object) + assert_(np.maximum(x, y) == 1.0) + assert_(np.maximum(z, y) == 1.0) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([nan, nan, nan], dtype=complex) + assert_equal(np.maximum(arg1, arg2), out) + + def test_object_array(self): + arg1 = np.arange(5, dtype=object) + arg2 = arg1 + 1 + assert_equal(np.maximum(arg1, arg2), arg2) + + +class TestMinimum(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.minimum.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), np.nan) + assert_equal(func(tmp2), np.nan) + + def test_reduce_complex(self): + assert_equal(np.minimum.reduce([1, 2j]), 2j) + assert_equal(np.minimum.reduce([1+3j, 2j]), 2j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([nan, nan, nan]) + assert_equal(np.minimum(arg1, arg2), out) + + def test_object_nans(self): + # Multiple checks to give this a chance to + # fail if cmp is used instead of rich compare. + # Failure cannot be guaranteed. 
+ for i in range(1): + x = np.array(float('nan'), object) + y = 1.0 + z = np.array(float('nan'), object) + assert_(np.minimum(x, y) == 1.0) + assert_(np.minimum(z, y) == 1.0) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([nan, nan, nan], dtype=complex) + assert_equal(np.minimum(arg1, arg2), out) + + def test_object_array(self): + arg1 = np.arange(5, dtype=object) + arg2 = arg1 + 1 + assert_equal(np.minimum(arg1, arg2), arg1) + + +class TestFmax(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.fmax.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 10) + assert_equal(func(tmp2), 10) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), 9) + assert_equal(func(tmp2), 9) + + def test_reduce_complex(self): + assert_equal(np.fmax.reduce([1, 2j]), 1) + assert_equal(np.fmax.reduce([1+3j, 2j]), 1+3j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([0, 0, nan]) + assert_equal(np.fmax(arg1, arg2), out) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([0, 0, nan], dtype=complex) + assert_equal(np.fmax(arg1, arg2), out) + + +class TestFmin(_FilterInvalids): + def test_reduce(self): + dflt = np.typecodes['AllFloat'] + dint = np.typecodes['AllInteger'] + seq1 = np.arange(11) + seq2 = seq1[::-1] + func = np.fmin.reduce + for dt in dint: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + for dt in dflt: + tmp1 = seq1.astype(dt) + tmp2 = seq2.astype(dt) + assert_equal(func(tmp1), 0) + assert_equal(func(tmp2), 0) + tmp1[::2] = np.nan + tmp2[::2] = np.nan + assert_equal(func(tmp1), 1) + assert_equal(func(tmp2), 1) + + def test_reduce_complex(self): + assert_equal(np.fmin.reduce([1, 2j]), 2j) + assert_equal(np.fmin.reduce([1+3j, 2j]), 2j) + + def test_float_nans(self): + nan = np.nan + arg1 = np.array([0, nan, nan]) + arg2 = np.array([nan, 0, nan]) + out = np.array([0, 0, nan]) + assert_equal(np.fmin(arg1, arg2), out) + + def test_complex_nans(self): + nan = np.nan + for cnan in [complex(nan, 0), complex(0, nan), complex(nan, nan)]: + arg1 = np.array([0, cnan, cnan], dtype=complex) + arg2 = np.array([cnan, 0, cnan], dtype=complex) + out = np.array([0, 0, nan], dtype=complex) + assert_equal(np.fmin(arg1, arg2), out) + + +class TestBool(object): + def test_exceptions(self): + a = np.ones(1, dtype=np.bool_) + assert_raises(TypeError, np.negative, a) + assert_raises(TypeError, np.positive, a) + assert_raises(TypeError, np.subtract, a, a) + + def test_truth_table_logical(self): + # 2, 3 and 4 serves as true values + input1 = [0, 0, 3, 2] + input2 = [0, 4, 0, 2] + + typecodes = (np.typecodes['AllFloat'] + + np.typecodes['AllInteger'] + + '?') # boolean + for dtype in map(np.dtype, typecodes): + arg1 = np.asarray(input1, dtype=dtype) + arg2 = np.asarray(input2, dtype=dtype) + + # OR + out = [False, 
True, True, True] + for func in (np.logical_or, np.maximum): + assert_equal(func(arg1, arg2).astype(bool), out) + # AND + out = [False, False, False, True] + for func in (np.logical_and, np.minimum): + assert_equal(func(arg1, arg2).astype(bool), out) + # XOR + out = [False, True, True, False] + for func in (np.logical_xor, np.not_equal): + assert_equal(func(arg1, arg2).astype(bool), out) + + def test_truth_table_bitwise(self): + arg1 = [False, False, True, True] + arg2 = [False, True, False, True] + + out = [False, True, True, True] + assert_equal(np.bitwise_or(arg1, arg2), out) + + out = [False, False, False, True] + assert_equal(np.bitwise_and(arg1, arg2), out) + + out = [False, True, True, False] + assert_equal(np.bitwise_xor(arg1, arg2), out) + + def test_reduce(self): + none = np.array([0, 0, 0, 0], bool) + some = np.array([1, 0, 1, 1], bool) + every = np.array([1, 1, 1, 1], bool) + empty = np.array([], bool) + + arrs = [none, some, every, empty] + + for arr in arrs: + assert_equal(np.logical_and.reduce(arr), all(arr)) + + for arr in arrs: + assert_equal(np.logical_or.reduce(arr), any(arr)) + + for arr in arrs: + assert_equal(np.logical_xor.reduce(arr), arr.sum() % 2 == 1) + + +class TestBitwiseUFuncs(object): + + bitwise_types = [np.dtype(c) for c in '?' + 'bBhHiIlLqQ' + 'O'] + + def test_values(self): + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + msg = "dt = '%s'" % dt.char + + assert_equal(np.bitwise_not(zeros), ones, err_msg=msg) + assert_equal(np.bitwise_not(ones), zeros, err_msg=msg) + + assert_equal(np.bitwise_or(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_or(zeros, ones), ones, err_msg=msg) + assert_equal(np.bitwise_or(ones, zeros), ones, err_msg=msg) + assert_equal(np.bitwise_or(ones, ones), ones, err_msg=msg) + + assert_equal(np.bitwise_xor(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_xor(zeros, ones), ones, err_msg=msg) + assert_equal(np.bitwise_xor(ones, zeros), ones, err_msg=msg) + assert_equal(np.bitwise_xor(ones, ones), zeros, err_msg=msg) + + assert_equal(np.bitwise_and(zeros, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_and(zeros, ones), zeros, err_msg=msg) + assert_equal(np.bitwise_and(ones, zeros), zeros, err_msg=msg) + assert_equal(np.bitwise_and(ones, ones), ones, err_msg=msg) + + def test_types(self): + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + msg = "dt = '%s'" % dt.char + + assert_(np.bitwise_not(zeros).dtype == dt, msg) + assert_(np.bitwise_or(zeros, zeros).dtype == dt, msg) + assert_(np.bitwise_xor(zeros, zeros).dtype == dt, msg) + assert_(np.bitwise_and(zeros, zeros).dtype == dt, msg) + + def test_identity(self): + assert_(np.bitwise_or.identity == 0, 'bitwise_or') + assert_(np.bitwise_xor.identity == 0, 'bitwise_xor') + assert_(np.bitwise_and.identity == -1, 'bitwise_and') + + def test_reduction(self): + binary_funcs = (np.bitwise_or, np.bitwise_xor, np.bitwise_and) + + for dt in self.bitwise_types: + zeros = np.array([0], dtype=dt) + ones = np.array([-1], dtype=dt) + for f in binary_funcs: + msg = "dt: '%s', f: '%s'" % (dt, f) + assert_equal(f.reduce(zeros), zeros, err_msg=msg) + assert_equal(f.reduce(ones), ones, err_msg=msg) + + # Test empty reduction, no object dtype + for dt in self.bitwise_types[:-1]: + # No object array types + empty = np.array([], dtype=dt) + for f in binary_funcs: + msg = "dt: '%s', f: '%s'" % (dt, f) + tgt = np.array(f.identity, dtype=dt) + res = f.reduce(empty) + 
assert_equal(res, tgt, err_msg=msg) + assert_(res.dtype == tgt.dtype, msg) + + # Empty object arrays use the identity. Note that the types may + # differ, the actual type used is determined by the assign_identity + # function and is not the same as the type returned by the identity + # method. + for f in binary_funcs: + msg = "dt: '%s'" % (f,) + empty = np.array([], dtype=object) + tgt = f.identity + res = f.reduce(empty) + assert_equal(res, tgt, err_msg=msg) + + # Non-empty object arrays do not use the identity + for f in binary_funcs: + msg = "dt: '%s'" % (f,) + btype = np.array([True], dtype=object) + assert_(type(f.reduce(btype)) is bool, msg) + + +class TestInt(object): + def test_logical_not(self): + x = np.ones(10, dtype=np.int16) + o = np.ones(10 * 2, dtype=bool) + tgt = o.copy() + tgt[::2] = False + os = o[::2] + assert_array_equal(np.logical_not(x, out=os), False) + assert_array_equal(o, tgt) + + +class TestFloatingPoint(object): + def test_floating_point(self): + assert_equal(ncu.FLOATING_POINT_SUPPORT, 1) + + +class TestDegrees(object): + def test_degrees(self): + assert_almost_equal(ncu.degrees(np.pi), 180.0) + assert_almost_equal(ncu.degrees(-0.5*np.pi), -90.0) + + +class TestRadians(object): + def test_radians(self): + assert_almost_equal(ncu.radians(180.0), np.pi) + assert_almost_equal(ncu.radians(-90.0), -0.5*np.pi) + + +class TestHeavside(object): + def test_heaviside(self): + x = np.array([[-30.0, -0.1, 0.0, 0.2], [7.5, np.nan, np.inf, -np.inf]]) + expectedhalf = np.array([[0.0, 0.0, 0.5, 1.0], [1.0, np.nan, 1.0, 0.0]]) + expected1 = expectedhalf.copy() + expected1[0, 2] = 1 + + h = ncu.heaviside(x, 0.5) + assert_equal(h, expectedhalf) + + h = ncu.heaviside(x, 1.0) + assert_equal(h, expected1) + + x = x.astype(np.float32) + + h = ncu.heaviside(x, np.float32(0.5)) + assert_equal(h, expectedhalf.astype(np.float32)) + + h = ncu.heaviside(x, np.float32(1.0)) + assert_equal(h, expected1.astype(np.float32)) + + +class TestSign(object): + def test_sign(self): + a = np.array([np.inf, -np.inf, np.nan, 0.0, 3.0, -3.0]) + out = np.zeros(a.shape) + tgt = np.array([1., -1., np.nan, 0.0, 1.0, -1.0]) + + with np.errstate(invalid='ignore'): + res = ncu.sign(a) + assert_equal(res, tgt) + res = ncu.sign(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + + def test_sign_dtype_object(self): + # In reference to github issue #6229 + + foo = np.array([-.1, 0, .1]) + a = np.sign(foo.astype(object)) + b = np.sign(foo) + + assert_array_equal(a, b) + + def test_sign_dtype_nan_object(self): + # In reference to github issue #6229 + def test_nan(): + foo = np.array([np.nan]) + # FIXME: a not used + a = np.sign(foo.astype(object)) + + assert_raises(TypeError, test_nan) + +class TestMinMax(object): + def test_minmax_blocked(self): + # simd tests on max/min, test all alignments, slow but important + # for 2 * vz + 2 * (vs - 1) + 1 (unrolled once) + for dt, sz in [(np.float32, 15), (np.float64, 7)]: + for out, inp, msg in _gen_alignment_data(dtype=dt, type='unary', + max_size=sz): + for i in range(inp.size): + inp[:] = np.arange(inp.size, dtype=dt) + inp[i] = np.nan + emsg = lambda: '%r\n%s' % (inp, msg) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + "invalid value encountered in reduce") + assert_(np.isnan(inp.max()), msg=emsg) + assert_(np.isnan(inp.min()), msg=emsg) + + inp[i] = 1e10 + assert_equal(inp.max(), 1e10, err_msg=msg) + inp[i] = -1e10 + assert_equal(inp.min(), -1e10, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + 
# i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_equal(d.max(), d[0]) + assert_equal(d.min(), d[0]) + + def test_reduce_reorder(self): + # gh 10370, 11029 Some compilers reorder the call to npy_getfloatstatus + # and put it before the call to an intrisic function that causes + # invalid status to be set. Also make sure warnings are not emitted + for n in (2, 4, 8, 16, 32): + for dt in (np.float32, np.float16, np.complex64): + for r in np.diagflat(np.array([np.nan] * n, dtype=dt)): + assert_equal(np.min(r), np.nan) + + def test_minimize_no_warns(self): + a = np.minimum(np.nan, 1) + assert_equal(a, np.nan) + + +class TestAbsoluteNegative(object): + def test_abs_neg_blocked(self): + # simd tests on abs, test all alignments for vz + 2 * (vs - 1) + 1 + for dt, sz in [(np.float32, 11), (np.float64, 5)]: + for out, inp, msg in _gen_alignment_data(dtype=dt, type='unary', + max_size=sz): + tgt = [ncu.absolute(i) for i in inp] + np.absolute(inp, out=out) + assert_equal(out, tgt, err_msg=msg) + assert_((out >= 0).all()) + + tgt = [-1*(i) for i in inp] + np.negative(inp, out=out) + assert_equal(out, tgt, err_msg=msg) + + for v in [np.nan, -np.inf, np.inf]: + for i in range(inp.size): + d = np.arange(inp.size, dtype=dt) + inp[:] = -d + inp[i] = v + d[i] = -v if v == -np.inf else v + assert_array_equal(np.abs(inp), d, err_msg=msg) + np.abs(inp, out=out) + assert_array_equal(out, d, err_msg=msg) + + assert_array_equal(-inp, -1*inp, err_msg=msg) + d = -1 * inp + np.negative(inp, out=out) + assert_array_equal(out, d, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_equal(np.abs(d), d) + assert_equal(np.negative(d), -d) + np.negative(d, out=d) + np.negative(np.ones_like(d), out=d) + np.abs(d, out=d) + np.abs(np.ones_like(d), out=d) + + +class TestPositive(object): + def test_valid(self): + valid_dtypes = [int, float, complex, object] + for dtype in valid_dtypes: + x = np.arange(5, dtype=dtype) + result = np.positive(x) + assert_equal(x, result, err_msg=str(dtype)) + + def test_invalid(self): + with assert_raises(TypeError): + np.positive(True) + with assert_raises(TypeError): + np.positive(np.datetime64('2000-01-01')) + with assert_raises(TypeError): + np.positive(np.array(['foo'], dtype=str)) + with assert_raises(TypeError): + np.positive(np.array(['bar'], dtype=object)) + + +class TestSpecialMethods(object): + def test_wrap(self): + + class with_wrap(object): + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context): + r = with_wrap() + r.arr = arr + r.context = context + return r + + a = with_wrap() + x = ncu.minimum(a, a) + assert_equal(x.arr, np.zeros(1)) + func, args, i = x.context + assert_(func is ncu.minimum) + assert_equal(len(args), 2) + assert_equal(args[0], a) + assert_equal(args[1], a) + assert_equal(i, 0) + + def test_wrap_and_prepare_out(self): + # Calling convention for out should not affect how special methods are + # called + + class StoreArrayPrepareWrap(np.ndarray): + _wrap_args = None + _prepare_args = None + def __new__(cls): + return np.empty(()).view(cls) + def __array_wrap__(self, obj, context): + self._wrap_args = context[1] + return obj + def __array_prepare__(self, obj, context): + self._prepare_args = context[1] + return obj + @property + def args(self): + # We need to ensure these are fetched at the same time, before + # 
any other ufuncs are calld by the assertions + return (self._prepare_args, self._wrap_args) + def __repr__(self): + return "a" # for short test output + + def do_test(f_call, f_expected): + a = StoreArrayPrepareWrap() + f_call(a) + p, w = a.args + expected = f_expected(a) + try: + assert_equal(p, expected) + assert_equal(w, expected) + except AssertionError as e: + # assert_equal produces truly useless error messages + raise AssertionError("\n".join([ + "Bad arguments passed in ufunc call", + " expected: {}".format(expected), + " __array_prepare__ got: {}".format(p), + " __array_wrap__ got: {}".format(w) + ])) + + # method not on the out argument + do_test(lambda a: np.add(a, 0), lambda a: (a, 0)) + do_test(lambda a: np.add(a, 0, None), lambda a: (a, 0)) + do_test(lambda a: np.add(a, 0, out=None), lambda a: (a, 0)) + do_test(lambda a: np.add(a, 0, out=(None,)), lambda a: (a, 0)) + + # method on the out argument + do_test(lambda a: np.add(0, 0, a), lambda a: (0, 0, a)) + do_test(lambda a: np.add(0, 0, out=a), lambda a: (0, 0, a)) + do_test(lambda a: np.add(0, 0, out=(a,)), lambda a: (0, 0, a)) + + def test_wrap_with_iterable(self): + # test fix for bug #1026: + + class with_wrap(np.ndarray): + __array_priority__ = 10 + + def __new__(cls): + return np.asarray(1).view(cls).copy() + + def __array_wrap__(self, arr, context): + return arr.view(type(self)) + + a = with_wrap() + x = ncu.multiply(a, (1, 2, 3)) + assert_(isinstance(x, with_wrap)) + assert_array_equal(x, np.array((1, 2, 3))) + + def test_priority_with_scalar(self): + # test fix for bug #826: + + class A(np.ndarray): + __array_priority__ = 10 + + def __new__(cls): + return np.asarray(1.0, 'float64').view(cls).copy() + + a = A() + x = np.float64(1)*a + assert_(isinstance(x, A)) + assert_array_equal(x, np.array(1)) + + def test_old_wrap(self): + + class with_wrap(object): + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr): + r = with_wrap() + r.arr = arr + return r + + a = with_wrap() + x = ncu.minimum(a, a) + assert_equal(x.arr, np.zeros(1)) + + def test_priority(self): + + class A(object): + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context): + r = type(self)() + r.arr = arr + r.context = context + return r + + class B(A): + __array_priority__ = 20. + + class C(A): + __array_priority__ = 40. 
+ + x = np.zeros(1) + a = A() + b = B() + c = C() + f = ncu.minimum + assert_(type(f(x, x)) is np.ndarray) + assert_(type(f(x, a)) is A) + assert_(type(f(x, b)) is B) + assert_(type(f(x, c)) is C) + assert_(type(f(a, x)) is A) + assert_(type(f(b, x)) is B) + assert_(type(f(c, x)) is C) + + assert_(type(f(a, a)) is A) + assert_(type(f(a, b)) is B) + assert_(type(f(b, a)) is B) + assert_(type(f(b, b)) is B) + assert_(type(f(b, c)) is C) + assert_(type(f(c, b)) is C) + assert_(type(f(c, c)) is C) + + assert_(type(ncu.exp(a) is A)) + assert_(type(ncu.exp(b) is B)) + assert_(type(ncu.exp(c) is C)) + + def test_failing_wrap(self): + + class A(object): + def __array__(self): + return np.zeros(2) + + def __array_wrap__(self, arr, context): + raise RuntimeError + + a = A() + assert_raises(RuntimeError, ncu.maximum, a, a) + assert_raises(RuntimeError, ncu.maximum.reduce, a) + + def test_failing_out_wrap(self): + + singleton = np.array([1.0]) + + class Ok(np.ndarray): + def __array_wrap__(self, obj): + return singleton + + class Bad(np.ndarray): + def __array_wrap__(self, obj): + raise RuntimeError + + ok = np.empty(1).view(Ok) + bad = np.empty(1).view(Bad) + + # double-free (segfault) of "ok" if "bad" raises an exception + for i in range(10): + assert_raises(RuntimeError, ncu.frexp, 1, ok, bad) + + def test_none_wrap(self): + # Tests that issue #8507 is resolved. Previously, this would segfault + + class A(object): + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context=None): + return None + + a = A() + assert_equal(ncu.maximum(a, a), None) + + def test_default_prepare(self): + + class with_wrap(object): + __array_priority__ = 10 + + def __array__(self): + return np.zeros(1) + + def __array_wrap__(self, arr, context): + return arr + + a = with_wrap() + x = ncu.minimum(a, a) + assert_equal(x, np.zeros(1)) + assert_equal(type(x), np.ndarray) + + def test_prepare(self): + + class with_prepare(np.ndarray): + __array_priority__ = 10 + + def __array_prepare__(self, arr, context): + # make sure we can return a new + return np.array(arr).view(type=with_prepare) + + a = np.array(1).view(type=with_prepare) + x = np.add(a, a) + assert_equal(x, np.array(2)) + assert_equal(type(x), with_prepare) + + def test_prepare_out(self): + + class with_prepare(np.ndarray): + __array_priority__ = 10 + + def __array_prepare__(self, arr, context): + return np.array(arr).view(type=with_prepare) + + a = np.array([1]).view(type=with_prepare) + x = np.add(a, a, a) + # Returned array is new, because of the strange + # __array_prepare__ above + assert_(not np.shares_memory(x, a)) + assert_equal(x, np.array([2])) + assert_equal(type(x), with_prepare) + + def test_failing_prepare(self): + + class A(object): + def __array__(self): + return np.zeros(1) + + def __array_prepare__(self, arr, context=None): + raise RuntimeError + + a = A() + assert_raises(RuntimeError, ncu.maximum, a, a) + + def test_array_with_context(self): + + class A(object): + def __array__(self, dtype=None, context=None): + func, args, i = context + self.func = func + self.args = args + self.i = i + return np.zeros(1) + + class B(object): + def __array__(self, dtype=None): + return np.zeros(1, dtype) + + class C(object): + def __array__(self): + return np.zeros(1) + + a = A() + ncu.maximum(np.zeros(1), a) + assert_(a.func is ncu.maximum) + assert_equal(a.args[0], 0) + assert_(a.args[1] is a) + assert_(a.i == 1) + assert_equal(ncu.maximum(a, B()), 0) + assert_equal(ncu.maximum(a, C()), 0) + + def test_ufunc_override(self): + # check 
override works even with instance with high priority. + class A(object): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return self, func, method, inputs, kwargs + + class MyNDArray(np.ndarray): + __array_priority__ = 100 + + a = A() + b = np.array([1]).view(MyNDArray) + res0 = np.multiply(a, b) + res1 = np.multiply(b, b, out=a) + + # self + assert_equal(res0[0], a) + assert_equal(res1[0], a) + assert_equal(res0[1], np.multiply) + assert_equal(res1[1], np.multiply) + assert_equal(res0[2], '__call__') + assert_equal(res1[2], '__call__') + assert_equal(res0[3], (a, b)) + assert_equal(res1[3], (b, b)) + assert_equal(res0[4], {}) + assert_equal(res1[4], {'out': (a,)}) + + def test_ufunc_override_mro(self): + + # Some multi arg functions for testing. + def tres_mul(a, b, c): + return a * b * c + + def quatro_mul(a, b, c, d): + return a * b * c * d + + # Make these into ufuncs. + three_mul_ufunc = np.frompyfunc(tres_mul, 3, 1) + four_mul_ufunc = np.frompyfunc(quatro_mul, 4, 1) + + class A(object): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "A" + + class ASub(A): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "ASub" + + class B(object): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + return "B" + + class C(object): + def __init__(self): + self.count = 0 + + def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 + return NotImplemented + + class CSub(C): + def __array_ufunc__(self, func, method, *inputs, **kwargs): + self.count += 1 + return NotImplemented + + a = A() + a_sub = ASub() + b = B() + c = C() + + # Standard + res = np.multiply(a, a_sub) + assert_equal(res, "ASub") + res = np.multiply(a_sub, b) + assert_equal(res, "ASub") + + # With 1 NotImplemented + res = np.multiply(c, a) + assert_equal(res, "A") + assert_equal(c.count, 1) + # Check our counter works, so we can trust tests below. + res = np.multiply(c, a) + assert_equal(c.count, 2) + + # Both NotImplemented. + c = C() + c_sub = CSub() + assert_raises(TypeError, np.multiply, c, c_sub) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = c_sub.count = 0 + assert_raises(TypeError, np.multiply, c_sub, c) + assert_equal(c.count, 1) + assert_equal(c_sub.count, 1) + c.count = 0 + assert_raises(TypeError, np.multiply, c, c) + assert_equal(c.count, 1) + c.count = 0 + assert_raises(TypeError, np.multiply, 2, c) + assert_equal(c.count, 1) + + # Ternary testing. + assert_equal(three_mul_ufunc(a, 1, 2), "A") + assert_equal(three_mul_ufunc(1, a, 2), "A") + assert_equal(three_mul_ufunc(1, 2, a), "A") + + assert_equal(three_mul_ufunc(a, a, 6), "A") + assert_equal(three_mul_ufunc(a, 2, a), "A") + assert_equal(three_mul_ufunc(a, 2, b), "A") + assert_equal(three_mul_ufunc(a, 2, a_sub), "ASub") + assert_equal(three_mul_ufunc(a, a_sub, 3), "ASub") + c.count = 0 + assert_equal(three_mul_ufunc(c, a_sub, 3), "ASub") + assert_equal(c.count, 1) + c.count = 0 + assert_equal(three_mul_ufunc(1, a_sub, c), "ASub") + assert_equal(c.count, 0) + + c.count = 0 + assert_equal(three_mul_ufunc(a, b, c), "A") + assert_equal(c.count, 0) + c_sub.count = 0 + assert_equal(three_mul_ufunc(a, b, c_sub), "A") + assert_equal(c_sub.count, 0) + assert_equal(three_mul_ufunc(1, 2, b), "B") + + assert_raises(TypeError, three_mul_ufunc, 1, 2, c) + assert_raises(TypeError, three_mul_ufunc, c_sub, 2, c) + assert_raises(TypeError, three_mul_ufunc, c_sub, 2, 3) + + # Quaternary testing. 
+ assert_equal(four_mul_ufunc(a, 1, 2, 3), "A") + assert_equal(four_mul_ufunc(1, a, 2, 3), "A") + assert_equal(four_mul_ufunc(1, 1, a, 3), "A") + assert_equal(four_mul_ufunc(1, 1, 2, a), "A") + + assert_equal(four_mul_ufunc(a, b, 2, 3), "A") + assert_equal(four_mul_ufunc(1, a, 2, b), "A") + assert_equal(four_mul_ufunc(b, 1, a, 3), "B") + assert_equal(four_mul_ufunc(a_sub, 1, 2, a), "ASub") + assert_equal(four_mul_ufunc(a, 1, 2, a_sub), "ASub") + + c = C() + c_sub = CSub() + assert_raises(TypeError, four_mul_ufunc, 1, 2, 3, c) + assert_equal(c.count, 1) + c.count = 0 + assert_raises(TypeError, four_mul_ufunc, 1, 2, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + c2 = C() + c.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, 1, c, c_sub, c2) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 1) + assert_equal(c2.count, 0) + c.count = c2.count = c_sub.count = 0 + assert_raises(TypeError, four_mul_ufunc, c2, c, c_sub, c) + assert_equal(c_sub.count, 1) + assert_equal(c.count, 0) + assert_equal(c2.count, 1) + + def test_ufunc_override_methods(self): + + class A(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return self, ufunc, method, inputs, kwargs + + # __call__ + a = A() + res = np.multiply.__call__(1, a, foo='bar', answer=42) + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], '__call__') + assert_equal(res[3], (1, a)) + assert_equal(res[4], {'foo': 'bar', 'answer': 42}) + + # __call__, wrong args + assert_raises(TypeError, np.multiply, a) + assert_raises(TypeError, np.multiply, a, a, a, a) + assert_raises(TypeError, np.multiply, a, a, sig='a', signature='a') + assert_raises(TypeError, ncu_tests.inner1d, a, a, axis=0, axes=[0, 0]) + + # reduce, positional args + res = np.multiply.reduce(a, 'axis0', 'dtype0', 'out0', 'keep0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduce') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'keepdims': 'keep0', + 'axis': 'axis0'}) + + # reduce, kwargs + res = np.multiply.reduce(a, axis='axis0', dtype='dtype0', out='out0', + keepdims='keep0', initial='init0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduce') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'keepdims': 'keep0', + 'axis': 'axis0', + 'initial': 'init0'}) + + # reduce, output equal to None removed, but not other explicit ones, + # even if they are at their default value. + res = np.multiply.reduce(a, 0, None, None, False) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False}) + res = np.multiply.reduce(a, out=None, axis=0, keepdims=True) + assert_equal(res[4], {'axis': 0, 'keepdims': True}) + res = np.multiply.reduce(a, None, out=(None,), dtype=None) + assert_equal(res[4], {'axis': None, 'dtype': None}) + res = np.multiply.reduce(a, 0, None, None, False, 2) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, 'initial': 2}) + # np._NoValue ignored for initial. + res = np.multiply.reduce(a, 0, None, None, False, np._NoValue) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False}) + # None kept for initial. 
+ res = np.multiply.reduce(a, 0, None, None, False, None) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, 'initial': None}) + + # reduce, wrong args + assert_raises(ValueError, np.multiply.reduce, a, out=()) + assert_raises(ValueError, np.multiply.reduce, a, out=('out0', 'out1')) + assert_raises(TypeError, np.multiply.reduce, a, 'axis0', axis='axis0') + + # accumulate, pos args + res = np.multiply.accumulate(a, 'axis0', 'dtype0', 'out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'accumulate') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # accumulate, kwargs + res = np.multiply.accumulate(a, axis='axis0', dtype='dtype0', + out='out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'accumulate') + assert_equal(res[3], (a,)) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # accumulate, output equal to None removed. + res = np.multiply.accumulate(a, 0, None, None) + assert_equal(res[4], {'axis': 0, 'dtype': None}) + res = np.multiply.accumulate(a, out=None, axis=0, dtype='dtype1') + assert_equal(res[4], {'axis': 0, 'dtype': 'dtype1'}) + res = np.multiply.accumulate(a, None, out=(None,), dtype=None) + assert_equal(res[4], {'axis': None, 'dtype': None}) + + # accumulate, wrong args + assert_raises(ValueError, np.multiply.accumulate, a, out=()) + assert_raises(ValueError, np.multiply.accumulate, a, + out=('out0', 'out1')) + assert_raises(TypeError, np.multiply.accumulate, a, + 'axis0', axis='axis0') + + # reduceat, pos args + res = np.multiply.reduceat(a, [4, 2], 'axis0', 'dtype0', 'out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduceat') + assert_equal(res[3], (a, [4, 2])) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # reduceat, kwargs + res = np.multiply.reduceat(a, [4, 2], axis='axis0', dtype='dtype0', + out='out0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'reduceat') + assert_equal(res[3], (a, [4, 2])) + assert_equal(res[4], {'dtype':'dtype0', + 'out': ('out0',), + 'axis': 'axis0'}) + + # reduceat, output equal to None removed. 
+ res = np.multiply.reduceat(a, [4, 2], 0, None, None) + assert_equal(res[4], {'axis': 0, 'dtype': None}) + res = np.multiply.reduceat(a, [4, 2], axis=None, out=None, dtype='dt') + assert_equal(res[4], {'axis': None, 'dtype': 'dt'}) + res = np.multiply.reduceat(a, [4, 2], None, None, out=(None,)) + assert_equal(res[4], {'axis': None, 'dtype': None}) + + # reduceat, wrong args + assert_raises(ValueError, np.multiply.reduce, a, [4, 2], out=()) + assert_raises(ValueError, np.multiply.reduce, a, [4, 2], + out=('out0', 'out1')) + assert_raises(TypeError, np.multiply.reduce, a, [4, 2], + 'axis0', axis='axis0') + + # outer + res = np.multiply.outer(a, 42) + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'outer') + assert_equal(res[3], (a, 42)) + assert_equal(res[4], {}) + + # outer, wrong args + assert_raises(TypeError, np.multiply.outer, a) + assert_raises(TypeError, np.multiply.outer, a, a, a, a) + assert_raises(TypeError, np.multiply.outer, a, a, sig='a', signature='a') + + # at + res = np.multiply.at(a, [4, 2], 'b0') + assert_equal(res[0], a) + assert_equal(res[1], np.multiply) + assert_equal(res[2], 'at') + assert_equal(res[3], (a, [4, 2], 'b0')) + + # at, wrong args + assert_raises(TypeError, np.multiply.at, a) + assert_raises(TypeError, np.multiply.at, a, a, a, a) + + def test_ufunc_override_out(self): + + class A(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return kwargs + + class B(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return kwargs + + a = A() + b = B() + res0 = np.multiply(a, b, 'out_arg') + res1 = np.multiply(a, b, out='out_arg') + res2 = np.multiply(2, b, 'out_arg') + res3 = np.multiply(3, b, out='out_arg') + res4 = np.multiply(a, 4, 'out_arg') + res5 = np.multiply(a, 5, out='out_arg') + + assert_equal(res0['out'][0], 'out_arg') + assert_equal(res1['out'][0], 'out_arg') + assert_equal(res2['out'][0], 'out_arg') + assert_equal(res3['out'][0], 'out_arg') + assert_equal(res4['out'][0], 'out_arg') + assert_equal(res5['out'][0], 'out_arg') + + # ufuncs with multiple output modf and frexp. + res6 = np.modf(a, 'out0', 'out1') + res7 = np.frexp(a, 'out0', 'out1') + assert_equal(res6['out'][0], 'out0') + assert_equal(res6['out'][1], 'out1') + assert_equal(res7['out'][0], 'out0') + assert_equal(res7['out'][1], 'out1') + + # While we're at it, check that default output is never passed on. + assert_(np.sin(a, None) == {}) + assert_(np.sin(a, out=None) == {}) + assert_(np.sin(a, out=(None,)) == {}) + assert_(np.modf(a, None) == {}) + assert_(np.modf(a, None, None) == {}) + assert_(np.modf(a, out=(None, None)) == {}) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', DeprecationWarning) + assert_(np.modf(a, out=None) == {}) + assert_(w[0].category is DeprecationWarning) + + # don't give positional and output argument, or too many arguments. + # wrong number of arguments in the tuple is an error too. 
+ assert_raises(TypeError, np.multiply, a, b, 'one', out='two') + assert_raises(TypeError, np.multiply, a, b, 'one', 'two') + assert_raises(ValueError, np.multiply, a, b, out=('one', 'two')) + assert_raises(ValueError, np.multiply, a, out=()) + assert_raises(TypeError, np.modf, a, 'one', out=('two', 'three')) + assert_raises(TypeError, np.modf, a, 'one', 'two', 'three') + assert_raises(ValueError, np.modf, a, out=('one', 'two', 'three')) + assert_raises(ValueError, np.modf, a, out=('one',)) + + def test_ufunc_override_exception(self): + + class A(object): + def __array_ufunc__(self, *a, **kwargs): + raise ValueError("oops") + + a = A() + assert_raises(ValueError, np.negative, 1, out=a) + assert_raises(ValueError, np.negative, a) + assert_raises(ValueError, np.divide, 1., a) + + def test_ufunc_override_not_implemented(self): + + class A(object): + def __array_ufunc__(self, *args, **kwargs): + return NotImplemented + + msg = ("operand type(s) all returned NotImplemented from " + "__array_ufunc__(<ufunc 'negative'>, '__call__', <*>): 'A'") + with assert_raises_regex(TypeError, fnmatch.translate(msg)): + np.negative(A()) + + msg = ("operand type(s) all returned NotImplemented from " + "__array_ufunc__(<ufunc 'add'>, '__call__', <*>, <object *>, " + "out=(1,)): 'A', 'object', 'int'") + with assert_raises_regex(TypeError, fnmatch.translate(msg)): + np.add(A(), object(), out=1) + + def test_ufunc_override_disabled(self): + + class OptOut(object): + __array_ufunc__ = None + + opt_out = OptOut() + + # ufuncs always raise + msg = "operand 'OptOut' does not support ufuncs" + with assert_raises_regex(TypeError, msg): + np.add(opt_out, 1) + with assert_raises_regex(TypeError, msg): + np.add(1, opt_out) + with assert_raises_regex(TypeError, msg): + np.negative(opt_out) + + # opt-outs still hold even when other arguments have pathological + # __array_ufunc__ implementations + + class GreedyArray(object): + def __array_ufunc__(self, *args, **kwargs): + return self + + greedy = GreedyArray() + assert_(np.negative(greedy) is greedy) + with assert_raises_regex(TypeError, msg): + np.add(greedy, opt_out) + with assert_raises_regex(TypeError, msg): + np.add(greedy, 1, out=opt_out) + + def test_gufunc_override(self): + # gufunc are just ufunc instances, but follow a different path, + # so check __array_ufunc__ overrides them properly. + class A(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return self, ufunc, method, inputs, kwargs + + inner1d = ncu_tests.inner1d + a = A() + res = inner1d(a, a) + assert_equal(res[0], a) + assert_equal(res[1], inner1d) + assert_equal(res[2], '__call__') + assert_equal(res[3], (a, a)) + assert_equal(res[4], {}) + + res = inner1d(1, 1, out=a) + assert_equal(res[0], a) + assert_equal(res[1], inner1d) + assert_equal(res[2], '__call__') + assert_equal(res[3], (1, 1)) + assert_equal(res[4], {'out': (a,)}) + + # wrong number of arguments in the tuple is an error too. + assert_raises(TypeError, inner1d, a, out='two') + assert_raises(TypeError, inner1d, a, a, 'one', out='two') + assert_raises(TypeError, inner1d, a, a, 'one', 'two') + assert_raises(ValueError, inner1d, a, a, out=('one', 'two')) + assert_raises(ValueError, inner1d, a, a, out=()) + + def test_ufunc_override_with_super(self): + # NOTE: this class is given as an example in doc/subclassing.py; + # if you make any changes here, do update it there too. 
+ class A(np.ndarray): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + args = [] + in_no = [] + for i, input_ in enumerate(inputs): + if isinstance(input_, A): + in_no.append(i) + args.append(input_.view(np.ndarray)) + else: + args.append(input_) + + outputs = kwargs.pop('out', None) + out_no = [] + if outputs: + out_args = [] + for j, output in enumerate(outputs): + if isinstance(output, A): + out_no.append(j) + out_args.append(output.view(np.ndarray)) + else: + out_args.append(output) + kwargs['out'] = tuple(out_args) + else: + outputs = (None,) * ufunc.nout + + info = {} + if in_no: + info['inputs'] = in_no + if out_no: + info['outputs'] = out_no + + results = super(A, self).__array_ufunc__(ufunc, method, + *args, **kwargs) + if results is NotImplemented: + return NotImplemented + + if method == 'at': + if isinstance(inputs[0], A): + inputs[0].info = info + return + + if ufunc.nout == 1: + results = (results,) + + results = tuple((np.asarray(result).view(A) + if output is None else output) + for result, output in zip(results, outputs)) + if results and isinstance(results[0], A): + results[0].info = info + + return results[0] if len(results) == 1 else results + + class B(object): + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if any(isinstance(input_, A) for input_ in inputs): + return "A!" + else: + return NotImplemented + + d = np.arange(5.) + # 1 input, 1 output + a = np.arange(5.).view(A) + b = np.sin(a) + check = np.sin(d) + assert_(np.all(check == b)) + assert_equal(b.info, {'inputs': [0]}) + b = np.sin(d, out=(a,)) + assert_(np.all(check == b)) + assert_equal(b.info, {'outputs': [0]}) + assert_(b is a) + a = np.arange(5.).view(A) + b = np.sin(a, out=a) + assert_(np.all(check == b)) + assert_equal(b.info, {'inputs': [0], 'outputs': [0]}) + + # 1 input, 2 outputs + a = np.arange(5.).view(A) + b1, b2 = np.modf(a) + assert_equal(b1.info, {'inputs': [0]}) + b1, b2 = np.modf(d, out=(None, a)) + assert_(b2 is a) + assert_equal(b1.info, {'outputs': [1]}) + a = np.arange(5.).view(A) + b = np.arange(5.).view(A) + c1, c2 = np.modf(a, out=(a, b)) + assert_(c1 is a) + assert_(c2 is b) + assert_equal(c1.info, {'inputs': [0], 'outputs': [0, 1]}) + + # 2 input, 1 output + a = np.arange(5.).view(A) + b = np.arange(5.).view(A) + c = np.add(a, b, out=a) + assert_(c is a) + assert_equal(c.info, {'inputs': [0, 1], 'outputs': [0]}) + # some tests with a non-ndarray subclass + a = np.arange(5.) + b = B() + assert_(a.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_(b.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_raises(TypeError, np.add, a, b) + a = a.view(A) + assert_(a.__array_ufunc__(np.add, '__call__', a, b) is NotImplemented) + assert_(b.__array_ufunc__(np.add, '__call__', a, b) == "A!") + assert_(np.add(a, b) == "A!") + # regression check for gh-9102 -- tests ufunc.reduce implicitly. 
+ d = np.array([[1, 2, 3], [1, 2, 3]]) + a = d.view(A) + c = a.any() + check = d.any() + assert_equal(c, check) + assert_(c.info, {'inputs': [0]}) + c = a.max() + check = d.max() + assert_equal(c, check) + assert_(c.info, {'inputs': [0]}) + b = np.array(0).view(A) + c = a.max(out=b) + assert_equal(c, check) + assert_(c is b) + assert_(c.info, {'inputs': [0], 'outputs': [0]}) + check = a.max(axis=0) + b = np.zeros_like(check).view(A) + c = a.max(axis=0, out=b) + assert_equal(c, check) + assert_(c is b) + assert_(c.info, {'inputs': [0], 'outputs': [0]}) + # simple explicit tests of reduce, accumulate, reduceat + check = np.add.reduce(d, axis=1) + c = np.add.reduce(a, axis=1) + assert_equal(c, check) + assert_(c.info, {'inputs': [0]}) + b = np.zeros_like(c) + c = np.add.reduce(a, 1, None, b) + assert_equal(c, check) + assert_(c is b) + assert_(c.info, {'inputs': [0], 'outputs': [0]}) + check = np.add.accumulate(d, axis=0) + c = np.add.accumulate(a, axis=0) + assert_equal(c, check) + assert_(c.info, {'inputs': [0]}) + b = np.zeros_like(c) + c = np.add.accumulate(a, 0, None, b) + assert_equal(c, check) + assert_(c is b) + assert_(c.info, {'inputs': [0], 'outputs': [0]}) + indices = [0, 2, 1] + check = np.add.reduceat(d, indices, axis=1) + c = np.add.reduceat(a, indices, axis=1) + assert_equal(c, check) + assert_(c.info, {'inputs': [0]}) + b = np.zeros_like(c) + c = np.add.reduceat(a, indices, 1, None, b) + assert_equal(c, check) + assert_(c is b) + assert_(c.info, {'inputs': [0], 'outputs': [0]}) + # and a few tests for at + d = np.array([[1, 2, 3], [1, 2, 3]]) + check = d.copy() + a = d.copy().view(A) + np.add.at(check, ([0, 1], [0, 2]), 1.) + np.add.at(a, ([0, 1], [0, 2]), 1.) + assert_equal(a, check) + assert_(a.info, {'inputs': [0]}) + b = np.array(1.).view(A) + a = d.copy().view(A) + np.add.at(a, ([0, 1], [0, 2]), b) + assert_equal(a, check) + assert_(a.info, {'inputs': [0, 2]}) + + +class TestChoose(object): + def test_mixed(self): + c = np.array([True, True]) + a = np.array([True, True]) + assert_equal(np.choose(c, (a, 1)), np.array([1, 1])) + + +class TestRationalFunctions(object): + def test_lcm(self): + self._test_lcm_inner(np.int16) + self._test_lcm_inner(np.uint16) + + def test_lcm_object(self): + self._test_lcm_inner(np.object_) + + def test_gcd(self): + self._test_gcd_inner(np.int16) + self._test_lcm_inner(np.uint16) + + def test_gcd_object(self): + self._test_gcd_inner(np.object_) + + def _test_lcm_inner(self, dtype): + # basic use + a = np.array([12, 120], dtype=dtype) + b = np.array([20, 200], dtype=dtype) + assert_equal(np.lcm(a, b), [60, 600]) + + if not issubclass(dtype, np.unsignedinteger): + # negatives are ignored + a = np.array([12, -12, 12, -12], dtype=dtype) + b = np.array([20, 20, -20, -20], dtype=dtype) + assert_equal(np.lcm(a, b), [60]*4) + + # reduce + a = np.array([3, 12, 20], dtype=dtype) + assert_equal(np.lcm.reduce([3, 12, 20]), 60) + + # broadcasting, and a test including 0 + a = np.arange(6).astype(dtype) + b = 20 + assert_equal(np.lcm(a, b), [0, 20, 20, 60, 20, 20]) + + def _test_gcd_inner(self, dtype): + # basic use + a = np.array([12, 120], dtype=dtype) + b = np.array([20, 200], dtype=dtype) + assert_equal(np.gcd(a, b), [4, 40]) + + if not issubclass(dtype, np.unsignedinteger): + # negatives are ignored + a = np.array([12, -12, 12, -12], dtype=dtype) + b = np.array([20, 20, -20, -20], dtype=dtype) + assert_equal(np.gcd(a, b), [4]*4) + + # reduce + a = np.array([15, 25, 35], dtype=dtype) + assert_equal(np.gcd.reduce(a), 5) + + # broadcasting, and a test 
including 0 + a = np.arange(6).astype(dtype) + b = 20 + assert_equal(np.gcd(a, b), [20, 1, 2, 1, 4, 5]) + + def test_lcm_overflow(self): + # verify that we don't overflow when a*b does overflow + big = np.int32(np.iinfo(np.int32).max // 11) + a = 2*big + b = 5*big + assert_equal(np.lcm(a, b), 10*big) + + def test_gcd_overflow(self): + for dtype in (np.int32, np.int64): + # verify that we don't overflow when taking abs(x) + # not relevant for lcm, where the result is unrepresentable anyway + a = dtype(np.iinfo(dtype).min) # negative power of two + q = -(a // 4) + assert_equal(np.gcd(a, q*3), q) + assert_equal(np.gcd(a, -q*3), q) + + def test_decimal(self): + from decimal import Decimal + a = np.array([1, 1, -1, -1]) * Decimal('0.20') + b = np.array([1, -1, 1, -1]) * Decimal('0.12') + + assert_equal(np.gcd(a, b), 4*[Decimal('0.04')]) + assert_equal(np.lcm(a, b), 4*[Decimal('0.60')]) + + def test_float(self): + # not well-defined on float due to rounding errors + assert_raises(TypeError, np.gcd, 0.3, 0.4) + assert_raises(TypeError, np.lcm, 0.3, 0.4) + + def test_builtin_long(self): + # sanity check that array coercion is alright for builtin longs + assert_equal(np.array(2**200).item(), 2**200) + + # expressed as prime factors + a = np.array(2**100 * 3**5) + b = np.array([2**100 * 5**7, 2**50 * 3**10]) + assert_equal(np.gcd(a, b), [2**100, 2**50 * 3**5]) + assert_equal(np.lcm(a, b), [2**100 * 3**5 * 5**7, 2**100 * 3**10]) + + assert_equal(np.gcd(2**100, 3**100), 1) + + +class TestComplexFunctions(object): + funcs = [np.arcsin, np.arccos, np.arctan, np.arcsinh, np.arccosh, + np.arctanh, np.sin, np.cos, np.tan, np.exp, + np.exp2, np.log, np.sqrt, np.log10, np.log2, + np.log1p] + + def test_it(self): + for f in self.funcs: + if f is np.arccosh: + x = 1.5 + else: + x = .5 + fr = f(x) + fz = f(complex(x)) + assert_almost_equal(fz.real, fr, err_msg='real part %s' % f) + assert_almost_equal(fz.imag, 0., err_msg='imag part %s' % f) + + def test_precisions_consistent(self): + z = 1 + 1j + for f in self.funcs: + fcf = f(np.csingle(z)) + fcd = f(np.cdouble(z)) + fcl = f(np.clongdouble(z)) + assert_almost_equal(fcf, fcd, decimal=6, err_msg='fch-fcd %s' % f) + assert_almost_equal(fcl, fcd, decimal=15, err_msg='fch-fcl %s' % f) + + def test_branch_cuts(self): + # check branch cuts and continuity on them + _check_branch_cut(np.log, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True) + + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True) + + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True) + + # check against bogus branch cuts: assert continuity between quadrants + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1) + _check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1) + + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1) + + def test_branch_cuts_complex64(self): + # check branch cuts and continuity on them + _check_branch_cut(np.log, -0.5, 1j, 1, 
-1, True, np.complex64) + _check_branch_cut(np.log2, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log10, -0.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.log1p, -1.5, 1j, 1, -1, True, np.complex64) + _check_branch_cut(np.sqrt, -0.5, 1j, 1, -1, True, np.complex64) + + _check_branch_cut(np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctan, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) + + _check_branch_cut(np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64) + _check_branch_cut(np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64) + _check_branch_cut(np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64) + + # check against bogus branch cuts: assert continuity between quadrants + _check_branch_cut(np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64) + + _check_branch_cut(np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1, False, np.complex64) + _check_branch_cut(np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64) + _check_branch_cut(np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64) + + def test_against_cmath(self): + import cmath + + points = [-1-1j, -1+1j, +1-1j, +1+1j] + name_map = {'arcsin': 'asin', 'arccos': 'acos', 'arctan': 'atan', + 'arcsinh': 'asinh', 'arccosh': 'acosh', 'arctanh': 'atanh'} + atol = 4*np.finfo(complex).eps + for func in self.funcs: + fname = func.__name__.split('.')[-1] + cname = name_map.get(fname, fname) + try: + cfunc = getattr(cmath, cname) + except AttributeError: + continue + for p in points: + a = complex(func(np.complex_(p))) + b = cfunc(p) + assert_(abs(a - b) < atol, "%s %s: %s; cmath: %s" % (fname, p, a, b)) + + @pytest.mark.parametrize('dtype', [np.complex64, np.complex_, np.longcomplex]) + def test_loss_of_precision(self, dtype): + """Check loss of precision in complex arc* functions""" + + # Check against known-good functions + + info = np.finfo(dtype) + real_dtype = dtype(0.).real.dtype + eps = info.eps + + def check(x, rtol): + x = x.astype(real_dtype) + + z = x.astype(dtype) + d = np.absolute(np.arcsinh(x)/np.arcsinh(z).real - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arcsinh')) + + z = (1j*x).astype(dtype) + d = np.absolute(np.arcsinh(x)/np.arcsin(z).imag - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arcsin')) + + z = x.astype(dtype) + d = np.absolute(np.arctanh(x)/np.arctanh(z).real - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arctanh')) + + z = (1j*x).astype(dtype) + d = np.absolute(np.arctanh(x)/np.arctan(z).imag - 1) + assert_(np.all(d < rtol), (np.argmax(d), x[np.argmax(d)], d.max(), + 'arctan')) + + # The switchover was chosen as 1e-3; hence there can be up to + # ~eps/1e-3 of relative cancellation error before it + + x_series = np.logspace(-20, -3.001, 200) + x_basic = np.logspace(-2.999, 0, 10, endpoint=False) + + if dtype is np.longcomplex: + # It's not guaranteed that the system-provided arc functions + # are accurate down to a few epsilons. (Eg. on Linux 64-bit) + # So, give more leeway for long complex tests here: + # Can use 2.1 for > Ubuntu LTS Trusty (2014), glibc = 2.19. 
+ check(x_series, 50.0*eps) + else: + check(x_series, 2.1*eps) + check(x_basic, 2.0*eps/1e-3) + + # Check a few points + + z = np.array([1e-5*(1+1j)], dtype=dtype) + p = 9.999999999333333333e-6 + 1.000000000066666666e-5j + d = np.absolute(1-np.arctanh(z)/p) + assert_(np.all(d < 1e-15)) + + p = 1.0000000000333333333e-5 + 9.999999999666666667e-6j + d = np.absolute(1-np.arcsinh(z)/p) + assert_(np.all(d < 1e-15)) + + p = 9.999999999333333333e-6j + 1.000000000066666666e-5 + d = np.absolute(1-np.arctan(z)/p) + assert_(np.all(d < 1e-15)) + + p = 1.0000000000333333333e-5j + 9.999999999666666667e-6 + d = np.absolute(1-np.arcsin(z)/p) + assert_(np.all(d < 1e-15)) + + # Check continuity across switchover points + + def check(func, z0, d=1): + z0 = np.asarray(z0, dtype=dtype) + zp = z0 + abs(z0) * d * eps * 2 + zm = z0 - abs(z0) * d * eps * 2 + assert_(np.all(zp != zm), (zp, zm)) + + # NB: the cancellation error at the switchover is at least eps + good = (abs(func(zp) - func(zm)) < 2*eps) + assert_(np.all(good), (func, z0[~good])) + + for func in (np.arcsinh, np.arcsinh, np.arcsin, np.arctanh, np.arctan): + pts = [rp+1j*ip for rp in (-1e-3, 0, 1e-3) for ip in(-1e-3, 0, 1e-3) + if rp != 0 or ip != 0] + check(func, pts, 1) + check(func, pts, 1j) + check(func, pts, 1+1j) + + +class TestAttributes(object): + def test_attributes(self): + add = ncu.add + assert_equal(add.__name__, 'add') + assert_(add.ntypes >= 18) # don't fail if types added + assert_('ii->i' in add.types) + assert_equal(add.nin, 2) + assert_equal(add.nout, 1) + assert_equal(add.identity, 0) + + def test_doc(self): + # don't bother checking the long list of kwargs, which are likely to + # change + assert_(ncu.add.__doc__.startswith( + "add(x1, x2, /, out=None, *, where=True")) + assert_(ncu.frexp.__doc__.startswith( + "frexp(x[, out1, out2], / [, out=(None, None)], *, where=True")) + + +class TestSubclass(object): + + def test_subclass_op(self): + + class simple(np.ndarray): + def __new__(subtype, shape): + self = np.ndarray.__new__(subtype, shape, dtype=object) + self.fill(0) + return self + + a = simple((3, 4)) + assert_equal(a+a, a) + +def _check_branch_cut(f, x0, dx, re_sign=1, im_sign=-1, sig_zero_ok=False, + dtype=complex): + """ + Check for a branch cut in a function. + + Assert that `x0` lies on a branch cut of function `f` and `f` is + continuous from the direction `dx`. 
+ + Parameters + ---------- + f : func + Function to check + x0 : array-like + Point on branch cut + dx : array-like + Direction to check continuity in + re_sign, im_sign : {1, -1} + Change of sign of the real or imaginary part expected + sig_zero_ok : bool + Whether to check if the branch cut respects signed zero (if applicable) + dtype : dtype + Dtype to check (should be complex) + + """ + x0 = np.atleast_1d(x0).astype(dtype) + dx = np.atleast_1d(dx).astype(dtype) + + if np.dtype(dtype).char == 'F': + scale = np.finfo(dtype).eps * 1e2 + atol = np.float32(1e-2) + else: + scale = np.finfo(dtype).eps * 1e3 + atol = 1e-4 + + y0 = f(x0) + yp = f(x0 + dx*scale*np.absolute(x0)/np.absolute(dx)) + ym = f(x0 - dx*scale*np.absolute(x0)/np.absolute(dx)) + + assert_(np.all(np.absolute(y0.real - yp.real) < atol), (y0, yp)) + assert_(np.all(np.absolute(y0.imag - yp.imag) < atol), (y0, yp)) + assert_(np.all(np.absolute(y0.real - ym.real*re_sign) < atol), (y0, ym)) + assert_(np.all(np.absolute(y0.imag - ym.imag*im_sign) < atol), (y0, ym)) + + if sig_zero_ok: + # check that signed zeros also work as a displacement + jr = (x0.real == 0) & (dx.real != 0) + ji = (x0.imag == 0) & (dx.imag != 0) + if np.any(jr): + x = x0[jr] + x.real = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[jr].real - ym.real*re_sign) < atol), (y0[jr], ym)) + assert_(np.all(np.absolute(y0[jr].imag - ym.imag*im_sign) < atol), (y0[jr], ym)) + + if np.any(ji): + x = x0[ji] + x.imag = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[ji].real - ym.real*re_sign) < atol), (y0[ji], ym)) + assert_(np.all(np.absolute(y0[ji].imag - ym.imag*im_sign) < atol), (y0[ji], ym)) + +def test_copysign(): + assert_(np.copysign(1, -1) == -1) + with np.errstate(divide="ignore"): + assert_(1 / np.copysign(0, -1) < 0) + assert_(1 / np.copysign(0, 1) > 0) + assert_(np.signbit(np.copysign(np.nan, -1))) + assert_(not np.signbit(np.copysign(np.nan, 1))) + +def _test_nextafter(t): + one = t(1) + two = t(2) + zero = t(0) + eps = np.finfo(t).eps + assert_(np.nextafter(one, two) - one == eps) + assert_(np.nextafter(one, zero) - one < 0) + assert_(np.isnan(np.nextafter(np.nan, one))) + assert_(np.isnan(np.nextafter(one, np.nan))) + assert_(np.nextafter(one, one) == one) + +def test_nextafter(): + return _test_nextafter(np.float64) + + +def test_nextafterf(): + return _test_nextafter(np.float32) + + +@pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") +@pytest.mark.xfail(condition=platform.machine().startswith("ppc64"), + reason="IBM double double") +def test_nextafterl(): + return _test_nextafter(np.longdouble) + + +def test_nextafter_0(): + for t, direction in itertools.product(np.sctypes['float'], (1, -1)): + tiny = np.finfo(t).tiny + assert_(0. 
< direction * np.nextafter(t(0), t(direction)) < tiny) + assert_equal(np.nextafter(t(0), t(direction)) / t(2.1), direction * 0.0) + +def _test_spacing(t): + one = t(1) + eps = np.finfo(t).eps + nan = t(np.nan) + inf = t(np.inf) + with np.errstate(invalid='ignore'): + assert_(np.spacing(one) == eps) + assert_(np.isnan(np.spacing(nan))) + assert_(np.isnan(np.spacing(inf))) + assert_(np.isnan(np.spacing(-inf))) + assert_(np.spacing(t(1e30)) != 0) + +def test_spacing(): + return _test_spacing(np.float64) + +def test_spacingf(): + return _test_spacing(np.float32) + + +@pytest.mark.skipif(np.finfo(np.double) == np.finfo(np.longdouble), + reason="long double is same as double") +@pytest.mark.xfail(condition=platform.machine().startswith("ppc64"), + reason="IBM double double") +def test_spacingl(): + return _test_spacing(np.longdouble) + +def test_spacing_gfortran(): + # Reference from this fortran file, built with gfortran 4.3.3 on linux + # 32bits: + # PROGRAM test_spacing + # INTEGER, PARAMETER :: SGL = SELECTED_REAL_KIND(p=6, r=37) + # INTEGER, PARAMETER :: DBL = SELECTED_REAL_KIND(p=13, r=200) + # + # WRITE(*,*) spacing(0.00001_DBL) + # WRITE(*,*) spacing(1.0_DBL) + # WRITE(*,*) spacing(1000._DBL) + # WRITE(*,*) spacing(10500._DBL) + # + # WRITE(*,*) spacing(0.00001_SGL) + # WRITE(*,*) spacing(1.0_SGL) + # WRITE(*,*) spacing(1000._SGL) + # WRITE(*,*) spacing(10500._SGL) + # END PROGRAM + ref = {np.float64: [1.69406589450860068E-021, + 2.22044604925031308E-016, + 1.13686837721616030E-013, + 1.81898940354585648E-012], + np.float32: [9.09494702E-13, + 1.19209290E-07, + 6.10351563E-05, + 9.76562500E-04]} + + for dt, dec_ in zip([np.float32, np.float64], (10, 20)): + x = np.array([1e-5, 1, 1000, 10500], dtype=dt) + assert_array_almost_equal(np.spacing(x), ref[dt], decimal=dec_) + +def test_nextafter_vs_spacing(): + # XXX: spacing does not handle long double yet + for t in [np.float32, np.float64]: + for _f in [1, 1e-5, 1000]: + f = t(_f) + f1 = t(_f + 1) + assert_(np.nextafter(f, f1) - f == np.spacing(f)) + +def test_pos_nan(): + """Check np.nan is a positive nan.""" + assert_(np.signbit(np.nan) == 0) + +def test_reduceat(): + """Test bug in reduceat when structured arrays are not copied.""" + db = np.dtype([('name', 'S11'), ('time', np.int64), ('value', np.float32)]) + a = np.empty([100], dtype=db) + a['name'] = 'Simple' + a['time'] = 10 + a['value'] = 100 + indx = [0, 7, 15, 25] + + h2 = [] + val1 = indx[0] + for val2 in indx[1:]: + h2.append(np.add.reduce(a['value'][val1:val2])) + val1 = val2 + h2.append(np.add.reduce(a['value'][val1:])) + h2 = np.array(h2) + + # test buffered -- this should work + h1 = np.add.reduceat(a['value'], indx) + assert_array_almost_equal(h1, h2) + + # This is when the error occurs. 
+ # test no buffer + np.setbufsize(32) + h1 = np.add.reduceat(a['value'], indx) + np.setbufsize(np.UFUNC_BUFSIZE_DEFAULT) + assert_array_almost_equal(h1, h2) + +def test_reduceat_empty(): + """Reduceat should work with empty arrays""" + indices = np.array([], 'i4') + x = np.array([], 'f8') + result = np.add.reduceat(x, indices) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (0,)) + # Another case with a slightly different zero-sized shape + x = np.ones((5, 2)) + result = np.add.reduceat(x, [], axis=0) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (0, 2)) + result = np.add.reduceat(x, [], axis=1) + assert_equal(result.dtype, x.dtype) + assert_equal(result.shape, (5, 0)) + +def test_complex_nan_comparisons(): + nans = [complex(np.nan, 0), complex(0, np.nan), complex(np.nan, np.nan)] + fins = [complex(1, 0), complex(-1, 0), complex(0, 1), complex(0, -1), + complex(1, 1), complex(-1, -1), complex(0, 0)] + + with np.errstate(invalid='ignore'): + for x in nans + fins: + x = np.array([x]) + for y in nans + fins: + y = np.array([y]) + + if np.isfinite(x) and np.isfinite(y): + continue + + assert_equal(x < y, False, err_msg="%r < %r" % (x, y)) + assert_equal(x > y, False, err_msg="%r > %r" % (x, y)) + assert_equal(x <= y, False, err_msg="%r <= %r" % (x, y)) + assert_equal(x >= y, False, err_msg="%r >= %r" % (x, y)) + assert_equal(x == y, False, err_msg="%r == %r" % (x, y)) + + +def test_rint_big_int(): + # np.rint bug for large integer values on Windows 32-bit and MKL + # https://github.com/numpy/numpy/issues/6685 + val = 4607998452777363968 + # This is exactly representable in floating point + assert_equal(val, int(float(val))) + # Rint should not change the value + assert_equal(val, np.rint(val)) + + +def test_signaling_nan_exceptions(): + with assert_no_warnings(): + a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff') + np.isnan(a) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_umath_complex.py b/contrib/python/numpy/py2/numpy/core/tests/test_umath_complex.py new file mode 100644 index 0000000000..785ae8c57d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_umath_complex.py @@ -0,0 +1,543 @@ +from __future__ import division, absolute_import, print_function + +import sys +import platform +import pytest + +import numpy as np +import numpy.core.umath as ncu +from numpy.testing import ( + assert_raises, assert_equal, assert_array_equal, assert_almost_equal + ) + +# TODO: branch cuts (use Pauli code) +# TODO: conj 'symmetry' +# TODO: FPU exceptions + +# At least on Windows the results of many complex functions are not conforming +# to the C99 standard. See ticket 1574. +# Ditto for Solaris (ticket 1642) and OS X on PowerPC. +#FIXME: this will probably change when we require full C99 campatibility +with np.errstate(all='ignore'): + functions_seem_flaky = ((np.exp(complex(np.inf, 0)).imag != 0) + or (np.log(complex(np.NZERO, 0)).imag != np.pi)) +# TODO: replace with a check on whether platform-provided C99 funcs are used +xfail_complex_tests = (not sys.platform.startswith('linux') or functions_seem_flaky) + +# TODO This can be xfail when the generator functions are got rid of. 
+platform_skip = pytest.mark.skipif(xfail_complex_tests, + reason="Inadequate C99 complex support") + + + +class TestCexp(object): + def test_simple(self): + check = check_complex_value + f = np.exp + + check(f, 1, 0, np.exp(1), 0, False) + check(f, 0, 1, np.cos(1), np.sin(1), False) + + ref = np.exp(1) * complex(np.cos(1), np.sin(1)) + check(f, 1, 1, ref.real, ref.imag, False) + + @platform_skip + def test_special_values(self): + # C99: Section G 6.3.1 + + check = check_complex_value + f = np.exp + + # cexp(+-0 + 0i) is 1 + 0i + check(f, np.PZERO, 0, 1, 0, False) + check(f, np.NZERO, 0, 1, 0, False) + + # cexp(x + infi) is nan + nani for finite x and raises 'invalid' FPU + # exception + check(f, 1, np.inf, np.nan, np.nan) + check(f, -1, np.inf, np.nan, np.nan) + check(f, 0, np.inf, np.nan, np.nan) + + # cexp(inf + 0i) is inf + 0i + check(f, np.inf, 0, np.inf, 0) + + # cexp(-inf + yi) is +0 * (cos(y) + i sin(y)) for finite y + check(f, -np.inf, 1, np.PZERO, np.PZERO) + check(f, -np.inf, 0.75 * np.pi, np.NZERO, np.PZERO) + + # cexp(inf + yi) is +inf * (cos(y) + i sin(y)) for finite y + check(f, np.inf, 1, np.inf, np.inf) + check(f, np.inf, 0.75 * np.pi, -np.inf, np.inf) + + # cexp(-inf + inf i) is +-0 +- 0i (signs unspecified) + def _check_ninf_inf(dummy): + msgform = "cexp(-inf, inf) is (%f, %f), expected (+-0, +-0)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(-np.inf, np.inf))) + if z.real != 0 or z.imag != 0: + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_inf(None) + + # cexp(inf + inf i) is +-inf + NaNi and raised invalid FPU ex. + def _check_inf_inf(dummy): + msgform = "cexp(inf, inf) is (%f, %f), expected (+-inf, nan)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(np.inf, np.inf))) + if not np.isinf(z.real) or not np.isnan(z.imag): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_inf_inf(None) + + # cexp(-inf + nan i) is +-0 +- 0i + def _check_ninf_nan(dummy): + msgform = "cexp(-inf, nan) is (%f, %f), expected (+-0, +-0)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(-np.inf, np.nan))) + if z.real != 0 or z.imag != 0: + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_nan(None) + + # cexp(inf + nan i) is +-inf + nan + def _check_inf_nan(dummy): + msgform = "cexp(-inf, nan) is (%f, %f), expected (+-inf, nan)" + with np.errstate(invalid='ignore'): + z = f(np.array(complex(np.inf, np.nan))) + if not np.isinf(z.real) or not np.isnan(z.imag): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_inf_nan(None) + + # cexp(nan + yi) is nan + nani for y != 0 (optional: raises invalid FPU + # ex) + check(f, np.nan, 1, np.nan, np.nan) + check(f, np.nan, -1, np.nan, np.nan) + + check(f, np.nan, np.inf, np.nan, np.nan) + check(f, np.nan, -np.inf, np.nan, np.nan) + + # cexp(nan + nani) is nan + nani + check(f, np.nan, np.nan, np.nan, np.nan) + + # TODO This can be xfail when the generator functions are got rid of. 
+ @pytest.mark.skip(reason="cexp(nan + 0I) is wrong on most platforms") + def test_special_values2(self): + # XXX: most implementations get it wrong here (including glibc <= 2.10) + # cexp(nan + 0i) is nan + 0i + check = check_complex_value + f = np.exp + + check(f, np.nan, 0, np.nan, 0) + +class TestClog(object): + def test_simple(self): + x = np.array([1+0j, 1+2j]) + y_r = np.log(np.abs(x)) + 1j * np.angle(x) + y = np.log(x) + for i in range(len(x)): + assert_almost_equal(y[i], y_r[i]) + + @platform_skip + @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.") + def test_special_values(self): + xl = [] + yl = [] + + # From C99 std (Sec 6.3.2) + # XXX: check exceptions raised + # --- raise for invalid fails. + + # clog(-0 + i0) returns -inf + i pi and raises the 'divide-by-zero' + # floating-point exception. + with np.errstate(divide='raise'): + x = np.array([np.NZERO], dtype=complex) + y = complex(-np.inf, np.pi) + assert_raises(FloatingPointError, np.log, x) + with np.errstate(divide='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(+0 + i0) returns -inf + i0 and raises the 'divide-by-zero' + # floating-point exception. + with np.errstate(divide='raise'): + x = np.array([0], dtype=complex) + y = complex(-np.inf, 0) + assert_raises(FloatingPointError, np.log, x) + with np.errstate(divide='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(x + i inf returns +inf + i pi /2, for finite x. + x = np.array([complex(1, np.inf)], dtype=complex) + y = complex(np.inf, 0.5 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + x = np.array([complex(-1, np.inf)], dtype=complex) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(x + iNaN) returns NaN + iNaN and optionally raises the + # 'invalid' floating- point exception, for finite x. + with np.errstate(invalid='raise'): + x = np.array([complex(1., np.nan)], dtype=complex) + y = complex(np.nan, np.nan) + #assert_raises(FloatingPointError, np.log, x) + with np.errstate(invalid='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + with np.errstate(invalid='raise'): + x = np.array([np.inf + 1j * np.nan], dtype=complex) + #assert_raises(FloatingPointError, np.log, x) + with np.errstate(invalid='ignore'): + assert_almost_equal(np.log(x), y) + + xl.append(x) + yl.append(y) + + # clog(- inf + iy) returns +inf + ipi , for finite positive-signed y. + x = np.array([-np.inf + 1j], dtype=complex) + y = complex(np.inf, np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+ inf + iy) returns +inf + i0, for finite positive-signed y. + x = np.array([np.inf + 1j], dtype=complex) + y = complex(np.inf, 0) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(- inf + i inf) returns +inf + i3pi /4. + x = np.array([complex(-np.inf, np.inf)], dtype=complex) + y = complex(np.inf, 0.75 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+ inf + i inf) returns +inf + ipi /4. + x = np.array([complex(np.inf, np.inf)], dtype=complex) + y = complex(np.inf, 0.25 * np.pi) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(+/- inf + iNaN) returns +inf + iNaN. 
+ x = np.array([complex(np.inf, np.nan)], dtype=complex) + y = complex(np.inf, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + x = np.array([complex(-np.inf, np.nan)], dtype=complex) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + iy) returns NaN + iNaN and optionally raises the + # 'invalid' floating-point exception, for finite y. + x = np.array([complex(np.nan, 1)], dtype=complex) + y = complex(np.nan, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + i inf) returns +inf + iNaN. + x = np.array([complex(np.nan, np.inf)], dtype=complex) + y = complex(np.inf, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(NaN + iNaN) returns NaN + iNaN. + x = np.array([complex(np.nan, np.nan)], dtype=complex) + y = complex(np.nan, np.nan) + assert_almost_equal(np.log(x), y) + xl.append(x) + yl.append(y) + + # clog(conj(z)) = conj(clog(z)). + xa = np.array(xl, dtype=complex) + ya = np.array(yl, dtype=complex) + with np.errstate(divide='ignore'): + for i in range(len(xa)): + assert_almost_equal(np.log(xa[i].conj()), ya[i].conj()) + + +class TestCsqrt(object): + + def test_simple(self): + # sqrt(1) + check_complex_value(np.sqrt, 1, 0, 1, 0) + + # sqrt(1i) + rres = 0.5*np.sqrt(2) + ires = rres + check_complex_value(np.sqrt, 0, 1, rres, ires, False) + + # sqrt(-1) + check_complex_value(np.sqrt, -1, 0, 0, 1) + + def test_simple_conjugate(self): + ref = np.conj(np.sqrt(complex(1, 1))) + + def f(z): + return np.sqrt(np.conj(z)) + + check_complex_value(f, 1, 1, ref.real, ref.imag, False) + + #def test_branch_cut(self): + # _check_branch_cut(f, -1, 0, 1, -1) + + @platform_skip + def test_special_values(self): + # C99: Sec G 6.4.2 + + check = check_complex_value + f = np.sqrt + + # csqrt(+-0 + 0i) is 0 + 0i + check(f, np.PZERO, 0, 0, 0) + check(f, np.NZERO, 0, 0, 0) + + # csqrt(x + infi) is inf + infi for any x (including NaN) + check(f, 1, np.inf, np.inf, np.inf) + check(f, -1, np.inf, np.inf, np.inf) + + check(f, np.PZERO, np.inf, np.inf, np.inf) + check(f, np.NZERO, np.inf, np.inf, np.inf) + check(f, np.inf, np.inf, np.inf, np.inf) + check(f, -np.inf, np.inf, np.inf, np.inf) + check(f, -np.nan, np.inf, np.inf, np.inf) + + # csqrt(x + nani) is nan + nani for any finite x + check(f, 1, np.nan, np.nan, np.nan) + check(f, -1, np.nan, np.nan, np.nan) + check(f, 0, np.nan, np.nan, np.nan) + + # csqrt(-inf + yi) is +0 + infi for any finite y > 0 + check(f, -np.inf, 1, np.PZERO, np.inf) + + # csqrt(inf + yi) is +inf + 0i for any finite y > 0 + check(f, np.inf, 1, np.inf, np.PZERO) + + # csqrt(-inf + nani) is nan +- infi (both +i infi are valid) + def _check_ninf_nan(dummy): + msgform = "csqrt(-inf, nan) is (%f, %f), expected (nan, +-inf)" + z = np.sqrt(np.array(complex(-np.inf, np.nan))) + #Fixme: ugly workaround for isinf bug. 
+ with np.errstate(invalid='ignore'): + if not (np.isnan(z.real) and np.isinf(z.imag)): + raise AssertionError(msgform % (z.real, z.imag)) + + _check_ninf_nan(None) + + # csqrt(+inf + nani) is inf + nani + check(f, np.inf, np.nan, np.inf, np.nan) + + # csqrt(nan + yi) is nan + nani for any finite y (infinite handled in x + # + nani) + check(f, np.nan, 0, np.nan, np.nan) + check(f, np.nan, 1, np.nan, np.nan) + check(f, np.nan, np.nan, np.nan, np.nan) + + # XXX: check for conj(csqrt(z)) == csqrt(conj(z)) (need to fix branch + # cuts first) + +class TestCpow(object): + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + def test_simple(self): + x = np.array([1+1j, 0+2j, 1+2j, np.inf, np.nan]) + y_r = x ** 2 + y = np.power(x, 2) + for i in range(len(x)): + assert_almost_equal(y[i], y_r[i]) + + def test_scalar(self): + x = np.array([1, 1j, 2, 2.5+.37j, np.inf, np.nan]) + y = np.array([1, 1j, -0.5+1.5j, -0.5+1.5j, 2, 3]) + lx = list(range(len(x))) + # Compute the values for complex type in python + p_r = [complex(x[i]) ** complex(y[i]) for i in lx] + # Substitute a result allowed by C99 standard + p_r[4] = complex(np.inf, np.nan) + # Do the same with numpy complex scalars + n_r = [x[i] ** y[i] for i in lx] + for i in lx: + assert_almost_equal(n_r[i], p_r[i], err_msg='Loop %d\n' % i) + + def test_array(self): + x = np.array([1, 1j, 2, 2.5+.37j, np.inf, np.nan]) + y = np.array([1, 1j, -0.5+1.5j, -0.5+1.5j, 2, 3]) + lx = list(range(len(x))) + # Compute the values for complex type in python + p_r = [complex(x[i]) ** complex(y[i]) for i in lx] + # Substitute a result allowed by C99 standard + p_r[4] = complex(np.inf, np.nan) + # Do the same with numpy arrays + n_r = x ** y + for i in lx: + assert_almost_equal(n_r[i], p_r[i], err_msg='Loop %d\n' % i) + +class TestCabs(object): + def setup(self): + self.olderr = np.seterr(invalid='ignore') + + def teardown(self): + np.seterr(**self.olderr) + + def test_simple(self): + x = np.array([1+1j, 0+2j, 1+2j, np.inf, np.nan]) + y_r = np.array([np.sqrt(2.), 2, np.sqrt(5), np.inf, np.nan]) + y = np.abs(x) + for i in range(len(x)): + assert_almost_equal(y[i], y_r[i]) + + def test_fabs(self): + # Test that np.abs(x +- 0j) == np.abs(x) (as mandated by C99 for cabs) + x = np.array([1+0j], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(1, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(np.inf, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + x = np.array([complex(np.nan, np.NZERO)], dtype=complex) + assert_array_equal(np.abs(x), np.real(x)) + + def test_cabs_inf_nan(self): + x, y = [], [] + + # cabs(+-nan + nani) returns nan + x.append(np.nan) + y.append(np.nan) + check_real_value(np.abs, np.nan, np.nan, np.nan) + + x.append(np.nan) + y.append(-np.nan) + check_real_value(np.abs, -np.nan, np.nan, np.nan) + + # According to C99 standard, if exactly one of the real/part is inf and + # the other nan, then cabs should return inf + x.append(np.inf) + y.append(np.nan) + check_real_value(np.abs, np.inf, np.nan, np.inf) + + x.append(-np.inf) + y.append(np.nan) + check_real_value(np.abs, -np.inf, np.nan, np.inf) + + # cabs(conj(z)) == conj(cabs(z)) (= cabs(z)) + def f(a): + return np.abs(np.conj(a)) + + def g(a, b): + return np.abs(complex(a, b)) + + xa = np.array(x, dtype=complex) + for i in range(len(xa)): + ref = g(x[i], y[i]) + check_real_value(f, x[i], y[i], ref) + +class TestCarg(object): + def 
test_simple(self): + check_real_value(ncu._arg, 1, 0, 0, False) + check_real_value(ncu._arg, 0, 1, 0.5*np.pi, False) + + check_real_value(ncu._arg, 1, 1, 0.25*np.pi, False) + check_real_value(ncu._arg, np.PZERO, np.PZERO, np.PZERO) + + # TODO This can be xfail when the generator functions are got rid of. + @pytest.mark.skip( + reason="Complex arithmetic with signed zero fails on most platforms") + def test_zero(self): + # carg(-0 +- 0i) returns +- pi + check_real_value(ncu._arg, np.NZERO, np.PZERO, np.pi, False) + check_real_value(ncu._arg, np.NZERO, np.NZERO, -np.pi, False) + + # carg(+0 +- 0i) returns +- 0 + check_real_value(ncu._arg, np.PZERO, np.PZERO, np.PZERO) + check_real_value(ncu._arg, np.PZERO, np.NZERO, np.NZERO) + + # carg(x +- 0i) returns +- 0 for x > 0 + check_real_value(ncu._arg, 1, np.PZERO, np.PZERO, False) + check_real_value(ncu._arg, 1, np.NZERO, np.NZERO, False) + + # carg(x +- 0i) returns +- pi for x < 0 + check_real_value(ncu._arg, -1, np.PZERO, np.pi, False) + check_real_value(ncu._arg, -1, np.NZERO, -np.pi, False) + + # carg(+- 0 + yi) returns pi/2 for y > 0 + check_real_value(ncu._arg, np.PZERO, 1, 0.5 * np.pi, False) + check_real_value(ncu._arg, np.NZERO, 1, 0.5 * np.pi, False) + + # carg(+- 0 + yi) returns -pi/2 for y < 0 + check_real_value(ncu._arg, np.PZERO, -1, 0.5 * np.pi, False) + check_real_value(ncu._arg, np.NZERO, -1, -0.5 * np.pi, False) + + #def test_branch_cuts(self): + # _check_branch_cut(ncu._arg, -1, 1j, -1, 1) + + def test_special_values(self): + # carg(-np.inf +- yi) returns +-pi for finite y > 0 + check_real_value(ncu._arg, -np.inf, 1, np.pi, False) + check_real_value(ncu._arg, -np.inf, -1, -np.pi, False) + + # carg(np.inf +- yi) returns +-0 for finite y > 0 + check_real_value(ncu._arg, np.inf, 1, np.PZERO, False) + check_real_value(ncu._arg, np.inf, -1, np.NZERO, False) + + # carg(x +- np.infi) returns +-pi/2 for finite x + check_real_value(ncu._arg, 1, np.inf, 0.5 * np.pi, False) + check_real_value(ncu._arg, 1, -np.inf, -0.5 * np.pi, False) + + # carg(-np.inf +- np.infi) returns +-3pi/4 + check_real_value(ncu._arg, -np.inf, np.inf, 0.75 * np.pi, False) + check_real_value(ncu._arg, -np.inf, -np.inf, -0.75 * np.pi, False) + + # carg(np.inf +- np.infi) returns +-pi/4 + check_real_value(ncu._arg, np.inf, np.inf, 0.25 * np.pi, False) + check_real_value(ncu._arg, np.inf, -np.inf, -0.25 * np.pi, False) + + # carg(x + yi) returns np.nan if x or y is nan + check_real_value(ncu._arg, np.nan, 0, np.nan, False) + check_real_value(ncu._arg, 0, np.nan, np.nan, False) + + check_real_value(ncu._arg, np.nan, np.inf, np.nan, False) + check_real_value(ncu._arg, np.inf, np.nan, np.nan, False) + + +def check_real_value(f, x1, y1, x, exact=True): + z1 = np.array([complex(x1, y1)]) + if exact: + assert_equal(f(z1), x) + else: + assert_almost_equal(f(z1), x) + + +def check_complex_value(f, x1, y1, x2, y2, exact=True): + z1 = np.array([complex(x1, y1)]) + z2 = complex(x2, y2) + with np.errstate(invalid='ignore'): + if exact: + assert_equal(f(z1), z2) + else: + assert_almost_equal(f(z1), z2) diff --git a/contrib/python/numpy/py2/numpy/core/tests/test_unicode.py b/contrib/python/numpy/py2/numpy/core/tests/test_unicode.py new file mode 100644 index 0000000000..2ffd8801b7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/core/tests/test_unicode.py @@ -0,0 +1,396 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import numpy as np +from numpy.compat import unicode +from numpy.testing import assert_, assert_equal, assert_array_equal + +# 
Guess the UCS length for this python interpreter +if sys.version_info[:2] >= (3, 3): + # Python 3.3 uses a flexible string representation + ucs4 = False + + def buffer_length(arr): + if isinstance(arr, unicode): + arr = str(arr) + if not arr: + charmax = 0 + else: + charmax = max([ord(c) for c in arr]) + if charmax < 256: + size = 1 + elif charmax < 65536: + size = 2 + else: + size = 4 + return size * len(arr) + v = memoryview(arr) + if v.shape is None: + return len(v) * v.itemsize + else: + return np.prod(v.shape) * v.itemsize +else: + if len(buffer(u'u')) == 4: + ucs4 = True + else: + ucs4 = False + + def buffer_length(arr): + if isinstance(arr, np.ndarray): + return len(arr.data) + return len(buffer(arr)) + +# In both cases below we need to make sure that the byte swapped value (as +# UCS4) is still a valid unicode: +# Value that can be represented in UCS2 interpreters +ucs2_value = u'\u0900' +# Value that cannot be represented in UCS2 interpreters (but can in UCS4) +ucs4_value = u'\U00100900' + + +def test_string_cast(): + str_arr = np.array(["1234", "1234\0\0"], dtype='S') + uni_arr1 = str_arr.astype('>U') + uni_arr2 = str_arr.astype('<U') + + if sys.version_info[0] < 3: + assert_array_equal(str_arr, uni_arr1) + assert_array_equal(str_arr, uni_arr2) + else: + assert_(str_arr != uni_arr1) + assert_(str_arr != uni_arr2) + assert_array_equal(uni_arr1, uni_arr2) + + +############################################################ +# Creation tests +############################################################ + +class CreateZeros(object): + """Check the creation of zero-valued arrays""" + + def content_check(self, ua, ua_scalar, nbytes): + + # Check the length of the unicode base type + assert_(int(ua.dtype.str[2:]) == self.ulen) + # Check the length of the data buffer + assert_(buffer_length(ua) == nbytes) + # Small check that data in array element is ok + assert_(ua_scalar == u'') + # Encode to ascii and double check + assert_(ua_scalar.encode('ascii') == b'') + # Check buffer lengths for scalars + if ucs4: + assert_(buffer_length(ua_scalar) == 0) + else: + assert_(buffer_length(ua_scalar) == 0) + + def test_zeros0D(self): + # Check creation of 0-dimensional objects + ua = np.zeros((), dtype='U%s' % self.ulen) + self.content_check(ua, ua[()], 4*self.ulen) + + def test_zerosSD(self): + # Check creation of single-dimensional objects + ua = np.zeros((2,), dtype='U%s' % self.ulen) + self.content_check(ua, ua[0], 4*self.ulen*2) + self.content_check(ua, ua[1], 4*self.ulen*2) + + def test_zerosMD(self): + # Check creation of multi-dimensional objects + ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen) + self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4) + self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4) + + +class TestCreateZeros_1(CreateZeros): + """Check the creation of zero-valued arrays (size 1)""" + ulen = 1 + + +class TestCreateZeros_2(CreateZeros): + """Check the creation of zero-valued arrays (size 2)""" + ulen = 2 + + +class TestCreateZeros_1009(CreateZeros): + """Check the creation of zero-valued arrays (size 1009)""" + ulen = 1009 + + +class CreateValues(object): + """Check the creation of unicode arrays with values""" + + def content_check(self, ua, ua_scalar, nbytes): + + # Check the length of the unicode base type + assert_(int(ua.dtype.str[2:]) == self.ulen) + # Check the length of the data buffer + assert_(buffer_length(ua) == nbytes) + # Small check that data in array element is ok + assert_(ua_scalar == self.ucs_value*self.ulen) + # Encode to UTF-8 and double check + 
assert_(ua_scalar.encode('utf-8') == + (self.ucs_value*self.ulen).encode('utf-8')) + # Check buffer lengths for scalars + if ucs4: + assert_(buffer_length(ua_scalar) == 4*self.ulen) + else: + if self.ucs_value == ucs4_value: + # In UCS2, the \U0010FFFF will be represented using a + # surrogate *pair* + assert_(buffer_length(ua_scalar) == 2*2*self.ulen) + else: + # In UCS2, the \uFFFF will be represented using a + # regular 2-byte word + assert_(buffer_length(ua_scalar) == 2*self.ulen) + + def test_values0D(self): + # Check creation of 0-dimensional objects with values + ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen) + self.content_check(ua, ua[()], 4*self.ulen) + + def test_valuesSD(self): + # Check creation of single-dimensional objects with values + ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen) + self.content_check(ua, ua[0], 4*self.ulen*2) + self.content_check(ua, ua[1], 4*self.ulen*2) + + def test_valuesMD(self): + # Check creation of multi-dimensional objects with values + ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4, dtype='U%s' % self.ulen) + self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4) + self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4) + + +class TestCreateValues_1_UCS2(CreateValues): + """Check the creation of valued arrays (size 1, UCS2 values)""" + ulen = 1 + ucs_value = ucs2_value + + +class TestCreateValues_1_UCS4(CreateValues): + """Check the creation of valued arrays (size 1, UCS4 values)""" + ulen = 1 + ucs_value = ucs4_value + + +class TestCreateValues_2_UCS2(CreateValues): + """Check the creation of valued arrays (size 2, UCS2 values)""" + ulen = 2 + ucs_value = ucs2_value + + +class TestCreateValues_2_UCS4(CreateValues): + """Check the creation of valued arrays (size 2, UCS4 values)""" + ulen = 2 + ucs_value = ucs4_value + + +class TestCreateValues_1009_UCS2(CreateValues): + """Check the creation of valued arrays (size 1009, UCS2 values)""" + ulen = 1009 + ucs_value = ucs2_value + + +class TestCreateValues_1009_UCS4(CreateValues): + """Check the creation of valued arrays (size 1009, UCS4 values)""" + ulen = 1009 + ucs_value = ucs4_value + + +############################################################ +# Assignment tests +############################################################ + +class AssignValues(object): + """Check the assignment of unicode arrays with values""" + + def content_check(self, ua, ua_scalar, nbytes): + + # Check the length of the unicode base type + assert_(int(ua.dtype.str[2:]) == self.ulen) + # Check the length of the data buffer + assert_(buffer_length(ua) == nbytes) + # Small check that data in array element is ok + assert_(ua_scalar == self.ucs_value*self.ulen) + # Encode to UTF-8 and double check + assert_(ua_scalar.encode('utf-8') == + (self.ucs_value*self.ulen).encode('utf-8')) + # Check buffer lengths for scalars + if ucs4: + assert_(buffer_length(ua_scalar) == 4*self.ulen) + else: + if self.ucs_value == ucs4_value: + # In UCS2, the \U0010FFFF will be represented using a + # surrogate *pair* + assert_(buffer_length(ua_scalar) == 2*2*self.ulen) + else: + # In UCS2, the \uFFFF will be represented using a + # regular 2-byte word + assert_(buffer_length(ua_scalar) == 2*self.ulen) + + def test_values0D(self): + # Check assignment of 0-dimensional objects with values + ua = np.zeros((), dtype='U%s' % self.ulen) + ua[()] = self.ucs_value*self.ulen + self.content_check(ua, ua[()], 4*self.ulen) + + def test_valuesSD(self): + # Check assignment of single-dimensional objects with values 
+ ua = np.zeros((2,), dtype='U%s' % self.ulen) + ua[0] = self.ucs_value*self.ulen + self.content_check(ua, ua[0], 4*self.ulen*2) + ua[1] = self.ucs_value*self.ulen + self.content_check(ua, ua[1], 4*self.ulen*2) + + def test_valuesMD(self): + # Check assignment of multi-dimensional objects with values + ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen) + ua[0, 0, 0] = self.ucs_value*self.ulen + self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4) + ua[-1, -1, -1] = self.ucs_value*self.ulen + self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4) + + +class TestAssignValues_1_UCS2(AssignValues): + """Check the assignment of valued arrays (size 1, UCS2 values)""" + ulen = 1 + ucs_value = ucs2_value + + +class TestAssignValues_1_UCS4(AssignValues): + """Check the assignment of valued arrays (size 1, UCS4 values)""" + ulen = 1 + ucs_value = ucs4_value + + +class TestAssignValues_2_UCS2(AssignValues): + """Check the assignment of valued arrays (size 2, UCS2 values)""" + ulen = 2 + ucs_value = ucs2_value + + +class TestAssignValues_2_UCS4(AssignValues): + """Check the assignment of valued arrays (size 2, UCS4 values)""" + ulen = 2 + ucs_value = ucs4_value + + +class TestAssignValues_1009_UCS2(AssignValues): + """Check the assignment of valued arrays (size 1009, UCS2 values)""" + ulen = 1009 + ucs_value = ucs2_value + + +class TestAssignValues_1009_UCS4(AssignValues): + """Check the assignment of valued arrays (size 1009, UCS4 values)""" + ulen = 1009 + ucs_value = ucs4_value + + +############################################################ +# Byteorder tests +############################################################ + +class ByteorderValues(object): + """Check the byteorder of unicode arrays in round-trip conversions""" + + def test_values0D(self): + # Check byteorder of 0-dimensional objects + ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen) + ua2 = ua.newbyteorder() + # This changes the interpretation of the data region (but not the + # actual data), therefore the returned scalars are not + # the same (they are byte-swapped versions of each other). 
+ assert_(ua[()] != ua2[()]) + ua3 = ua2.newbyteorder() + # Arrays must be equal after the round-trip + assert_equal(ua, ua3) + + def test_valuesSD(self): + # Check byteorder of single-dimensional objects + ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen) + ua2 = ua.newbyteorder() + assert_((ua != ua2).all()) + assert_(ua[-1] != ua2[-1]) + ua3 = ua2.newbyteorder() + # Arrays must be equal after the round-trip + assert_equal(ua, ua3) + + def test_valuesMD(self): + # Check byteorder of multi-dimensional objects + ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4, + dtype='U%s' % self.ulen) + ua2 = ua.newbyteorder() + assert_((ua != ua2).all()) + assert_(ua[-1, -1, -1] != ua2[-1, -1, -1]) + ua3 = ua2.newbyteorder() + # Arrays must be equal after the round-trip + assert_equal(ua, ua3) + + def test_values_cast(self): + # Check byteorder of when casting the array for a strided and + # contiguous array: + test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen) + test2 = np.repeat(test1, 2)[::2] + for ua in (test1, test2): + ua2 = ua.astype(dtype=ua.dtype.newbyteorder()) + assert_((ua == ua2).all()) + assert_(ua[-1] == ua2[-1]) + ua3 = ua2.astype(dtype=ua.dtype) + # Arrays must be equal after the round-trip + assert_equal(ua, ua3) + + def test_values_updowncast(self): + # Check byteorder of when casting the array to a longer and shorter + # string length for strided and contiguous arrays + test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen) + test2 = np.repeat(test1, 2)[::2] + for ua in (test1, test2): + # Cast to a longer type with zero padding + longer_type = np.dtype('U%s' % (self.ulen+1)).newbyteorder() + ua2 = ua.astype(dtype=longer_type) + assert_((ua == ua2).all()) + assert_(ua[-1] == ua2[-1]) + # Cast back again with truncating: + ua3 = ua2.astype(dtype=ua.dtype) + # Arrays must be equal after the round-trip + assert_equal(ua, ua3) + + +class TestByteorder_1_UCS2(ByteorderValues): + """Check the byteorder in unicode (size 1, UCS2 values)""" + ulen = 1 + ucs_value = ucs2_value + + +class TestByteorder_1_UCS4(ByteorderValues): + """Check the byteorder in unicode (size 1, UCS4 values)""" + ulen = 1 + ucs_value = ucs4_value + + +class TestByteorder_2_UCS2(ByteorderValues): + """Check the byteorder in unicode (size 2, UCS2 values)""" + ulen = 2 + ucs_value = ucs2_value + + +class TestByteorder_2_UCS4(ByteorderValues): + """Check the byteorder in unicode (size 2, UCS4 values)""" + ulen = 2 + ucs_value = ucs4_value + + +class TestByteorder_1009_UCS2(ByteorderValues): + """Check the byteorder in unicode (size 1009, UCS2 values)""" + ulen = 1009 + ucs_value = ucs2_value + + +class TestByteorder_1009_UCS4(ByteorderValues): + """Check the byteorder in unicode (size 1009, UCS4 values)""" + ulen = 1009 + ucs_value = ucs4_value diff --git a/contrib/python/numpy/py2/numpy/distutils/mingw/gfortran_vs2003_hack.c b/contrib/python/numpy/py2/numpy/distutils/mingw/gfortran_vs2003_hack.c new file mode 100644 index 0000000000..485a675d8a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/mingw/gfortran_vs2003_hack.c @@ -0,0 +1,6 @@ +int _get_output_format(void) +{ + return 0; +} + +int _imp____lc_codepage = 0; diff --git a/contrib/python/numpy/py2/numpy/distutils/setup.py b/contrib/python/numpy/py2/numpy/distutils/setup.py new file mode 100644 index 0000000000..82a53bd08d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/setup.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +from __future__ import division, print_function + +def 
configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('distutils', parent_package, top_path) + config.add_subpackage('command') + config.add_subpackage('fcompiler') + config.add_data_dir('tests') + config.add_data_files('site.cfg') + config.add_data_files('mingw/gfortran_vs2003_hack.c') + config.make_config_py() + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/__init__.py b/contrib/python/numpy/py2/numpy/distutils/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_exec_command.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_exec_command.py new file mode 100644 index 0000000000..8bd2650074 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_exec_command.py @@ -0,0 +1,215 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys +from tempfile import TemporaryFile + +from numpy.distutils import exec_command +from numpy.distutils.exec_command import get_pythonexe +from numpy.testing import tempdir, assert_ + +# In python 3 stdout, stderr are text (unicode compliant) devices, so to +# emulate them import StringIO from the io module. +if sys.version_info[0] >= 3: + from io import StringIO +else: + from StringIO import StringIO + +class redirect_stdout(object): + """Context manager to redirect stdout for exec_command test.""" + def __init__(self, stdout=None): + self._stdout = stdout or sys.stdout + + def __enter__(self): + self.old_stdout = sys.stdout + sys.stdout = self._stdout + + def __exit__(self, exc_type, exc_value, traceback): + self._stdout.flush() + sys.stdout = self.old_stdout + # note: closing sys.stdout won't close it. + self._stdout.close() + +class redirect_stderr(object): + """Context manager to redirect stderr for exec_command test.""" + def __init__(self, stderr=None): + self._stderr = stderr or sys.stderr + + def __enter__(self): + self.old_stderr = sys.stderr + sys.stderr = self._stderr + + def __exit__(self, exc_type, exc_value, traceback): + self._stderr.flush() + sys.stderr = self.old_stderr + # note: closing sys.stderr won't close it. + self._stderr.close() + +class emulate_nonposix(object): + """Context manager to emulate os.name != 'posix' """ + def __init__(self, osname='non-posix'): + self._new_name = osname + + def __enter__(self): + self._old_name = os.name + os.name = self._new_name + + def __exit__(self, exc_type, exc_value, traceback): + os.name = self._old_name + + +def test_exec_command_stdout(): + # Regression test for gh-2999 and gh-2915. + # There are several packages (nose, scipy.weave.inline, Sage inline + # Fortran) that replace stdout, in which case it doesn't have a fileno + # method. This is tested here, with a do-nothing command that fails if the + # presence of fileno() is assumed in exec_command. + + # The code has a special case for posix systems, so if we are on posix test + # both that the special case works and that the generic code works. 
+ + # Test posix version: + with redirect_stdout(StringIO()): + with redirect_stderr(TemporaryFile()): + exec_command.exec_command("cd '.'") + + if os.name == 'posix': + # Test general (non-posix) version: + with emulate_nonposix(): + with redirect_stdout(StringIO()): + with redirect_stderr(TemporaryFile()): + exec_command.exec_command("cd '.'") + +def test_exec_command_stderr(): + # Test posix version: + with redirect_stdout(TemporaryFile(mode='w+')): + with redirect_stderr(StringIO()): + exec_command.exec_command("cd '.'") + + if os.name == 'posix': + # Test general (non-posix) version: + with emulate_nonposix(): + with redirect_stdout(TemporaryFile()): + with redirect_stderr(StringIO()): + exec_command.exec_command("cd '.'") + + +class TestExecCommand(object): + def setup(self): + self.pyexe = get_pythonexe() + + def check_nt(self, **kws): + s, o = exec_command.exec_command('cmd /C echo path=%path%') + assert_(s == 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.stderr.write(sys.platform)"' % self.pyexe) + assert_(s == 0) + assert_(o == 'win32') + + def check_posix(self, **kws): + s, o = exec_command.exec_command("echo Hello", **kws) + assert_(s == 0) + assert_(o == 'Hello') + + s, o = exec_command.exec_command('echo $AAA', **kws) + assert_(s == 0) + assert_(o == '') + + s, o = exec_command.exec_command('echo "$AAA"', AAA='Tere', **kws) + assert_(s == 0) + assert_(o == 'Tere') + + s, o = exec_command.exec_command('echo "$AAA"', **kws) + assert_(s == 0) + assert_(o == '') + + if 'BBB' not in os.environ: + os.environ['BBB'] = 'Hi' + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == 'Hi') + + s, o = exec_command.exec_command('echo "$BBB"', BBB='Hey', **kws) + assert_(s == 0) + assert_(o == 'Hey') + + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == 'Hi') + + del os.environ['BBB'] + + s, o = exec_command.exec_command('echo "$BBB"', **kws) + assert_(s == 0) + assert_(o == '') + + + s, o = exec_command.exec_command('this_is_not_a_command', **kws) + assert_(s != 0) + assert_(o != '') + + s, o = exec_command.exec_command('echo path=$PATH', **kws) + assert_(s == 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys,os;sys.stderr.write(os.name)"' % + self.pyexe, **kws) + assert_(s == 0) + assert_(o == 'posix') + + def check_basic(self, *kws): + s, o = exec_command.exec_command( + '"%s" -c "raise \'Ignore me.\'"' % self.pyexe, **kws) + assert_(s != 0) + assert_(o != '') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.stderr.write(\'0\');' + 'sys.stderr.write(\'1\');sys.stderr.write(\'2\')"' % + self.pyexe, **kws) + assert_(s == 0) + assert_(o == '012') + + s, o = exec_command.exec_command( + '"%s" -c "import sys;sys.exit(15)"' % self.pyexe, **kws) + assert_(s == 15) + assert_(o == '') + + s, o = exec_command.exec_command( + '"%s" -c "print(\'Heipa\'")' % self.pyexe, **kws) + assert_(s == 0) + assert_(o == 'Heipa') + + def check_execute_in(self, **kws): + with tempdir() as tmpdir: + fn = "file" + tmpfile = os.path.join(tmpdir, fn) + f = open(tmpfile, 'w') + f.write('Hello') + f.close() + + s, o = exec_command.exec_command( + '"%s" -c "f = open(\'%s\', \'r\'); f.close()"' % + (self.pyexe, fn), **kws) + assert_(s != 0) + assert_(o != '') + s, o = exec_command.exec_command( + '"%s" -c "f = open(\'%s\', \'r\'); print(f.read()); ' + 'f.close()"' % (self.pyexe, fn), execute_in=tmpdir, **kws) + assert_(s == 0) + assert_(o == 'Hello') + + def 
test_basic(self): + with redirect_stdout(StringIO()): + with redirect_stderr(StringIO()): + if os.name == "posix": + self.check_posix(use_tee=0) + self.check_posix(use_tee=1) + elif os.name == "nt": + self.check_nt(use_tee=0) + self.check_nt(use_tee=1) + self.check_execute_in(use_tee=0) + self.check_execute_in(use_tee=1) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler.py new file mode 100644 index 0000000000..ba19a97ea6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler.py @@ -0,0 +1,81 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +from numpy.testing import assert_, suppress_warnings +import numpy.distutils.fcompiler + +customizable_flags = [ + ('f77', 'F77FLAGS'), + ('f90', 'F90FLAGS'), + ('free', 'FREEFLAGS'), + ('arch', 'FARCH'), + ('debug', 'FDEBUG'), + ('flags', 'FFLAGS'), + ('linker_so', 'LDFLAGS'), +] + + +def test_fcompiler_flags(monkeypatch): + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '0') + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='none') + flag_vars = fc.flag_vars.clone(lambda *args, **kwargs: None) + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + + monkeypatch.delenv(envvar) + assert_(new_flags == [new_flag]) + + monkeypatch.setenv('NPY_DISTUTILS_APPEND_FLAGS', '1') + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + prev_flags = getattr(flag_vars, opt) + monkeypatch.setenv(envvar, new_flag) + new_flags = getattr(flag_vars, opt) + + monkeypatch.delenv(envvar) + if prev_flags is None: + assert_(new_flags == [new_flag]) + else: + assert_(new_flags == prev_flags + [new_flag]) + + +def test_fcompiler_flags_append_warning(monkeypatch): + # Test to check that the warning for append behavior changing in future + # is triggered. Need to use a real compiler instance so that we have + # non-empty flags to start with (otherwise the "if var and append" check + # will always be false). 
+ try: + with suppress_warnings() as sup: + sup.record() + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95') + fc.customize() + except numpy.distutils.fcompiler.CompilerNotFound: + pytest.skip("gfortran not found, so can't execute this test") + + # Ensure NPY_DISTUTILS_APPEND_FLAGS not defined + monkeypatch.delenv('NPY_DISTUTILS_APPEND_FLAGS', raising=False) + + for opt, envvar in customizable_flags: + new_flag = '-dummy-{}-flag'.format(opt) + with suppress_warnings() as sup: + sup.record() + prev_flags = getattr(fc.flag_vars, opt) + + monkeypatch.setenv(envvar, new_flag) + with suppress_warnings() as sup: + sup.record() + new_flags = getattr(fc.flag_vars, opt) + if prev_flags: + # Check that warning was issued + assert len(sup.log) == 1 + + monkeypatch.delenv(envvar) + assert_(new_flags == [new_flag]) + diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_gnu.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_gnu.py new file mode 100644 index 0000000000..49208aaced --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_gnu.py @@ -0,0 +1,57 @@ +from __future__ import division, absolute_import, print_function + +from numpy.testing import assert_ + +import numpy.distutils.fcompiler + +g77_version_strings = [ + ('GNU Fortran 0.5.25 20010319 (prerelease)', '0.5.25'), + ('GNU Fortran (GCC 3.2) 3.2 20020814 (release)', '3.2'), + ('GNU Fortran (GCC) 3.3.3 20040110 (prerelease) (Debian)', '3.3.3'), + ('GNU Fortran (GCC) 3.3.3 (Debian 20040401)', '3.3.3'), + ('GNU Fortran (GCC 3.2.2 20030222 (Red Hat Linux 3.2.2-5)) 3.2.2' + ' 20030222 (Red Hat Linux 3.2.2-5)', '3.2.2'), +] + +gfortran_version_strings = [ + ('GNU Fortran 95 (GCC 4.0.3 20051023 (prerelease) (Debian 4.0.2-3))', + '4.0.3'), + ('GNU Fortran 95 (GCC) 4.1.0', '4.1.0'), + ('GNU Fortran 95 (GCC) 4.2.0 20060218 (experimental)', '4.2.0'), + ('GNU Fortran (GCC) 4.3.0 20070316 (experimental)', '4.3.0'), + ('GNU Fortran (rubenvb-4.8.0) 4.8.0', '4.8.0'), + ('4.8.0', '4.8.0'), + ('4.0.3-7', '4.0.3'), + ("gfortran: warning: couldn't understand kern.osversion '14.1.0\n4.9.1", + '4.9.1'), + ("gfortran: warning: couldn't understand kern.osversion '14.1.0\n" + "gfortran: warning: yet another warning\n4.9.1", + '4.9.1'), + ('GNU Fortran (crosstool-NG 8a21ab48) 7.2.0', '7.2.0') +] + +class TestG77Versions(object): + def test_g77_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu') + for vs, version in g77_version_strings: + v = fc.version_match(vs) + assert_(v == version, (vs, v)) + + def test_not_g77(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu') + for vs, _ in gfortran_version_strings: + v = fc.version_match(vs) + assert_(v is None, (vs, v)) + +class TestGFortranVersions(object): + def test_gfortran_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95') + for vs, version in gfortran_version_strings: + v = fc.version_match(vs) + assert_(v == version, (vs, v)) + + def test_not_gfortran(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95') + for vs, _ in g77_version_strings: + v = fc.version_match(vs) + assert_(v is None, (vs, v)) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_intel.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_intel.py new file mode 100644 index 0000000000..5e014bada3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_intel.py @@ -0,0 +1,32 @@ +from __future__ import division, 
absolute_import, print_function + +import numpy.distutils.fcompiler +from numpy.testing import assert_ + + +intel_32bit_version_strings = [ + ("Intel(R) Fortran Intel(R) 32-bit Compiler Professional for applications" + "running on Intel(R) 32, Version 11.1", '11.1'), +] + +intel_64bit_version_strings = [ + ("Intel(R) Fortran IA-64 Compiler Professional for applications" + "running on IA-64, Version 11.0", '11.0'), + ("Intel(R) Fortran Intel(R) 64 Compiler Professional for applications" + "running on Intel(R) 64, Version 11.1", '11.1') +] + +class TestIntelFCompilerVersions(object): + def test_32bit_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='intel') + for vs, version in intel_32bit_version_strings: + v = fc.version_match(vs) + assert_(v == version) + + +class TestIntelEM64TFCompilerVersions(object): + def test_64bit_version(self): + fc = numpy.distutils.fcompiler.new_fcompiler(compiler='intelem') + for vs, version in intel_64bit_version_strings: + v = fc.version_match(vs) + assert_(v == version) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_nagfor.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_nagfor.py new file mode 100644 index 0000000000..1c936056a8 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_fcompiler_nagfor.py @@ -0,0 +1,24 @@ +from __future__ import division, absolute_import, print_function + +from numpy.testing import assert_ +import numpy.distutils.fcompiler + +nag_version_strings = [('nagfor', 'NAG Fortran Compiler Release ' + '6.2(Chiyoda) Build 6200', '6.2'), + ('nagfor', 'NAG Fortran Compiler Release ' + '6.1(Tozai) Build 6136', '6.1'), + ('nagfor', 'NAG Fortran Compiler Release ' + '6.0(Hibiya) Build 1021', '6.0'), + ('nagfor', 'NAG Fortran Compiler Release ' + '5.3.2(971)', '5.3.2'), + ('nag', 'NAGWare Fortran 95 compiler Release 5.1' + '(347,355-367,375,380-383,389,394,399,401-402,407,' + '431,435,437,446,459-460,463,472,494,496,503,508,' + '511,517,529,555,557,565)', '5.1')] + +class TestNagFCompilerVersions(object): + def test_version_match(self): + for comp, vs, version in nag_version_strings: + fc = numpy.distutils.fcompiler.new_fcompiler(compiler=comp) + v = fc.version_match(vs) + assert_(v == version) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_from_template.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_from_template.py new file mode 100644 index 0000000000..5881754962 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_from_template.py @@ -0,0 +1,44 @@ + +from numpy.distutils.from_template import process_str +from numpy.testing import assert_equal + + +pyf_src = """ +python module foo + <_rd=real,double precision> + interface + subroutine <s,d>foosub(tol) + <_rd>, intent(in,out) :: tol + end subroutine <s,d>foosub + end interface +end python module foo +""" + +expected_pyf = """ +python module foo + interface + subroutine sfoosub(tol) + real, intent(in,out) :: tol + end subroutine sfoosub + subroutine dfoosub(tol) + double precision, intent(in,out) :: tol + end subroutine dfoosub + end interface +end python module foo +""" + + +def normalize_whitespace(s): + """ + Remove leading and trailing whitespace, and convert internal + stretches of whitespace to a single space. 
+ """ + return ' '.join(s.split()) + + +def test_from_template(): + """Regression test for gh-10712.""" + pyf = process_str(pyf_src) + normalized_pyf = normalize_whitespace(pyf) + normalized_expected_pyf = normalize_whitespace(expected_pyf) + assert_equal(normalized_pyf, normalized_expected_pyf) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_misc_util.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_misc_util.py new file mode 100644 index 0000000000..3e239cf48c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_misc_util.py @@ -0,0 +1,84 @@ +from __future__ import division, absolute_import, print_function + +from os.path import join, sep, dirname + +from numpy.distutils.misc_util import ( + appendpath, minrelpath, gpaths, get_shared_lib_extension, get_info + ) +from numpy.testing import ( + assert_, assert_equal + ) + +ajoin = lambda *paths: join(*((sep,)+paths)) + +class TestAppendpath(object): + + def test_1(self): + assert_equal(appendpath('prefix', 'name'), join('prefix', 'name')) + assert_equal(appendpath('/prefix', 'name'), ajoin('prefix', 'name')) + assert_equal(appendpath('/prefix', '/name'), ajoin('prefix', 'name')) + assert_equal(appendpath('prefix', '/name'), join('prefix', 'name')) + + def test_2(self): + assert_equal(appendpath('prefix/sub', 'name'), + join('prefix', 'sub', 'name')) + assert_equal(appendpath('prefix/sub', 'sup/name'), + join('prefix', 'sub', 'sup', 'name')) + assert_equal(appendpath('/prefix/sub', '/prefix/name'), + ajoin('prefix', 'sub', 'name')) + + def test_3(self): + assert_equal(appendpath('/prefix/sub', '/prefix/sup/name'), + ajoin('prefix', 'sub', 'sup', 'name')) + assert_equal(appendpath('/prefix/sub/sub2', '/prefix/sup/sup2/name'), + ajoin('prefix', 'sub', 'sub2', 'sup', 'sup2', 'name')) + assert_equal(appendpath('/prefix/sub/sub2', '/prefix/sub/sup/name'), + ajoin('prefix', 'sub', 'sub2', 'sup', 'name')) + +class TestMinrelpath(object): + + def test_1(self): + n = lambda path: path.replace('/', sep) + assert_equal(minrelpath(n('aa/bb')), n('aa/bb')) + assert_equal(minrelpath('..'), '..') + assert_equal(minrelpath(n('aa/..')), '') + assert_equal(minrelpath(n('aa/../bb')), 'bb') + assert_equal(minrelpath(n('aa/bb/..')), 'aa') + assert_equal(minrelpath(n('aa/bb/../..')), '') + assert_equal(minrelpath(n('aa/bb/../cc/../dd')), n('aa/dd')) + assert_equal(minrelpath(n('.././..')), n('../..')) + assert_equal(minrelpath(n('aa/bb/.././../dd')), n('dd')) + +class TestGpaths(object): + + def test_gpaths(self): + local_path = minrelpath(join(dirname(__file__), '..')) + ls = gpaths('command/*.py', local_path) + assert_(join(local_path, 'command', 'build_src.py') in ls, repr(ls)) + f = gpaths('system_info.py', local_path) + assert_(join(local_path, 'system_info.py') == f[0], repr(f)) + +class TestSharedExtension(object): + + def test_get_shared_lib_extension(self): + import sys + ext = get_shared_lib_extension(is_python_ext=False) + if sys.platform.startswith('linux'): + assert_equal(ext, '.so') + elif sys.platform.startswith('gnukfreebsd'): + assert_equal(ext, '.so') + elif sys.platform.startswith('darwin'): + assert_equal(ext, '.dylib') + elif sys.platform.startswith('win'): + assert_equal(ext, '.dll') + # just check for no crash + assert_(get_shared_lib_extension(is_python_ext=True)) + + +def test_installed_npymath_ini(): + # Regression test for gh-7707. If npymath.ini wasn't installed, then this + # will give an error. 
+ info = get_info('npymath') + + assert isinstance(info, dict) + assert "define_macros" in info diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_npy_pkg_config.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_npy_pkg_config.py new file mode 100644 index 0000000000..537e16e90d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_npy_pkg_config.py @@ -0,0 +1,86 @@ +from __future__ import division, absolute_import, print_function + +import os + +from numpy.distutils.npy_pkg_config import read_config, parse_flags +from numpy.testing import temppath, assert_ + +simple = """\ +[meta] +Name = foo +Description = foo lib +Version = 0.1 + +[default] +cflags = -I/usr/include +libs = -L/usr/lib +""" +simple_d = {'cflags': '-I/usr/include', 'libflags': '-L/usr/lib', + 'version': '0.1', 'name': 'foo'} + +simple_variable = """\ +[meta] +Name = foo +Description = foo lib +Version = 0.1 + +[variables] +prefix = /foo/bar +libdir = ${prefix}/lib +includedir = ${prefix}/include + +[default] +cflags = -I${includedir} +libs = -L${libdir} +""" +simple_variable_d = {'cflags': '-I/foo/bar/include', 'libflags': '-L/foo/bar/lib', + 'version': '0.1', 'name': 'foo'} + +class TestLibraryInfo(object): + def test_simple(self): + with temppath('foo.ini') as path: + with open(path, 'w') as f: + f.write(simple) + pkg = os.path.splitext(path)[0] + out = read_config(pkg) + + assert_(out.cflags() == simple_d['cflags']) + assert_(out.libs() == simple_d['libflags']) + assert_(out.name == simple_d['name']) + assert_(out.version == simple_d['version']) + + def test_simple_variable(self): + with temppath('foo.ini') as path: + with open(path, 'w') as f: + f.write(simple_variable) + pkg = os.path.splitext(path)[0] + out = read_config(pkg) + + assert_(out.cflags() == simple_variable_d['cflags']) + assert_(out.libs() == simple_variable_d['libflags']) + assert_(out.name == simple_variable_d['name']) + assert_(out.version == simple_variable_d['version']) + out.vars['prefix'] = '/Users/david' + assert_(out.cflags() == '-I/Users/david/include') + +class TestParseFlags(object): + def test_simple_cflags(self): + d = parse_flags("-I/usr/include") + assert_(d['include_dirs'] == ['/usr/include']) + + d = parse_flags("-I/usr/include -DFOO") + assert_(d['include_dirs'] == ['/usr/include']) + assert_(d['macros'] == ['FOO']) + + d = parse_flags("-I /usr/include -DFOO") + assert_(d['include_dirs'] == ['/usr/include']) + assert_(d['macros'] == ['FOO']) + + def test_simple_lflags(self): + d = parse_flags("-L/usr/lib -lfoo -L/usr/lib -lbar") + assert_(d['library_dirs'] == ['/usr/lib', '/usr/lib']) + assert_(d['libraries'] == ['foo', 'bar']) + + d = parse_flags("-L /usr/lib -lfoo -L/usr/lib -lbar") + assert_(d['library_dirs'] == ['/usr/lib', '/usr/lib']) + assert_(d['libraries'] == ['foo', 'bar']) diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_shell_utils.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_shell_utils.py new file mode 100644 index 0000000000..a0344244fe --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_shell_utils.py @@ -0,0 +1,79 @@ +from __future__ import division, absolute_import, print_function + +import pytest +import subprocess +import os +import json +import sys + +from numpy.distutils import _shell_utils + +argv_cases = [ + [r'exe'], + [r'path/exe'], + [r'path\exe'], + [r'\\server\path\exe'], + [r'path to/exe'], + [r'path to\exe'], + + [r'exe', '--flag'], + [r'path/exe', '--flag'], + [r'path\exe', '--flag'], + [r'path to/exe', '--flag'], + 
[r'path to\exe', '--flag'], + + # flags containing literal quotes in their name + [r'path to/exe', '--flag-"quoted"'], + [r'path to\exe', '--flag-"quoted"'], + [r'path to/exe', '"--flag-quoted"'], + [r'path to\exe', '"--flag-quoted"'], +] + + +@pytest.fixture(params=[ + _shell_utils.WindowsParser, + _shell_utils.PosixParser +]) +def Parser(request): + return request.param + + +@pytest.fixture +def runner(Parser): + if Parser != _shell_utils.NativeParser: + pytest.skip('Unable to run with non-native parser') + + if Parser == _shell_utils.WindowsParser: + return lambda cmd: subprocess.check_output(cmd) + elif Parser == _shell_utils.PosixParser: + # posix has no non-shell string parsing + return lambda cmd: subprocess.check_output(cmd, shell=True) + else: + raise NotImplementedError + + +@pytest.mark.parametrize('argv', argv_cases) +def test_join_matches_subprocess(Parser, runner, argv): + """ + Test that join produces strings understood by subprocess + """ + # invoke python to return its arguments as json + cmd = [ + sys.executable, '-c', + 'import json, sys; print(json.dumps(sys.argv[1:]))' + ] + joined = Parser.join(cmd + argv) + json_out = runner(joined).decode() + assert json.loads(json_out) == argv + + +@pytest.mark.parametrize('argv', argv_cases) +def test_roundtrip(Parser, argv): + """ + Test that split is the inverse operation of join + """ + try: + joined = Parser.join(argv) + assert argv == Parser.split(joined) + except NotImplementedError: + pytest.skip("Not implemented") diff --git a/contrib/python/numpy/py2/numpy/distutils/tests/test_system_info.py b/contrib/python/numpy/py2/numpy/distutils/tests/test_system_info.py new file mode 100644 index 0000000000..f7e275a2e7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/distutils/tests/test_system_info.py @@ -0,0 +1,237 @@ +from __future__ import division, print_function + +import os +import shutil +import pytest +from tempfile import mkstemp, mkdtemp +from subprocess import Popen, PIPE +from distutils.errors import DistutilsError + +from numpy.distutils import ccompiler, customized_ccompiler +from numpy.testing import assert_, assert_equal +from numpy.distutils.system_info import system_info, ConfigParser +from numpy.distutils.system_info import default_lib_dirs, default_include_dirs +from numpy.distutils import _shell_utils + + +def get_class(name, notfound_action=1): + """ + notfound_action: + 0 - do nothing + 1 - display warning message + 2 - raise error + """ + cl = {'temp1': Temp1Info, + 'temp2': Temp2Info + }.get(name.lower(), _system_info) + return cl() + +simple_site = """ +[ALL] +library_dirs = {dir1:s}{pathsep:s}{dir2:s} +libraries = {lib1:s},{lib2:s} +extra_compile_args = -I/fake/directory -I"/path with/spaces" -Os +runtime_library_dirs = {dir1:s} + +[temp1] +library_dirs = {dir1:s} +libraries = {lib1:s} +runtime_library_dirs = {dir1:s} + +[temp2] +library_dirs = {dir2:s} +libraries = {lib2:s} +extra_link_args = -Wl,-rpath={lib2_escaped:s} +rpath = {dir2:s} +""" +site_cfg = simple_site + +fakelib_c_text = """ +/* This file is generated from numpy/distutils/testing/test_system_info.py */ +#include<stdio.h> +void foo(void) { + printf("Hello foo"); +} +void bar(void) { + printf("Hello bar"); +} +""" + +def have_compiler(): + """ Return True if there appears to be an executable compiler + """ + compiler = customized_ccompiler() + try: + cmd = compiler.compiler # Unix compilers + except AttributeError: + try: + if not compiler.initialized: + compiler.initialize() # MSVC is different + except (DistutilsError, ValueError): + 
return False + cmd = [compiler.cc] + try: + p = Popen(cmd, stdout=PIPE, stderr=PIPE) + p.stdout.close() + p.stderr.close() + p.wait() + except OSError: + return False + return True + + +HAVE_COMPILER = have_compiler() + + +class _system_info(system_info): + + def __init__(self, + default_lib_dirs=default_lib_dirs, + default_include_dirs=default_include_dirs, + verbosity=1, + ): + self.__class__.info = {} + self.local_prefixes = [] + defaults = {'library_dirs': '', + 'include_dirs': '', + 'runtime_library_dirs': '', + 'rpath': '', + 'src_dirs': '', + 'search_static_first': "0", + 'extra_compile_args': '', + 'extra_link_args': ''} + self.cp = ConfigParser(defaults) + # We have to parse the config files afterwards + # to have a consistent temporary filepath + + def _check_libs(self, lib_dirs, libs, opt_libs, exts): + """Override _check_libs to return with all dirs """ + info = {'libraries': libs, 'library_dirs': lib_dirs} + return info + + +class Temp1Info(_system_info): + """For testing purposes""" + section = 'temp1' + + +class Temp2Info(_system_info): + """For testing purposes""" + section = 'temp2' + + +class TestSystemInfoReading(object): + + def setup(self): + """ Create the libraries """ + # Create 2 sources and 2 libraries + self._dir1 = mkdtemp() + self._src1 = os.path.join(self._dir1, 'foo.c') + self._lib1 = os.path.join(self._dir1, 'libfoo.so') + self._dir2 = mkdtemp() + self._src2 = os.path.join(self._dir2, 'bar.c') + self._lib2 = os.path.join(self._dir2, 'libbar.so') + # Update local site.cfg + global simple_site, site_cfg + site_cfg = simple_site.format(**{ + 'dir1': self._dir1, + 'lib1': self._lib1, + 'dir2': self._dir2, + 'lib2': self._lib2, + 'pathsep': os.pathsep, + 'lib2_escaped': _shell_utils.NativeParser.join([self._lib2]) + }) + # Write site.cfg + fd, self._sitecfg = mkstemp() + os.close(fd) + with open(self._sitecfg, 'w') as fd: + fd.write(site_cfg) + # Write the sources + with open(self._src1, 'w') as fd: + fd.write(fakelib_c_text) + with open(self._src2, 'w') as fd: + fd.write(fakelib_c_text) + # We create all class-instances + + def site_and_parse(c, site_cfg): + c.files = [site_cfg] + c.parse_config_files() + return c + self.c_default = site_and_parse(get_class('default'), self._sitecfg) + self.c_temp1 = site_and_parse(get_class('temp1'), self._sitecfg) + self.c_temp2 = site_and_parse(get_class('temp2'), self._sitecfg) + + def teardown(self): + # Do each removal separately + try: + shutil.rmtree(self._dir1) + except Exception: + pass + try: + shutil.rmtree(self._dir2) + except Exception: + pass + try: + os.remove(self._sitecfg) + except Exception: + pass + + def test_all(self): + # Read in all information in the ALL block + tsi = self.c_default + assert_equal(tsi.get_lib_dirs(), [self._dir1, self._dir2]) + assert_equal(tsi.get_libraries(), [self._lib1, self._lib2]) + assert_equal(tsi.get_runtime_lib_dirs(), [self._dir1]) + extra = tsi.calc_extra_info() + assert_equal(extra['extra_compile_args'], ['-I/fake/directory', '-I/path with/spaces', '-Os']) + + def test_temp1(self): + # Read in all information in the temp1 block + tsi = self.c_temp1 + assert_equal(tsi.get_lib_dirs(), [self._dir1]) + assert_equal(tsi.get_libraries(), [self._lib1]) + assert_equal(tsi.get_runtime_lib_dirs(), [self._dir1]) + + def test_temp2(self): + # Read in all information in the temp2 block + tsi = self.c_temp2 + assert_equal(tsi.get_lib_dirs(), [self._dir2]) + assert_equal(tsi.get_libraries(), [self._lib2]) + # Now from rpath and not runtime_library_dirs + 
assert_equal(tsi.get_runtime_lib_dirs(key='rpath'), [self._dir2]) + extra = tsi.calc_extra_info() + assert_equal(extra['extra_link_args'], ['-Wl,-rpath=' + self._lib2]) + + @pytest.mark.skipif(not HAVE_COMPILER, reason="Missing compiler") + def test_compile1(self): + # Compile source and link the first source + c = customized_ccompiler() + previousDir = os.getcwd() + try: + # Change directory to not screw up directories + os.chdir(self._dir1) + c.compile([os.path.basename(self._src1)], output_dir=self._dir1) + # Ensure that the object exists + assert_(os.path.isfile(self._src1.replace('.c', '.o')) or + os.path.isfile(self._src1.replace('.c', '.obj'))) + finally: + os.chdir(previousDir) + + @pytest.mark.skipif(not HAVE_COMPILER, reason="Missing compiler") + @pytest.mark.skipif('msvc' in repr(ccompiler.new_compiler()), + reason="Fails with MSVC compiler ") + def test_compile2(self): + # Compile source and link the second source + tsi = self.c_temp2 + c = customized_ccompiler() + extra_link_args = tsi.calc_extra_info()['extra_link_args'] + previousDir = os.getcwd() + try: + # Change directory to not screw up directories + os.chdir(self._dir2) + c.compile([os.path.basename(self._src2)], output_dir=self._dir2, + extra_postargs=extra_link_args) + # Ensure that the object exists + assert_(os.path.isfile(self._src2.replace('.c', '.o'))) + finally: + os.chdir(previousDir) diff --git a/contrib/python/numpy/py2/numpy/f2py/setup.cfg b/contrib/python/numpy/py2/numpy/f2py/setup.cfg new file mode 100644 index 0000000000..14669544cc --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/setup.cfg @@ -0,0 +1,3 @@ +[bdist_rpm] +doc_files = docs/ + tests/ \ No newline at end of file diff --git a/contrib/python/numpy/py2/numpy/f2py/setup.py b/contrib/python/numpy/py2/numpy/f2py/setup.py new file mode 100644 index 0000000000..c0c50ce547 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/setup.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +""" +setup.py for installing F2PY + +Usage: + python setup.py install + +Copyright 2001-2005 Pearu Peterson all rights reserved, +Pearu Peterson <pearu@cens.ioc.ee> +Permission to use, modify, and distribute this software is given under the +terms of the NumPy License. + +NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 
+$Revision: 1.32 $ +$Date: 2005/01/30 17:22:14 $ +Pearu Peterson + +""" +from __future__ import division, print_function + +from numpy.distutils.core import setup +from numpy.distutils.misc_util import Configuration + + +from __version__ import version + + +def configuration(parent_package='', top_path=None): + config = Configuration('f2py', parent_package, top_path) + config.add_data_dir('tests') + config.add_data_files( + 'src/fortranobject.c', + 'src/fortranobject.h') + return config + + +if __name__ == "__main__": + + config = configuration(top_path='') + config = config.todict() + + config['download_url'] = "http://cens.ioc.ee/projects/f2py2e/2.x"\ + "/F2PY-2-latest.tar.gz" + config['classifiers'] = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: NumPy License', + 'Natural Language :: English', + 'Operating System :: OS Independent', + 'Programming Language :: C', + 'Programming Language :: Fortran', + 'Programming Language :: Python', + 'Topic :: Scientific/Engineering', + 'Topic :: Software Development :: Code Generators', + ] + setup(version=version, + description="F2PY - Fortran to Python Interface Generator", + author="Pearu Peterson", + author_email="pearu@cens.ioc.ee", + maintainer="Pearu Peterson", + maintainer_email="pearu@cens.ioc.ee", + license="BSD", + platforms="Unix, Windows (mingw|cygwin), Mac OSX", + long_description="""\ +The Fortran to Python Interface Generator, or F2PY for short, is a +command line tool (f2py) for generating Python C/API modules for +wrapping Fortran 77/90/95 subroutines, accessing common blocks from +Python, and calling Python functions from Fortran (call-backs). +Interfacing subroutines/data from Fortran 90/95 modules is supported.""", + url="http://cens.ioc.ee/projects/f2py2e/", + keywords=['Fortran', 'f2py'], + **config) diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/Makefile b/contrib/python/numpy/py2/numpy/f2py/src/test/Makefile new file mode 100644 index 0000000000..0f8869f726 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/Makefile @@ -0,0 +1,96 @@ +# -*- makefile -*- +# File: Makefile-foo +# Usage: +# make -f Makefile-foo [MODE=opt|debug] +# Notes: +# 1) You must use GNU make; try `gmake ..' if `make' fails. +# 2) This file is auto-generated with f2py (version 2.264). +# f2py is a Fortran to Python Interface Generator (FPIG), Second Edition, +# written by Pearu Peterson <pearu@ioc.ee>. +# See http://cens.ioc.ee/projects/f2py2e/ +# Generation date: Wed Sep 13 16:22:55 2000 +# $Revision: 1.2 $ +# $Date: 2000/09/17 16:10:27 $ + +# Recommendation notes produced by f2py2e/buildmakefile.py: +# *** + +PYINC = -I/numeric/include/python1.5/Numeric -I/numeric/include/python1.5 +INCLUDES = -I.. 
+LIBS = -L$(shell gcc -v 2>&1 | grep specs | sed -e 's/Reading specs from //g' | sed -e 's/\/specs//g') -lg2c +LIBS=-L$$ABSOFT/lib -lfio -lf77math -lf90math +LIBS=-L/numeric/bin -lvast90 -L/usr/lib/gcc-lib/i586-mandrake-linux/2.95.2 -lg2c + +# Wrapper generator: +F2PY = /home/pearu/bin/f2py-cvs + +# Fortran compiler: Absoft f95 +FC = f95 +FC = f90 +FOPT = +FDEBUG = +FFLAGS = -B108 -YCFRL=1 -YCOM_NAMES=LCS -YCOM_PFX -YCOM_SFX=_ -YEXT_PFX -YEXT_NAMES=LCS +FFLAGS = +# C compiler: cc ('gcc 2.x.x' 2.95.2) +CC = cc +COPT = +CDEBUG = +CFLAGS = -fpic + +# Linker: ld ('GNU ld' 2.9.5) +LD = ld +LDFLAGS = -shared -s +SO = .so + +ifeq '$(MODE)' 'debug' +FFLAGS += $(FDEBUG) +CFLAGS += $(CDEBUG) +endif +ifeq '$(MODE)' 'opt' +FFLAGS += $(FOPT) +CFLAGS += $(COPT) +endif +FFLAGS += $(INCLUDES) +CFLAGS += $(PYINC) $(INCLUDES) + +SRCC = ../fortranobject.c +SRCF = mod.f90 bar.f foo90.f90 wrap.f +SRCS = $(SRCC) $(SRCF) +OBJC = $(filter %.o,$(SRCC:.c=.o) $(SRCC:.cc=.o) $(SRCC:.C=.o)) +OBJF = $(filter %.o,$(SRCF:.f90=.o) $(SRCF:.f=.o) $(SRCF:.F=.o) $(SRCF:.for=.o)) +OBJS = $(OBJC) $(OBJF) + +INSTALLNAME = f2py2e-apps +INSTALLDIRECTORY = /numeric/lib/python1.5/site-packages/$(INSTALLNAME) +INSTALLDIR = install -d -c +INSTALLEXEC = install -m 755 -c + +all: foo + +foo: foomodule$(SO) +foomodule$(SO) : foomodule.o $(OBJS) + $(LD) $(LDFLAGS) -o $@ $< $(OBJS) $(LIBS) + +foomodule.o: foomodule.c + + +$(OBJS) : $(SRCS) +%.o : %.f ; $(FC) -c $(FFLAGS) $< +%.o : %.f90 ; $(FC) -c $(FFLAGS) $< + +test: foomodule$(SO) + python -c 'import foo;print foo.__doc__' +install: foomodule$(SO) + $(INSTALLDIR) $(INSTALLDIRECTORY) + $(INSTALLEXEC) foomodule$(SO) $(INSTALLDIRECTORY) + cd $(INSTALLDIRECTORY) && echo "$(INSTALLNAME)" > ../$(INSTALLNAME).pth + +.PHONY: clean distclean debug test install foo +debug: + echo "OBJS=$(OBJS)" + echo "SRCS=$(SRCS)" +clean: + $(RM) *.o *.mod core foomodule.{dvi,log} $(OBJS) +distclean: clean + $(RM) *.so *.sl foomodule.{tex,so} + $(RM) .f2py_get_compiler_* diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/bar.f b/contrib/python/numpy/py2/numpy/f2py/src/test/bar.f new file mode 100644 index 0000000000..5354ceaf98 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/bar.f @@ -0,0 +1,11 @@ + subroutine bar() + integer a + real*8 b,c(3) + common /foodata/ a,b,c + a = 4 + b = 6.7 + c(2) = 3.0 + write(*,*) "bar:a=",a + write(*,*) "bar:b=",b + write(*,*) "bar:c=",c + end diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/foo.f b/contrib/python/numpy/py2/numpy/f2py/src/test/foo.f new file mode 100644 index 0000000000..5354ceaf98 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/foo.f @@ -0,0 +1,11 @@ + subroutine bar() + integer a + real*8 b,c(3) + common /foodata/ a,b,c + a = 4 + b = 6.7 + c(2) = 3.0 + write(*,*) "bar:a=",a + write(*,*) "bar:b=",b + write(*,*) "bar:c=",c + end diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/foo90.f90 b/contrib/python/numpy/py2/numpy/f2py/src/test/foo90.f90 new file mode 100644 index 0000000000..dbca7e95ba --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/foo90.f90 @@ -0,0 +1,13 @@ +subroutine foo() + integer a + real*8 b,c(3) + common /foodata/ a,b,c + print*, " F: in foo" + a = 5 + b = 6.3 + c(2) = 9.1 +end subroutine foo + + + + diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/foomodule.c b/contrib/python/numpy/py2/numpy/f2py/src/test/foomodule.c new file mode 100644 index 0000000000..733fab0bed --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/foomodule.c @@ -0,0 +1,142 @@ +/* 
File: foomodule.c + * Example of FortranObject usage. See also wrap.f foo.f foo90.f90. + * Author: Pearu Peterson <pearu@ioc.ee>. + * http://cens.ioc.ee/projects/f2py2e/ + * $Revision: 1.2 $ + * $Date: 2000/09/17 16:10:27 $ + */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "Python.h" +#include "fortranobject.h" + +static PyObject *foo_error; + +#if defined(NO_APPEND_FORTRAN) +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) F +#else +#define F_FUNC(f,F) f +#endif +#else +#if defined(UPPERCASE_FORTRAN) +#define F_FUNC(f,F) F##_ +#else +#define F_FUNC(f,F) f##_ +#endif +#endif + +/************* foo_bar *************/ +static char doc_foo_bar[] = "\ +Function signature:\n\ + bar()\n\ +"; +static PyObject *foo_bar(PyObject *capi_self, PyObject *capi_args, + PyObject *capi_keywds, void (*f2py_func)()) { + PyObject *capi_buildvalue = NULL; + static char *capi_kwlist[] = {NULL}; + if (!PyArg_ParseTupleAndKeywords(capi_args,capi_keywds,\ + "|:foo.bar",\ + capi_kwlist)) + goto capi_fail; + (*f2py_func)(); + capi_buildvalue = Py_BuildValue(""); + capi_fail: + return capi_buildvalue; +} +/************ mod_init **************/ +static PyObject *mod_init(PyObject *capi_self, PyObject *capi_args, + PyObject *capi_keywds, void (*f2py_func)()) { + PyObject *capi_buildvalue = NULL; + static char *capi_kwlist[] = {NULL}; + if (!PyArg_ParseTupleAndKeywords(capi_args,capi_keywds,\ + "|:mod.init",\ + capi_kwlist)) + goto capi_fail; + (*f2py_func)(); + capi_buildvalue = Py_BuildValue(""); + capi_fail: + return capi_buildvalue; +} + +/* F90 module */ +static FortranDataDef f2py_mod_def[] = { + {"a",0, {}, NPY_INT}, + {"b",0, {}, NPY_DOUBLE}, + {"c",1, {3}, NPY_DOUBLE}, + {"d",1, {-1}, NPY_DOUBLE}, + {"init",-1,{},0,NULL,(void *)mod_init}, + {NULL} +}; +static void f2py_setup_mod(char *a,char *b,char *c,void (*d)(),char *init) { + f2py_mod_def[0].data = a; + f2py_mod_def[1].data = b; + f2py_mod_def[2].data = c; + f2py_mod_def[3].func = d; + f2py_mod_def[4].data = init; +} +extern void F_FUNC(f2pyinitmod,F2PYINITMOD)(); + static void f2py_init_mod() { + F_FUNC(f2pyinitmod,F2PYINITMOD)(f2py_setup_mod); + } + +/* COMMON block */ +static FortranDataDef f2py_foodata_def[] = { + {"a",0, {}, NPY_INT}, + {"b",0, {}, NPY_DOUBLE}, + {"c",1, {3}, NPY_DOUBLE}, + {NULL} +}; +static void f2py_setup_foodata(char *a,char *b,char *c) { + f2py_foodata_def[0].data = a; + f2py_foodata_def[1].data = b; + f2py_foodata_def[2].data = c; +} +extern void F_FUNC(f2pyinitfoodata,F2PYINITFOODATA)(); + static void f2py_init_foodata() { + F_FUNC(f2pyinitfoodata,F2PYINITFOODATA)(f2py_setup_foodata); + } + +/* Fortran routines (needs no initialization/setup function) */ +extern void F_FUNC(bar,BAR)(); + extern void F_FUNC(foo,FOO)(); + static FortranDataDef f2py_routines_def[] = { + {"bar",-1, {}, 0, (char *)F_FUNC(bar,BAR),(void *)foo_bar,doc_foo_bar}, + {"foo",-1, {}, 0, (char *)F_FUNC(foo,FOO),(void *)foo_bar,doc_foo_bar}, + {NULL} + }; + +static PyMethodDef foo_module_methods[] = { + /*eof method*/ + {NULL,NULL} +}; + +void initfoo() { + int i; + PyObject *m, *d, *s; + import_array(); + + m = Py_InitModule("foo", foo_module_methods); + + d = PyModule_GetDict(m); + s = PyString_FromString("This module 'foo' demonstrates the usage of fortranobject."); + PyDict_SetItemString(d, "__doc__", s); + + /* Fortran objects: */ + PyDict_SetItemString(d, "mod", PyFortranObject_New(f2py_mod_def,f2py_init_mod)); + PyDict_SetItemString(d, "foodata", PyFortranObject_New(f2py_foodata_def,f2py_init_foodata)); + 
for(i=0;f2py_routines_def[i].name!=NULL;i++) + PyDict_SetItemString(d, f2py_routines_def[i].name, + PyFortranObject_NewAsAttr(&f2py_routines_def[i])); + + Py_DECREF(s); + + if (PyErr_Occurred()) + Py_FatalError("can't initialize module foo"); +} + +#ifdef __cplusplus +} +#endif diff --git a/contrib/python/numpy/py2/numpy/f2py/src/test/wrap.f b/contrib/python/numpy/py2/numpy/f2py/src/test/wrap.f new file mode 100644 index 0000000000..9414eb9f6f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/src/test/wrap.f @@ -0,0 +1,70 @@ + subroutine f2py_mod_get_dims(f2py_r,f2py_s,f2py_set,f2py_n) + use mod + external f2py_set + logical f2py_ns + integer f2py_s(*),f2py_r,f2py_i,f2py_j + character*(*) f2py_n + if ("d".eq.f2py_n) then + f2py_ns = .FALSE. + if (allocated(d)) then + do f2py_i=1,f2py_r + if ((size(d,f2py_r-f2py_i+1).ne.f2py_s(f2py_i)).and. + c (f2py_s(f2py_i).ge.0)) then + f2py_ns = .TRUE. + end if + end do + if (f2py_ns) then + deallocate(d) + end if + end if + if (.not.allocated(d)) then + allocate(d(f2py_s(1))) + end if + if (allocated(d)) then + do f2py_i=1,f2py_r + f2py_s(f2py_i) = size(d,f2py_r-f2py_i+1) + end do + call f2py_set(d) + end if + end if + end subroutine f2py_mod_get_dims + subroutine f2py_mod_get_dims_d(r,s,set_data) + use mod, only: d => d + external set_data + logical ns + integer s(*),r,i,j + ns = .FALSE. + if (allocated(d)) then + do i=1,r + if ((size(d,r-i+1).ne.s(i)).and.(s(i).ge.0)) then + ns = .TRUE. + end if + end do + if (ns) then + deallocate(d) + end if + end if + if (.not.allocated(d).and.(s(1).ge.1)) then + allocate(d(s(1))) + end if + if (allocated(d)) then + do i=1,r + s(i) = size(d,r-i+1) + end do + end if + call set_data(d,allocated(d)) + end subroutine f2py_mod_get_dims_d + + subroutine f2pyinitmod(setupfunc) + use mod + external setupfunc,f2py_mod_get_dims_d,init + call setupfunc(a,b,c,f2py_mod_get_dims_d,init) + end subroutine f2pyinitmod + + subroutine f2pyinitfoodata(setupfunc) + external setupfunc + integer a + real*8 b,c(3) + common /foodata/ a,b,c + call setupfunc(a,b,c) + end subroutine f2pyinitfoodata diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/__init__.py b/contrib/python/numpy/py2/numpy/f2py/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c b/contrib/python/numpy/py2/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c new file mode 100644 index 0000000000..7f46303b01 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c @@ -0,0 +1,224 @@ +/* File: wrapmodule.c + * This file is auto-generated with f2py (version:2_1330). + * Hand edited by Pearu. + * f2py is a Fortran to Python Interface Generator (FPIG), Second Edition, + * written by Pearu Peterson <pearu@cens.ioc.ee>. + * See http://cens.ioc.ee/projects/f2py2e/ + * Generation date: Fri Oct 21 22:41:12 2005 + * $Revision:$ + * $Date:$ + * Do not edit this file directly unless you know what you are doing!!! 
+ */ +#ifdef __cplusplus +extern "C" { +#endif + +/*********************** See f2py2e/cfuncs.py: includes ***********************/ +#include "Python.h" +#include "fortranobject.h" +#include <math.h> + +static PyObject *wrap_error; +static PyObject *wrap_module; + +/************************************ call ************************************/ +static char doc_f2py_rout_wrap_call[] = "\ +Function signature:\n\ + arr = call(type_num,dims,intent,obj)\n\ +Required arguments:\n" +" type_num : input int\n" +" dims : input int-sequence\n" +" intent : input int\n" +" obj : input python object\n" +"Return objects:\n" +" arr : array"; +static PyObject *f2py_rout_wrap_call(PyObject *capi_self, + PyObject *capi_args) { + PyObject * volatile capi_buildvalue = NULL; + int type_num = 0; + npy_intp *dims = NULL; + PyObject *dims_capi = Py_None; + int rank = 0; + int intent = 0; + PyArrayObject *capi_arr_tmp = NULL; + PyObject *arr_capi = Py_None; + int i; + + if (!PyArg_ParseTuple(capi_args,"iOiO|:wrap.call",\ + &type_num,&dims_capi,&intent,&arr_capi)) + return NULL; + rank = PySequence_Length(dims_capi); + dims = malloc(rank*sizeof(npy_intp)); + for (i=0;i<rank;++i) + dims[i] = (npy_intp)PyInt_AsLong(PySequence_GetItem(dims_capi,i)); + + capi_arr_tmp = array_from_pyobj(type_num,dims,rank,intent|F2PY_INTENT_OUT,arr_capi); + if (capi_arr_tmp == NULL) { + free(dims); + return NULL; + } + capi_buildvalue = Py_BuildValue("N",capi_arr_tmp); + free(dims); + return capi_buildvalue; +} + +static char doc_f2py_rout_wrap_attrs[] = "\ +Function signature:\n\ + arr = array_attrs(arr)\n\ +Required arguments:\n" +" arr : input array object\n" +"Return objects:\n" +" data : data address in hex\n" +" nd : int\n" +" dimensions : tuple\n" +" strides : tuple\n" +" base : python object\n" +" (kind,type,type_num,elsize,alignment) : 4-tuple\n" +" flags : int\n" +" itemsize : int\n" +; +static PyObject *f2py_rout_wrap_attrs(PyObject *capi_self, + PyObject *capi_args) { + PyObject *arr_capi = Py_None; + PyArrayObject *arr = NULL; + PyObject *dimensions = NULL; + PyObject *strides = NULL; + char s[100]; + int i; + memset(s,0,100*sizeof(char)); + if (!PyArg_ParseTuple(capi_args,"O!|:wrap.attrs", + &PyArray_Type,&arr_capi)) + return NULL; + arr = (PyArrayObject *)arr_capi; + sprintf(s,"%p",PyArray_DATA(arr)); + dimensions = PyTuple_New(PyArray_NDIM(arr)); + strides = PyTuple_New(PyArray_NDIM(arr)); + for (i=0;i<PyArray_NDIM(arr);++i) { + PyTuple_SetItem(dimensions,i,PyInt_FromLong(PyArray_DIM(arr,i))); + PyTuple_SetItem(strides,i,PyInt_FromLong(PyArray_STRIDE(arr,i))); + } + return Py_BuildValue("siOOO(cciii)ii",s,PyArray_NDIM(arr), + dimensions,strides, + (PyArray_BASE(arr)==NULL?Py_None:PyArray_BASE(arr)), + PyArray_DESCR(arr)->kind, + PyArray_DESCR(arr)->type, + PyArray_TYPE(arr), + PyArray_ITEMSIZE(arr), + PyArray_DESCR(arr)->alignment, + PyArray_FLAGS(arr), + PyArray_ITEMSIZE(arr)); +} + +static PyMethodDef f2py_module_methods[] = { + + {"call",f2py_rout_wrap_call,METH_VARARGS,doc_f2py_rout_wrap_call}, + {"array_attrs",f2py_rout_wrap_attrs,METH_VARARGS,doc_f2py_rout_wrap_attrs}, + {NULL,NULL} +}; + +#if PY_VERSION_HEX >= 0x03000000 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "test_array_from_pyobj_ext", + NULL, + -1, + f2py_module_methods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if PY_VERSION_HEX >= 0x03000000 +#define RETVAL m +PyMODINIT_FUNC PyInit_test_array_from_pyobj_ext(void) { +#else +#define RETVAL +PyMODINIT_FUNC inittest_array_from_pyobj_ext(void) { +#endif + PyObject *m,*d, *s; +#if 
PY_VERSION_HEX >= 0x03000000 + m = wrap_module = PyModule_Create(&moduledef); +#else + m = wrap_module = Py_InitModule("test_array_from_pyobj_ext", f2py_module_methods); +#endif + Py_TYPE(&PyFortran_Type) = &PyType_Type; + import_array(); + if (PyErr_Occurred()) + Py_FatalError("can't initialize module wrap (failed to import numpy)"); + d = PyModule_GetDict(m); + s = PyString_FromString("This module 'wrap' is auto-generated with f2py (version:2_1330).\nFunctions:\n" +" arr = call(type_num,dims,intent,obj)\n" +"."); + PyDict_SetItemString(d, "__doc__", s); + wrap_error = PyErr_NewException ("wrap.error", NULL, NULL); + Py_DECREF(s); + PyDict_SetItemString(d, "F2PY_INTENT_IN", PyInt_FromLong(F2PY_INTENT_IN)); + PyDict_SetItemString(d, "F2PY_INTENT_INOUT", PyInt_FromLong(F2PY_INTENT_INOUT)); + PyDict_SetItemString(d, "F2PY_INTENT_OUT", PyInt_FromLong(F2PY_INTENT_OUT)); + PyDict_SetItemString(d, "F2PY_INTENT_HIDE", PyInt_FromLong(F2PY_INTENT_HIDE)); + PyDict_SetItemString(d, "F2PY_INTENT_CACHE", PyInt_FromLong(F2PY_INTENT_CACHE)); + PyDict_SetItemString(d, "F2PY_INTENT_COPY", PyInt_FromLong(F2PY_INTENT_COPY)); + PyDict_SetItemString(d, "F2PY_INTENT_C", PyInt_FromLong(F2PY_INTENT_C)); + PyDict_SetItemString(d, "F2PY_OPTIONAL", PyInt_FromLong(F2PY_OPTIONAL)); + PyDict_SetItemString(d, "F2PY_INTENT_INPLACE", PyInt_FromLong(F2PY_INTENT_INPLACE)); + PyDict_SetItemString(d, "NPY_BOOL", PyInt_FromLong(NPY_BOOL)); + PyDict_SetItemString(d, "NPY_BYTE", PyInt_FromLong(NPY_BYTE)); + PyDict_SetItemString(d, "NPY_UBYTE", PyInt_FromLong(NPY_UBYTE)); + PyDict_SetItemString(d, "NPY_SHORT", PyInt_FromLong(NPY_SHORT)); + PyDict_SetItemString(d, "NPY_USHORT", PyInt_FromLong(NPY_USHORT)); + PyDict_SetItemString(d, "NPY_INT", PyInt_FromLong(NPY_INT)); + PyDict_SetItemString(d, "NPY_UINT", PyInt_FromLong(NPY_UINT)); + PyDict_SetItemString(d, "NPY_INTP", PyInt_FromLong(NPY_INTP)); + PyDict_SetItemString(d, "NPY_UINTP", PyInt_FromLong(NPY_UINTP)); + PyDict_SetItemString(d, "NPY_LONG", PyInt_FromLong(NPY_LONG)); + PyDict_SetItemString(d, "NPY_ULONG", PyInt_FromLong(NPY_ULONG)); + PyDict_SetItemString(d, "NPY_LONGLONG", PyInt_FromLong(NPY_LONGLONG)); + PyDict_SetItemString(d, "NPY_ULONGLONG", PyInt_FromLong(NPY_ULONGLONG)); + PyDict_SetItemString(d, "NPY_FLOAT", PyInt_FromLong(NPY_FLOAT)); + PyDict_SetItemString(d, "NPY_DOUBLE", PyInt_FromLong(NPY_DOUBLE)); + PyDict_SetItemString(d, "NPY_LONGDOUBLE", PyInt_FromLong(NPY_LONGDOUBLE)); + PyDict_SetItemString(d, "NPY_CFLOAT", PyInt_FromLong(NPY_CFLOAT)); + PyDict_SetItemString(d, "NPY_CDOUBLE", PyInt_FromLong(NPY_CDOUBLE)); + PyDict_SetItemString(d, "NPY_CLONGDOUBLE", PyInt_FromLong(NPY_CLONGDOUBLE)); + PyDict_SetItemString(d, "NPY_OBJECT", PyInt_FromLong(NPY_OBJECT)); + PyDict_SetItemString(d, "NPY_STRING", PyInt_FromLong(NPY_STRING)); + PyDict_SetItemString(d, "NPY_UNICODE", PyInt_FromLong(NPY_UNICODE)); + PyDict_SetItemString(d, "NPY_VOID", PyInt_FromLong(NPY_VOID)); + PyDict_SetItemString(d, "NPY_NTYPES", PyInt_FromLong(NPY_NTYPES)); + PyDict_SetItemString(d, "NPY_NOTYPE", PyInt_FromLong(NPY_NOTYPE)); + PyDict_SetItemString(d, "NPY_USERDEF", PyInt_FromLong(NPY_USERDEF)); + + PyDict_SetItemString(d, "CONTIGUOUS", PyInt_FromLong(NPY_ARRAY_C_CONTIGUOUS)); + PyDict_SetItemString(d, "FORTRAN", PyInt_FromLong(NPY_ARRAY_F_CONTIGUOUS)); + PyDict_SetItemString(d, "OWNDATA", PyInt_FromLong(NPY_ARRAY_OWNDATA)); + PyDict_SetItemString(d, "FORCECAST", PyInt_FromLong(NPY_ARRAY_FORCECAST)); + PyDict_SetItemString(d, "ENSURECOPY", PyInt_FromLong(NPY_ARRAY_ENSURECOPY)); + 
PyDict_SetItemString(d, "ENSUREARRAY", PyInt_FromLong(NPY_ARRAY_ENSUREARRAY)); + PyDict_SetItemString(d, "ALIGNED", PyInt_FromLong(NPY_ARRAY_ALIGNED)); + PyDict_SetItemString(d, "WRITEABLE", PyInt_FromLong(NPY_ARRAY_WRITEABLE)); + PyDict_SetItemString(d, "UPDATEIFCOPY", PyInt_FromLong(NPY_ARRAY_UPDATEIFCOPY)); + PyDict_SetItemString(d, "WRITEBACKIFCOPY", PyInt_FromLong(NPY_ARRAY_WRITEBACKIFCOPY)); + + PyDict_SetItemString(d, "BEHAVED", PyInt_FromLong(NPY_ARRAY_BEHAVED)); + PyDict_SetItemString(d, "BEHAVED_NS", PyInt_FromLong(NPY_ARRAY_BEHAVED_NS)); + PyDict_SetItemString(d, "CARRAY", PyInt_FromLong(NPY_ARRAY_CARRAY)); + PyDict_SetItemString(d, "FARRAY", PyInt_FromLong(NPY_ARRAY_FARRAY)); + PyDict_SetItemString(d, "CARRAY_RO", PyInt_FromLong(NPY_ARRAY_CARRAY_RO)); + PyDict_SetItemString(d, "FARRAY_RO", PyInt_FromLong(NPY_ARRAY_FARRAY_RO)); + PyDict_SetItemString(d, "DEFAULT", PyInt_FromLong(NPY_ARRAY_DEFAULT)); + PyDict_SetItemString(d, "UPDATE_ALL", PyInt_FromLong(NPY_ARRAY_UPDATE_ALL)); + + if (PyErr_Occurred()) + Py_FatalError("can't initialize module wrap"); + +#ifdef F2PY_REPORT_ATEXIT + on_exit(f2py_report_on_exit,(void*)"array_from_pyobj.wrap.call"); +#endif + + return RETVAL; +} +#ifdef __cplusplus +} +#endif diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap new file mode 100644 index 0000000000..2665f89b52 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/.f2py_f2cmap @@ -0,0 +1 @@ +dict(real=dict(rk="double")) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_free.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_free.f90 new file mode 100644 index 0000000000..b301710f5d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_free.f90 @@ -0,0 +1,34 @@ + +subroutine sum(x, res) + implicit none + real, intent(in) :: x(:) + real, intent(out) :: res + + integer :: i + + !print *, "sum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end subroutine sum + +function fsum(x) result (res) + implicit none + real, intent(in) :: x(:) + real :: res + + integer :: i + + !print *, "fsum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end function fsum diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 new file mode 100644 index 0000000000..cbe6317ed8 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_mod.f90 @@ -0,0 +1,41 @@ + +module mod + +contains + +subroutine sum(x, res) + implicit none + real, intent(in) :: x(:) + real, intent(out) :: res + + integer :: i + + !print *, "sum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end subroutine sum + +function fsum(x) result (res) + implicit none + real, intent(in) :: x(:) + real :: res + + integer :: i + + !print *, "fsum: size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + +end function fsum + + +end module mod diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_use.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_use.f90 new file mode 100644 index 0000000000..337465ac54 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/foo_use.f90 @@ -0,0 +1,19 @@ +subroutine 
sum_with_use(x, res) + use precision + + implicit none + + real(kind=rk), intent(in) :: x(:) + real(kind=rk), intent(out) :: res + + integer :: i + + !print *, "size(x) = ", size(x) + + res = 0.0 + + do i = 1, size(x) + res = res + x(i) + enddo + + end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/precision.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/precision.f90 new file mode 100644 index 0000000000..ed6c70cbbe --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/assumed_shape/precision.f90 @@ -0,0 +1,4 @@ +module precision + integer, parameter :: rk = selected_real_kind(8) + integer, parameter :: ik = selected_real_kind(4) +end module diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/common/block.f b/contrib/python/numpy/py2/numpy/f2py/tests/src/common/block.f new file mode 100644 index 0000000000..7ea7968fe9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/common/block.f @@ -0,0 +1,11 @@ + SUBROUTINE INITCB + DOUBLE PRECISION LONG + CHARACTER STRING + INTEGER OK + + COMMON /BLOCK/ LONG, STRING, OK + LONG = 1.0 + STRING = '2' + OK = 3 + RETURN + END diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/kind/foo.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/kind/foo.f90 new file mode 100644 index 0000000000..d3d15cfb20 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/kind/foo.f90 @@ -0,0 +1,20 @@ + + +subroutine selectedrealkind(p, r, res) + implicit none + + integer, intent(in) :: p, r + !f2py integer :: r=0 + integer, intent(out) :: res + res = selected_real_kind(p, r) + +end subroutine + +subroutine selectedintkind(p, res) + implicit none + + integer, intent(in) :: p + integer, intent(out) :: res + res = selected_int_kind(p) + +end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo.f b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo.f new file mode 100644 index 0000000000..c34742578f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo.f @@ -0,0 +1,5 @@ + subroutine bar11(a) +cf2py intent(out) a + integer a + a = 11 + end diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_fixed.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_fixed.f90 new file mode 100644 index 0000000000..7543a6acb7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_fixed.f90 @@ -0,0 +1,8 @@ + module foo_fixed + contains + subroutine bar12(a) +!f2py intent(out) a + integer a + a = 12 + end subroutine bar12 + end module foo_fixed diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_free.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_free.f90 new file mode 100644 index 0000000000..c1b641f13e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/mixed/foo_free.f90 @@ -0,0 +1,8 @@ +module foo_free +contains + subroutine bar13(a) + !f2py intent(out) a + integer a + a = 13 + end subroutine bar13 +end module foo_free diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_both.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_both.f90 new file mode 100644 index 0000000000..ac90cedc52 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_both.f90 @@ -0,0 +1,57 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! 
used, for instance Pi = 3._dp +subroutine foo(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 3._sp + real(dp), parameter :: three_d = 3._dp + integer(ii), parameter :: three_i = 3_ii + integer(il), parameter :: three_l = 3_il + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine + + +subroutine foo_no(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 3. + real(dp), parameter :: three_d = 3. + integer(ii), parameter :: three_i = 3 + integer(il), parameter :: three_l = 3 + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine + +subroutine foo_sum(x) + implicit none + integer, parameter :: sp = selected_real_kind(6) + integer, parameter :: dp = selected_real_kind(15) + integer, parameter :: ii = selected_int_kind(9) + integer, parameter :: il = selected_int_kind(18) + real(dp), intent(inout) :: x + dimension x(3) + real(sp), parameter :: three_s = 2._sp + 1._sp + real(dp), parameter :: three_d = 1._dp + 2._dp + integer(ii), parameter :: three_i = 2_ii + 1_ii + integer(il), parameter :: three_l = 1_il + 2_il + x(1) = x(1) + x(2) * three_s * three_i + x(3) * three_d * three_l + x(2) = x(2) * three_s + x(3) = x(3) * three_l + return +end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_compound.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_compound.f90 new file mode 100644 index 0000000000..e51f5e9b2f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_compound.f90 @@ -0,0 +1,15 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! used, for instance Pi = 3._dp +subroutine foo_compound_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + integer(ii), parameter :: two = 2_ii + integer(ii), parameter :: six = three * 1_ii * two + + x(1) = x(1) + x(2) + x(3) * six + return +end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_integer.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_integer.f90 new file mode 100644 index 0000000000..aaa83d2eb2 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_integer.f90 @@ -0,0 +1,22 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! 
used, for instance Pi = 3._dp +subroutine foo_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + +subroutine foo_long(x) + implicit none + integer, parameter :: ii = selected_int_kind(18) + integer(ii), intent(inout) :: x + dimension x(3) + integer(ii), parameter :: three = 3_ii + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_non_compound.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_non_compound.f90 new file mode 100644 index 0000000000..62c9a5b943 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_non_compound.f90 @@ -0,0 +1,23 @@ +! Check that parameters are correct intercepted. +! Specifically that types of constants without +! compound kind specs are correctly inferred +! adapted Gibbs iteration code from pymc +! for this test case +subroutine foo_non_compound_int(x) + implicit none + integer, parameter :: ii = selected_int_kind(9) + + integer(ii) maxiterates + parameter (maxiterates=2) + + integer(ii) maxseries + parameter (maxseries=2) + + integer(ii) wasize + parameter (wasize=maxiterates*maxseries) + integer(ii), intent(inout) :: x + dimension x(wasize) + + x(1) = x(1) + x(2) + x(3) + x(4) * wasize + return +end subroutine diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_real.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_real.f90 new file mode 100644 index 0000000000..02ac9dd993 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/parameter/constant_real.f90 @@ -0,0 +1,23 @@ +! Check that parameters are correct intercepted. +! Constants with comma separations are commonly +! used, for instance Pi = 3._dp +subroutine foo_single(x) + implicit none + integer, parameter :: rp = selected_real_kind(6) + real(rp), intent(inout) :: x + dimension x(3) + real(rp), parameter :: three = 3._rp + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + +subroutine foo_double(x) + implicit none + integer, parameter :: rp = selected_real_kind(15) + real(rp), intent(inout) :: x + dimension x(3) + real(rp), parameter :: three = 3._rp + x(1) = x(1) + x(2) + x(3) * three + return +end subroutine + diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/regression/inout.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/regression/inout.f90 new file mode 100644 index 0000000000..80cdad90ce --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/regression/inout.f90 @@ -0,0 +1,9 @@ +! Check that intent(in out) translates as intent(inout). +! The separation seems to be a common usage. 
+ subroutine foo(x) + implicit none + real(4), intent(in out) :: x + dimension x(3) + x(1) = x(1) + x(2) + x(3) + return + end diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/size/foo.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/size/foo.f90 new file mode 100644 index 0000000000..5b66f8c430 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/size/foo.f90 @@ -0,0 +1,44 @@ + +subroutine foo(a, n, m, b) + implicit none + + real, intent(in) :: a(n, m) + integer, intent(in) :: n, m + real, intent(out) :: b(size(a, 1)) + + integer :: i + + do i = 1, size(b) + b(i) = sum(a(i,:)) + enddo +end subroutine + +subroutine trans(x,y) + implicit none + real, intent(in), dimension(:,:) :: x + real, intent(out), dimension( size(x,2), size(x,1) ) :: y + integer :: N, M, i, j + N = size(x,1) + M = size(x,2) + DO i=1,N + do j=1,M + y(j,i) = x(i,j) + END DO + END DO +end subroutine trans + +subroutine flatten(x,y) + implicit none + real, intent(in), dimension(:,:) :: x + real, intent(out), dimension( size(x) ) :: y + integer :: N, M, i, j, k + N = size(x,1) + M = size(x,2) + k = 1 + DO i=1,N + do j=1,M + y(k) = x(i,j) + k = k + 1 + END DO + END DO +end subroutine flatten diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/src/string/char.f90 b/contrib/python/numpy/py2/numpy/f2py/tests/src/string/char.f90 new file mode 100644 index 0000000000..bb7985ce50 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/src/string/char.f90 @@ -0,0 +1,29 @@ +MODULE char_test + +CONTAINS + +SUBROUTINE change_strings(strings, n_strs, out_strings) + IMPLICIT NONE + + ! Inputs + INTEGER, INTENT(IN) :: n_strs + CHARACTER, INTENT(IN), DIMENSION(2,n_strs) :: strings + CHARACTER, INTENT(OUT), DIMENSION(2,n_strs) :: out_strings + +!f2py INTEGER, INTENT(IN) :: n_strs +!f2py CHARACTER, INTENT(IN), DIMENSION(2,n_strs) :: strings +!f2py CHARACTER, INTENT(OUT), DIMENSION(2,n_strs) :: strings + + ! Misc. + INTEGER*4 :: j + + + DO j=1, n_strs + out_strings(1,j) = strings(1,j) + out_strings(2,j) = 'A' + END DO + +END SUBROUTINE change_strings + +END MODULE char_test + diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_array_from_pyobj.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_array_from_pyobj.py new file mode 100644 index 0000000000..a80090185d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_array_from_pyobj.py @@ -0,0 +1,581 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys +import copy +import pytest + +from numpy import ( + array, alltrue, ndarray, zeros, dtype, intp, clongdouble + ) +from numpy.testing import assert_, assert_equal +from numpy.core.multiarray import typeinfo +from . 
import util + +wrap = None + + +def setup_module(): + """ + Build the required testing extension module + + """ + global wrap + + # Check compiler availability first + if not util.has_c_compiler(): + pytest.skip("No C compiler available") + + if wrap is None: + config_code = """ + config.add_extension('test_array_from_pyobj_ext', + sources=['wrapmodule.c', 'fortranobject.c'], + define_macros=[]) + """ + d = os.path.dirname(__file__) + src = [os.path.join(d, 'src', 'array_from_pyobj', 'wrapmodule.c'), + os.path.join(d, '..', 'src', 'fortranobject.c'), + os.path.join(d, '..', 'src', 'fortranobject.h')] + wrap = util.build_module_distutils(src, config_code, + 'test_array_from_pyobj_ext') + + +def flags_info(arr): + flags = wrap.array_attrs(arr)[6] + return flags2names(flags) + + +def flags2names(flags): + info = [] + for flagname in ['CONTIGUOUS', 'FORTRAN', 'OWNDATA', 'ENSURECOPY', + 'ENSUREARRAY', 'ALIGNED', 'NOTSWAPPED', 'WRITEABLE', + 'WRITEBACKIFCOPY', 'UPDATEIFCOPY', 'BEHAVED', 'BEHAVED_RO', + 'CARRAY', 'FARRAY' + ]: + if abs(flags) & getattr(wrap, flagname, 0): + info.append(flagname) + return info + + +class Intent(object): + + def __init__(self, intent_list=[]): + self.intent_list = intent_list[:] + flags = 0 + for i in intent_list: + if i == 'optional': + flags |= wrap.F2PY_OPTIONAL + else: + flags |= getattr(wrap, 'F2PY_INTENT_' + i.upper()) + self.flags = flags + + def __getattr__(self, name): + name = name.lower() + if name == 'in_': + name = 'in' + return self.__class__(self.intent_list + [name]) + + def __str__(self): + return 'intent(%s)' % (','.join(self.intent_list)) + + def __repr__(self): + return 'Intent(%r)' % (self.intent_list) + + def is_intent(self, *names): + for name in names: + if name not in self.intent_list: + return False + return True + + def is_intent_exact(self, *names): + return len(self.intent_list) == len(names) and self.is_intent(*names) + +intent = Intent() + +_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT', + 'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG', + 'FLOAT', 'DOUBLE', 'CFLOAT'] + +_cast_dict = {'BOOL': ['BOOL']} +_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE'] +_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE'] +_cast_dict['BYTE'] = ['BYTE'] +_cast_dict['UBYTE'] = ['UBYTE'] +_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT'] +_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT'] +_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT'] +_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT'] + +_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG'] +_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG'] + +_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG'] +_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG'] + +_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT'] +_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE'] + +_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT'] + +# 32 bit system malloc typically does not provide the alignment required by +# 16 byte long double types this means the inout intent cannot be satisfied +# and several tests fail as the alignment flag can be randomly true or fals +# when numpy gains an aligned allocator the tests could be enabled again +if ((intp().dtype.itemsize != 4 or clongdouble().dtype.alignment <= 8) and + sys.platform != 'win32'): + _type_names.extend(['LONGDOUBLE', 'CDOUBLE', 'CLONGDOUBLE']) + _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + \ + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE'] + 
_cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + \ + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE'] + _cast_dict['CDOUBLE'] = _cast_dict['DOUBLE'] + ['CFLOAT', 'CDOUBLE'] + + +class Type(object): + _type_cache = {} + + def __new__(cls, name): + if isinstance(name, dtype): + dtype0 = name + name = None + for n, i in typeinfo.items(): + if not isinstance(i, type) and dtype0.type is i.type: + name = n + break + obj = cls._type_cache.get(name.upper(), None) + if obj is not None: + return obj + obj = object.__new__(cls) + obj._init(name) + cls._type_cache[name.upper()] = obj + return obj + + def _init(self, name): + self.NAME = name.upper() + info = typeinfo[self.NAME] + self.type_num = getattr(wrap, 'NPY_' + self.NAME) + assert_equal(self.type_num, info.num) + self.dtype = info.type + self.elsize = info.bits / 8 + self.dtypechar = info.char + + def cast_types(self): + return [self.__class__(_m) for _m in _cast_dict[self.NAME]] + + def all_types(self): + return [self.__class__(_m) for _m in _type_names] + + def smaller_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if typeinfo[name].alignment < bits: + types.append(Type(name)) + return types + + def equal_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if name == self.NAME: + continue + if typeinfo[name].alignment == bits: + types.append(Type(name)) + return types + + def larger_types(self): + bits = typeinfo[self.NAME].alignment + types = [] + for name in _type_names: + if typeinfo[name].alignment > bits: + types.append(Type(name)) + return types + + +class Array(object): + + def __init__(self, typ, dims, intent, obj): + self.type = typ + self.dims = dims + self.intent = intent + self.obj_copy = copy.deepcopy(obj) + self.obj = obj + + # arr.dtypechar may be different from typ.dtypechar + self.arr = wrap.call(typ.type_num, dims, intent.flags, obj) + + assert_(isinstance(self.arr, ndarray), repr(type(self.arr))) + + self.arr_attr = wrap.array_attrs(self.arr) + + if len(dims) > 1: + if self.intent.is_intent('c'): + assert_(intent.flags & wrap.F2PY_INTENT_C) + assert_(not self.arr.flags['FORTRAN'], + repr((self.arr.flags, getattr(obj, 'flags', None)))) + assert_(self.arr.flags['CONTIGUOUS']) + assert_(not self.arr_attr[6] & wrap.FORTRAN) + else: + assert_(not intent.flags & wrap.F2PY_INTENT_C) + assert_(self.arr.flags['FORTRAN']) + assert_(not self.arr.flags['CONTIGUOUS']) + assert_(self.arr_attr[6] & wrap.FORTRAN) + + if obj is None: + self.pyarr = None + self.pyarr_attr = None + return + + if intent.is_intent('cache'): + assert_(isinstance(obj, ndarray), repr(type(obj))) + self.pyarr = array(obj).reshape(*dims).copy() + else: + self.pyarr = array(array(obj, dtype=typ.dtypechar).reshape(*dims), + order=self.intent.is_intent('c') and 'C' or 'F') + assert_(self.pyarr.dtype == typ, + repr((self.pyarr.dtype, typ))) + assert_(self.pyarr.flags['OWNDATA'], (obj, intent)) + self.pyarr_attr = wrap.array_attrs(self.pyarr) + + if len(dims) > 1: + if self.intent.is_intent('c'): + assert_(not self.pyarr.flags['FORTRAN']) + assert_(self.pyarr.flags['CONTIGUOUS']) + assert_(not self.pyarr_attr[6] & wrap.FORTRAN) + else: + assert_(self.pyarr.flags['FORTRAN']) + assert_(not self.pyarr.flags['CONTIGUOUS']) + assert_(self.pyarr_attr[6] & wrap.FORTRAN) + + assert_(self.arr_attr[1] == self.pyarr_attr[1]) # nd + assert_(self.arr_attr[2] == self.pyarr_attr[2]) # dimensions + if self.arr_attr[1] <= 1: + assert_(self.arr_attr[3] == self.pyarr_attr[3], + repr((self.arr_attr[3], 
self.pyarr_attr[3], + self.arr.tobytes(), self.pyarr.tobytes()))) # strides + assert_(self.arr_attr[5][-2:] == self.pyarr_attr[5][-2:], + repr((self.arr_attr[5], self.pyarr_attr[5]))) # descr + assert_(self.arr_attr[6] == self.pyarr_attr[6], + repr((self.arr_attr[6], self.pyarr_attr[6], + flags2names(0 * self.arr_attr[6] - self.pyarr_attr[6]), + flags2names(self.arr_attr[6]), intent))) # flags + + if intent.is_intent('cache'): + assert_(self.arr_attr[5][3] >= self.type.elsize, + repr((self.arr_attr[5][3], self.type.elsize))) + else: + assert_(self.arr_attr[5][3] == self.type.elsize, + repr((self.arr_attr[5][3], self.type.elsize))) + assert_(self.arr_equal(self.pyarr, self.arr)) + + if isinstance(self.obj, ndarray): + if typ.elsize == Type(obj.dtype).elsize: + if not intent.is_intent('copy') and self.arr_attr[1] <= 1: + assert_(self.has_shared_memory()) + + def arr_equal(self, arr1, arr2): + if arr1.shape != arr2.shape: + return False + s = arr1 == arr2 + return alltrue(s.flatten()) + + def __str__(self): + return str(self.arr) + + def has_shared_memory(self): + """Check that created array shares data with input array. + """ + if self.obj is self.arr: + return True + if not isinstance(self.obj, ndarray): + return False + obj_attr = wrap.array_attrs(self.obj) + return obj_attr[0] == self.arr_attr[0] + + +class TestIntent(object): + + def test_in_out(self): + assert_equal(str(intent.in_.out), 'intent(in,out)') + assert_(intent.in_.c.is_intent('c')) + assert_(not intent.in_.c.is_intent_exact('c')) + assert_(intent.in_.c.is_intent_exact('c', 'in')) + assert_(intent.in_.c.is_intent_exact('in', 'c')) + assert_(not intent.in_.is_intent('c')) + + +class TestSharedMemory(object): + num2seq = [1, 2] + num23seq = [[1, 2, 3], [4, 5, 6]] + + @pytest.fixture(autouse=True, scope='class', params=_type_names) + def setup_type(self, request): + request.cls.type = Type(request.param) + request.cls.array = lambda self, dims, intent, obj: \ + Array(Type(request.param), dims, intent, obj) + + def test_in_from_2seq(self): + a = self.array([2], intent.in_, self.num2seq) + assert_(not a.has_shared_memory()) + + def test_in_from_2casttype(self): + for t in self.type.cast_types(): + obj = array(self.num2seq, dtype=t.dtype) + a = self.array([len(self.num2seq)], intent.in_, obj) + if t.elsize == self.type.elsize: + assert_( + a.has_shared_memory(), repr((self.type.dtype, t.dtype))) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_inout_2seq(self): + obj = array(self.num2seq, dtype=self.type.dtype) + a = self.array([len(self.num2seq)], intent.inout, obj) + assert_(a.has_shared_memory()) + + try: + a = self.array([2], intent.in_.inout, self.num2seq) + except TypeError as msg: + if not str(msg).startswith('failed to initialize intent' + '(inout|inplace|cache) array'): + raise + else: + raise SystemError('intent(inout) should have failed on sequence') + + def test_f_inout_23seq(self): + obj = array(self.num23seq, dtype=self.type.dtype, order='F') + shape = (len(self.num23seq), len(self.num23seq[0])) + a = self.array(shape, intent.in_.inout, obj) + assert_(a.has_shared_memory()) + + obj = array(self.num23seq, dtype=self.type.dtype, order='C') + shape = (len(self.num23seq), len(self.num23seq[0])) + try: + a = self.array(shape, intent.in_.inout, obj) + except ValueError as msg: + if not str(msg).startswith('failed to initialize intent' + '(inout) array'): + raise + else: + raise SystemError( + 'intent(inout) should have failed on improper array') + + def test_c_inout_23seq(self): + obj = 
array(self.num23seq, dtype=self.type.dtype) + shape = (len(self.num23seq), len(self.num23seq[0])) + a = self.array(shape, intent.in_.c.inout, obj) + assert_(a.has_shared_memory()) + + def test_in_copy_from_2casttype(self): + for t in self.type.cast_types(): + obj = array(self.num2seq, dtype=t.dtype) + a = self.array([len(self.num2seq)], intent.in_.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_in_from_23seq(self): + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, self.num23seq) + assert_(not a.has_shared_memory()) + + def test_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_f_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = array(self.num23seq, dtype=t.dtype, order='F') + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_, obj) + if t.elsize == self.type.elsize: + assert_(a.has_shared_memory(), repr(t.dtype)) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.c, obj) + if t.elsize == self.type.elsize: + assert_(a.has_shared_memory(), repr(t.dtype)) + else: + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_f_copy_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = array(self.num23seq, dtype=t.dtype, order='F') + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_c_copy_in_from_23casttype(self): + for t in self.type.cast_types(): + obj = array(self.num23seq, dtype=t.dtype) + a = self.array([len(self.num23seq), len(self.num23seq[0])], + intent.in_.c.copy, obj) + assert_(not a.has_shared_memory(), repr(t.dtype)) + + def test_in_cache_from_2casttype(self): + for t in self.type.all_types(): + if t.elsize != self.type.elsize: + continue + obj = array(self.num2seq, dtype=t.dtype) + shape = (len(self.num2seq),) + a = self.array(shape, intent.in_.c.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + a = self.array(shape, intent.in_.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + obj = array(self.num2seq, dtype=t.dtype, order='F') + a = self.array(shape, intent.in_.c.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + a = self.array(shape, intent.in_.cache, obj) + assert_(a.has_shared_memory(), repr(t.dtype)) + + try: + a = self.array(shape, intent.in_.cache, obj[::-1]) + except ValueError as msg: + if not str(msg).startswith('failed to initialize' + ' intent(cache) array'): + raise + else: + raise SystemError( + 'intent(cache) should have failed on multisegmented array') + + def test_in_cache_from_2casttype_failure(self): + for t in self.type.all_types(): + if t.elsize >= self.type.elsize: + continue + obj = array(self.num2seq, dtype=t.dtype) + shape = (len(self.num2seq),) + try: + self.array(shape, intent.in_.cache, obj) # Should succeed + except ValueError as msg: + if not str(msg).startswith('failed to initialize' + ' intent(cache) array'): + raise + else: + raise SystemError( + 'intent(cache) should have failed on smaller array') + + def test_cache_hidden(self): + shape = (2,) + a = self.array(shape, intent.cache.hide, None) + 
assert_(a.arr.shape == shape) + + shape = (2, 3) + a = self.array(shape, intent.cache.hide, None) + assert_(a.arr.shape == shape) + + shape = (-1, 3) + try: + a = self.array(shape, intent.cache.hide, None) + except ValueError as msg: + if not str(msg).startswith('failed to create intent' + '(cache|hide)|optional array'): + raise + else: + raise SystemError( + 'intent(cache) should have failed on undefined dimensions') + + def test_hidden(self): + shape = (2,) + a = self.array(shape, intent.hide, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + + shape = (2, 3) + a = self.array(shape, intent.hide, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + assert_(a.arr.flags['FORTRAN'] and not a.arr.flags['CONTIGUOUS']) + + shape = (2, 3) + a = self.array(shape, intent.c.hide, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + assert_(not a.arr.flags['FORTRAN'] and a.arr.flags['CONTIGUOUS']) + + shape = (-1, 3) + try: + a = self.array(shape, intent.hide, None) + except ValueError as msg: + if not str(msg).startswith('failed to create intent' + '(cache|hide)|optional array'): + raise + else: + raise SystemError('intent(hide) should have failed' + ' on undefined dimensions') + + def test_optional_none(self): + shape = (2,) + a = self.array(shape, intent.optional, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + + shape = (2, 3) + a = self.array(shape, intent.optional, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + assert_(a.arr.flags['FORTRAN'] and not a.arr.flags['CONTIGUOUS']) + + shape = (2, 3) + a = self.array(shape, intent.c.optional, None) + assert_(a.arr.shape == shape) + assert_(a.arr_equal(a.arr, zeros(shape, dtype=self.type.dtype))) + assert_(not a.arr.flags['FORTRAN'] and a.arr.flags['CONTIGUOUS']) + + def test_optional_from_2seq(self): + obj = self.num2seq + shape = (len(obj),) + a = self.array(shape, intent.optional, obj) + assert_(a.arr.shape == shape) + assert_(not a.has_shared_memory()) + + def test_optional_from_23seq(self): + obj = self.num23seq + shape = (len(obj), len(obj[0])) + a = self.array(shape, intent.optional, obj) + assert_(a.arr.shape == shape) + assert_(not a.has_shared_memory()) + + a = self.array(shape, intent.optional.c, obj) + assert_(a.arr.shape == shape) + assert_(not a.has_shared_memory()) + + def test_inplace(self): + obj = array(self.num23seq, dtype=self.type.dtype) + assert_(not obj.flags['FORTRAN'] and obj.flags['CONTIGUOUS']) + shape = obj.shape + a = self.array(shape, intent.inplace, obj) + assert_(obj[1][2] == a.arr[1][2], repr((obj, a.arr))) + a.arr[1][2] = 54 + assert_(obj[1][2] == a.arr[1][2] == + array(54, dtype=self.type.dtype), repr((obj, a.arr))) + assert_(a.arr is obj) + assert_(obj.flags['FORTRAN']) # obj attributes are changed inplace! 
+ assert_(not obj.flags['CONTIGUOUS']) + + def test_inplace_from_casttype(self): + for t in self.type.cast_types(): + if t is self.type: + continue + obj = array(self.num23seq, dtype=t.dtype) + assert_(obj.dtype.type == t.dtype) + assert_(obj.dtype.type is not self.type.dtype) + assert_(not obj.flags['FORTRAN'] and obj.flags['CONTIGUOUS']) + shape = obj.shape + a = self.array(shape, intent.inplace, obj) + assert_(obj[1][2] == a.arr[1][2], repr((obj, a.arr))) + a.arr[1][2] = 54 + assert_(obj[1][2] == a.arr[1][2] == + array(54, dtype=self.type.dtype), repr((obj, a.arr))) + assert_(a.arr is obj) + assert_(obj.flags['FORTRAN']) # obj attributes changed inplace! + assert_(not obj.flags['CONTIGUOUS']) + assert_(obj.dtype.type is self.type.dtype) # obj changed inplace! diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_assumed_shape.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_assumed_shape.py new file mode 100644 index 0000000000..460afd68db --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_assumed_shape.py @@ -0,0 +1,33 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +from numpy.testing import assert_ +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestAssumedShapeSumExample(util.F2PyTest): + sources = [_path('src', 'assumed_shape', 'foo_free.f90'), + _path('src', 'assumed_shape', 'foo_use.f90'), + _path('src', 'assumed_shape', 'precision.f90'), + _path('src', 'assumed_shape', 'foo_mod.f90'), + ] + + @pytest.mark.slow + def test_all(self): + r = self.module.fsum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.sum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.sum_with_use([1, 2]) + assert_(r == 3, repr(r)) + + r = self.module.mod.sum([1, 2]) + assert_(r == 3, repr(r)) + r = self.module.mod.fsum([1, 2]) + assert_(r == 3, repr(r)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_block_docstring.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_block_docstring.py new file mode 100644 index 0000000000..4f1678980f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_block_docstring.py @@ -0,0 +1,24 @@ +from __future__ import division, absolute_import, print_function + +import sys +import pytest +from . import util + +from numpy.testing import assert_equal, IS_PYPY + +class TestBlockDocString(util.F2PyTest): + code = """ + SUBROUTINE FOO() + INTEGER BAR(2, 3) + + COMMON /BLOCK/ BAR + RETURN + END + """ + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + @pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") + def test_block_docstring(self): + expected = "'i'-array(2,3)\n" + assert_equal(self.module.block.__doc__, expected) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_callback.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_callback.py new file mode 100644 index 0000000000..824ef7b0c5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_callback.py @@ -0,0 +1,165 @@ +from __future__ import division, absolute_import, print_function + +import math +import textwrap +import sys +import pytest + +import numpy as np +from numpy.testing import assert_, assert_equal +from . 
import util + + +class TestF77Callback(util.F2PyTest): + code = """ + subroutine t(fun,a) + integer a +cf2py intent(out) a + external fun + call fun(a) + end + + subroutine func(a) +cf2py intent(in,out) a + integer a + a = a + 11 + end + + subroutine func0(a) +cf2py intent(out) a + integer a + a = 11 + end + + subroutine t2(a) +cf2py intent(callback) fun + integer a +cf2py intent(out) a + external fun + call fun(a) + end + + subroutine string_callback(callback, a) + external callback + double precision callback + double precision a + character*1 r +cf2py intent(out) a + r = 'r' + a = callback(r) + end + + subroutine string_callback_array(callback, cu, lencu, a) + external callback + integer callback + integer lencu + character*8 cu(lencu) + integer a +cf2py intent(out) a + + a = callback(cu, lencu) + end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't,t2'.split(',')) + def test_all(self, name): + self.check_function(name) + + @pytest.mark.slow + def test_docstring(self): + expected = """ + a = t(fun,[fun_extra_args]) + + Wrapper for ``t``. + + Parameters + ---------- + fun : call-back function + + Other Parameters + ---------------- + fun_extra_args : input tuple, optional + Default: () + + Returns + ------- + a : int + + Notes + ----- + Call-back functions:: + + def fun(): return a + Return objects: + a : int + """ + assert_equal(self.module.t.__doc__, textwrap.dedent(expected).lstrip()) + + def check_function(self, name): + t = getattr(self.module, name) + r = t(lambda: 4) + assert_(r == 4, repr(r)) + r = t(lambda a: 5, fun_extra_args=(6,)) + assert_(r == 5, repr(r)) + r = t(lambda a: a, fun_extra_args=(6,)) + assert_(r == 6, repr(r)) + r = t(lambda a: 5 + a, fun_extra_args=(7,)) + assert_(r == 12, repr(r)) + r = t(lambda a: math.degrees(a), fun_extra_args=(math.pi,)) + assert_(r == 180, repr(r)) + r = t(math.degrees, fun_extra_args=(math.pi,)) + assert_(r == 180, repr(r)) + + r = t(self.module.func, fun_extra_args=(6,)) + assert_(r == 17, repr(r)) + r = t(self.module.func0) + assert_(r == 11, repr(r)) + r = t(self.module.func0._cpointer) + assert_(r == 11, repr(r)) + + class A(object): + + def __call__(self): + return 7 + + def mth(self): + return 9 + a = A() + r = t(a) + assert_(r == 7, repr(r)) + r = t(a.mth) + assert_(r == 9, repr(r)) + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_string_callback(self): + + def callback(code): + if code == 'r': + return 0 + else: + return 1 + + f = getattr(self.module, 'string_callback') + r = f(callback) + assert_(r == 0, repr(r)) + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_string_callback_array(self): + # See gh-10027 + cu = np.zeros((1, 8), 'S1') + + def callback(cu, lencu): + if cu.shape != (lencu, 8): + return 1 + if cu.dtype != 'S1': + return 2 + if not np.all(cu == b''): + return 3 + return 0 + + f = getattr(self.module, 'string_callback_array') + res = f(callback, cu, len(cu)) + assert_(res == 0, repr(res)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_common.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_common.py new file mode 100644 index 0000000000..dcb01b0ec7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_common.py @@ -0,0 +1,27 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys +import pytest + +import numpy as np +from . 
import util + +from numpy.testing import assert_array_equal + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + +class TestCommonBlock(util.F2PyTest): + sources = [_path('src', 'common', 'block.f')] + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_common_block(self): + self.module.initcb() + assert_array_equal(self.module.block.long_bn, + np.array(1.0, dtype=np.float64)) + assert_array_equal(self.module.block.string_bn, + np.array('2', dtype='|S1')) + assert_array_equal(self.module.block.ok, + np.array(3, dtype=np.int32)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_compile_function.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_compile_function.py new file mode 100644 index 0000000000..36abf05f9c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_compile_function.py @@ -0,0 +1,125 @@ +"""See https://github.com/numpy/numpy/pull/11937. + +""" +from __future__ import division, absolute_import, print_function + +import sys +import os +import uuid +from importlib import import_module +import pytest + +import numpy.f2py + +from numpy.testing import assert_equal +from . import util + + +def setup_module(): + if sys.platform == 'win32' and sys.version_info[0] < 3: + pytest.skip('Fails with MinGW64 Gfortran (Issue #9673)') + if not util.has_c_compiler(): + pytest.skip("Needs C compiler") + if not util.has_f77_compiler(): + pytest.skip('Needs FORTRAN 77 compiler') + + +# extra_args can be a list (since gh-11937) or string. +# also test absence of extra_args +@pytest.mark.parametrize( + "extra_args", [['--noopt', '--debug'], '--noopt --debug', ''] + ) +def test_f2py_init_compile(extra_args): + # flush through the f2py __init__ compile() function code path as a + # crude test for input handling following migration from + # exec_command() to subprocess.check_output() in gh-11937 + + # the Fortran 77 syntax requires 6 spaces before any commands, but + # more space may be added/ + fsource = """ + integer function foo() + foo = 10 + 5 + return + end + """ + # use various helper functions in util.py to enable robust build / + # compile and reimport cycle in test suite + moddir = util.get_module_dir() + modname = util.get_temp_module_name() + + cwd = os.getcwd() + target = os.path.join(moddir, str(uuid.uuid4()) + '.f') + # try running compile() with and without a source_fn provided so + # that the code path where a temporary file for writing Fortran + # source is created is also explored + for source_fn in [target, None]: + # mimic the path changing behavior used by build_module() in + # util.py, but don't actually use build_module() because it has + # its own invocation of subprocess that circumvents the + # f2py.compile code block under test + try: + os.chdir(moddir) + ret_val = numpy.f2py.compile( + fsource, + modulename=modname, + extra_args=extra_args, + source_fn=source_fn + ) + finally: + os.chdir(cwd) + + # check for compile success return value + assert_equal(ret_val, 0) + + # we are not currently able to import the Python-Fortran + # interface module on Windows / Appveyor, even though we do get + # successful compilation on that platform with Python 3.x + if sys.platform != 'win32': + # check for sensible result of Fortran function; that means + # we can import the module name in Python and retrieve the + # result of the sum operation + return_check = import_module(modname) + calc_result = return_check.foo() + assert_equal(calc_result, 15) + + +def 
test_f2py_init_compile_failure(): + # verify an appropriate integer status value returned by + # f2py.compile() when invalid Fortran is provided + ret_val = numpy.f2py.compile(b"invalid") + assert_equal(ret_val, 1) + + +def test_f2py_init_compile_bad_cmd(): + # verify that usage of invalid command in f2py.compile() returns + # status value of 127 for historic consistency with exec_command() + # error handling + + # patch the sys Python exe path temporarily to induce an OSError + # downstream NOTE: how bad of an idea is this patching? + try: + temp = sys.executable + sys.executable = 'does not exist' + + # the OSError should take precedence over invalid Fortran + ret_val = numpy.f2py.compile(b"invalid") + assert_equal(ret_val, 127) + finally: + sys.executable = temp + + +@pytest.mark.parametrize('fsource', + ['program test_f2py\nend program test_f2py', + b'program test_f2py\nend program test_f2py',]) +def test_compile_from_strings(tmpdir, fsource): + # Make sure we can compile str and bytes gh-12796 + cwd = os.getcwd() + try: + os.chdir(str(tmpdir)) + ret_val = numpy.f2py.compile( + fsource, + modulename='test_compile_from_strings', + extension='.f90') + assert_equal(ret_val, 0) + finally: + os.chdir(cwd) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_kind.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_kind.py new file mode 100644 index 0000000000..1f7762a805 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_kind.py @@ -0,0 +1,34 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +from numpy.testing import assert_ +from numpy.f2py.crackfortran import ( + _selected_int_kind_func as selected_int_kind, + _selected_real_kind_func as selected_real_kind + ) +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestKind(util.F2PyTest): + sources = [_path('src', 'kind', 'foo.f90')] + + @pytest.mark.slow + def test_all(self): + selectedrealkind = self.module.selectedrealkind + selectedintkind = self.module.selectedintkind + + for i in range(40): + assert_(selectedintkind(i) in [selected_int_kind(i), -1], + 'selectedintkind(%s): expected %r but got %r' % + (i, selected_int_kind(i), selectedintkind(i))) + + for i in range(20): + assert_(selectedrealkind(i) in [selected_real_kind(i), -1], + 'selectedrealkind(%s): expected %r but got %r' % + (i, selected_real_kind(i), selectedrealkind(i))) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_mixed.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_mixed.py new file mode 100644 index 0000000000..28268ecc02 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_mixed.py @@ -0,0 +1,38 @@ +from __future__ import division, absolute_import, print_function + +import os +import textwrap +import pytest + +from numpy.testing import assert_, assert_equal +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestMixed(util.F2PyTest): + sources = [_path('src', 'mixed', 'foo.f'), + _path('src', 'mixed', 'foo_fixed.f90'), + _path('src', 'mixed', 'foo_free.f90')] + + @pytest.mark.slow + def test_all(self): + assert_(self.module.bar11() == 11) + assert_(self.module.foo_fixed.bar12() == 12) + assert_(self.module.foo_free.bar13() == 13) + + @pytest.mark.slow + def test_docstring(self): + expected = """ + a = bar11() + + Wrapper for ``bar11``. 
+ + Returns + ------- + a : int + """ + assert_equal(self.module.bar11.__doc__, + textwrap.dedent(expected).lstrip()) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_parameter.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_parameter.py new file mode 100644 index 0000000000..6a378687ad --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_parameter.py @@ -0,0 +1,118 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +import numpy as np +from numpy.testing import assert_raises, assert_equal + +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestParameters(util.F2PyTest): + # Check that intent(in out) translates as intent(inout) + sources = [_path('src', 'parameter', 'constant_real.f90'), + _path('src', 'parameter', 'constant_integer.f90'), + _path('src', 'parameter', 'constant_both.f90'), + _path('src', 'parameter', 'constant_compound.f90'), + _path('src', 'parameter', 'constant_non_compound.f90'), + ] + + @pytest.mark.slow + def test_constant_real_single(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float32)[::2] + assert_raises(ValueError, self.module.foo_single, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float32) + self.module.foo_single(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_real_double(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_double, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo_double(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_compound_int(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int32)[::2] + assert_raises(ValueError, self.module.foo_compound_int, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int32) + self.module.foo_compound_int(x) + assert_equal(x, [0 + 1 + 2*6, 1, 2]) + + @pytest.mark.slow + def test_constant_non_compound_int(self): + # check values + x = np.arange(4, dtype=np.int32) + self.module.foo_non_compound_int(x) + assert_equal(x, [0 + 1 + 2 + 3*4, 1, 2, 3]) + + @pytest.mark.slow + def test_constant_integer_int(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int32)[::2] + assert_raises(ValueError, self.module.foo_int, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int32) + self.module.foo_int(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_integer_long(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.int64)[::2] + assert_raises(ValueError, self.module.foo_long, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.int64) + self.module.foo_long(x) + assert_equal(x, [0 + 1 + 2*3, 1, 2]) + + @pytest.mark.slow + def test_constant_both(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) + + @pytest.mark.slow + def test_constant_no(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_no, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + 
self.module.foo_no(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) + + @pytest.mark.slow + def test_constant_sum(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float64)[::2] + assert_raises(ValueError, self.module.foo_sum, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float64) + self.module.foo_sum(x) + assert_equal(x, [0 + 1*3*3 + 2*3*3, 1*3, 2*3]) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_quoted_character.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_quoted_character.py new file mode 100644 index 0000000000..c9a1c36f50 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_quoted_character.py @@ -0,0 +1,35 @@ +"""See https://github.com/numpy/numpy/pull/10676. + +""" +from __future__ import division, absolute_import, print_function + +import sys +from importlib import import_module +import pytest + +from numpy.testing import assert_equal +from . import util + + +class TestQuotedCharacter(util.F2PyTest): + code = """ + SUBROUTINE FOO(OUT1, OUT2, OUT3, OUT4, OUT5, OUT6) + CHARACTER SINGLE, DOUBLE, SEMICOL, EXCLA, OPENPAR, CLOSEPAR + PARAMETER (SINGLE="'", DOUBLE='"', SEMICOL=';', EXCLA="!", + 1 OPENPAR="(", CLOSEPAR=")") + CHARACTER OUT1, OUT2, OUT3, OUT4, OUT5, OUT6 +Cf2py intent(out) OUT1, OUT2, OUT3, OUT4, OUT5, OUT6 + OUT1 = SINGLE + OUT2 = DOUBLE + OUT3 = SEMICOL + OUT4 = EXCLA + OUT5 = OPENPAR + OUT6 = CLOSEPAR + RETURN + END + """ + + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + def test_quoted_character(self): + assert_equal(self.module.foo(), (b"'", b'"', b';', b'!', b'(', b')')) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_regression.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_regression.py new file mode 100644 index 0000000000..3adae635d9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_regression.py @@ -0,0 +1,29 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +import numpy as np +from numpy.testing import assert_raises, assert_equal + +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestIntentInOut(util.F2PyTest): + # Check that intent(in out) translates as intent(inout) + sources = [_path('src', 'regression', 'inout.f90')] + + @pytest.mark.slow + def test_inout(self): + # non-contiguous should raise error + x = np.arange(6, dtype=np.float32)[::2] + assert_raises(ValueError, self.module.foo, x) + + # check values with contiguous array + x = np.arange(3, dtype=np.float32) + self.module.foo(x) + assert_equal(x, [3, 1, 2]) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_return_character.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_character.py new file mode 100644 index 0000000000..fc3a58d36b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_character.py @@ -0,0 +1,146 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +from numpy import array +from numpy.testing import assert_ +from . 
import util + + +class TestReturnCharacter(util.F2PyTest): + + def check_function(self, t): + tname = t.__doc__.split()[0] + if tname in ['t0', 't1', 's0', 's1']: + assert_(t(23) == b'2') + r = t('ab') + assert_(r == b'a', repr(r)) + r = t(array('ab')) + assert_(r == b'a', repr(r)) + r = t(array(77, 'u1')) + assert_(r == b'M', repr(r)) + #assert_(_raises(ValueError, t, array([77,87]))) + #assert_(_raises(ValueError, t, array(77))) + elif tname in ['ts', 'ss']: + assert_(t(23) == b'23 ', repr(t(23))) + assert_(t('123456789abcdef') == b'123456789a') + elif tname in ['t5', 's5']: + assert_(t(23) == b'23 ', repr(t(23))) + assert_(t('ab') == b'ab ', repr(t('ab'))) + assert_(t('123456789abcdef') == b'12345') + else: + raise NotImplementedError + + +class TestF77ReturnCharacter(TestReturnCharacter): + code = """ + function t0(value) + character value + character t0 + t0 = value + end + function t1(value) + character*1 value + character*1 t1 + t1 = value + end + function t5(value) + character*5 value + character*5 t5 + t5 = value + end + function ts(value) + character*(*) value + character*(*) ts + ts = value + end + + subroutine s0(t0,value) + character value + character t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + character*1 value + character*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s5(t5,value) + character*5 value + character*5 t5 +cf2py intent(out) t5 + t5 = value + end + subroutine ss(ts,value) + character*(*) value + character*10 ts +cf2py intent(out) ts + ts = value + end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t1,t5,s0,s1,s5,ss'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnCharacter(TestReturnCharacter): + suffix = ".f90" + code = """ +module f90_return_char + contains + function t0(value) + character :: value + character :: t0 + t0 = value + end function t0 + function t1(value) + character(len=1) :: value + character(len=1) :: t1 + t1 = value + end function t1 + function t5(value) + character(len=5) :: value + character(len=5) :: t5 + t5 = value + end function t5 + function ts(value) + character(len=*) :: value + character(len=10) :: ts + ts = value + end function ts + + subroutine s0(t0,value) + character :: value + character :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + character(len=1) :: value + character(len=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s5(t5,value) + character(len=5) :: value + character(len=5) :: t5 +!f2py intent(out) t5 + t5 = value + end subroutine s5 + subroutine ss(ts,value) + character(len=*) :: value + character(len=10) :: ts +!f2py intent(out) ts + ts = value + end subroutine ss +end module f90_return_char + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t1,t5,ts,s0,s1,s5,ss'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_char, name)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_return_complex.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_complex.py new file mode 100644 index 0000000000..43c884dfb0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_complex.py @@ -0,0 +1,169 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +from numpy import array +from numpy.compat import long +from numpy.testing import assert_, assert_raises +from . 
import util + + +class TestReturnComplex(util.F2PyTest): + + def check_function(self, t): + tname = t.__doc__.split()[0] + if tname in ['t0', 't8', 's0', 's8']: + err = 1e-5 + else: + err = 0.0 + assert_(abs(t(234j) - 234.0j) <= err) + assert_(abs(t(234.6) - 234.6) <= err) + assert_(abs(t(long(234)) - 234.0) <= err) + assert_(abs(t(234.6 + 3j) - (234.6 + 3j)) <= err) + #assert_( abs(t('234')-234.)<=err) + #assert_( abs(t('234.6')-234.6)<=err) + assert_(abs(t(-234) + 234.) <= err) + assert_(abs(t([234]) - 234.) <= err) + assert_(abs(t((234,)) - 234.) <= err) + assert_(abs(t(array(234)) - 234.) <= err) + assert_(abs(t(array(23 + 4j, 'F')) - (23 + 4j)) <= err) + assert_(abs(t(array([234])) - 234.) <= err) + assert_(abs(t(array([[234]])) - 234.) <= err) + assert_(abs(t(array([234], 'b')) + 22.) <= err) + assert_(abs(t(array([234], 'h')) - 234.) <= err) + assert_(abs(t(array([234], 'i')) - 234.) <= err) + assert_(abs(t(array([234], 'l')) - 234.) <= err) + assert_(abs(t(array([234], 'q')) - 234.) <= err) + assert_(abs(t(array([234], 'f')) - 234.) <= err) + assert_(abs(t(array([234], 'd')) - 234.) <= err) + assert_(abs(t(array([234 + 3j], 'F')) - (234 + 3j)) <= err) + assert_(abs(t(array([234], 'D')) - 234.) <= err) + + #assert_raises(TypeError, t, array([234], 'a1')) + assert_raises(TypeError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(TypeError, t, t) + assert_raises(TypeError, t, {}) + + try: + r = t(10 ** 400) + assert_(repr(r) in ['(inf+0j)', '(Infinity+0j)'], repr(r)) + except OverflowError: + pass + + +class TestF77ReturnComplex(TestReturnComplex): + code = """ + function t0(value) + complex value + complex t0 + t0 = value + end + function t8(value) + complex*8 value + complex*8 t8 + t8 = value + end + function t16(value) + complex*16 value + complex*16 t16 + t16 = value + end + function td(value) + double complex value + double complex td + td = value + end + + subroutine s0(t0,value) + complex value + complex t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s8(t8,value) + complex*8 value + complex*8 t8 +cf2py intent(out) t8 + t8 = value + end + subroutine s16(t16,value) + complex*16 value + complex*16 t16 +cf2py intent(out) t16 + t16 = value + end + subroutine sd(td,value) + double complex value + double complex td +cf2py intent(out) td + td = value + end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnComplex(TestReturnComplex): + suffix = ".f90" + code = """ +module f90_return_complex + contains + function t0(value) + complex :: value + complex :: t0 + t0 = value + end function t0 + function t8(value) + complex(kind=4) :: value + complex(kind=4) :: t8 + t8 = value + end function t8 + function t16(value) + complex(kind=8) :: value + complex(kind=8) :: t16 + t16 = value + end function t16 + function td(value) + double complex :: value + double complex :: td + td = value + end function td + + subroutine s0(t0,value) + complex :: value + complex :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s8(t8,value) + complex(kind=4) :: value + complex(kind=4) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 + subroutine s16(t16,value) + complex(kind=8) :: value + complex(kind=8) :: t16 +!f2py intent(out) t16 + t16 = value + end subroutine s16 + subroutine sd(td,value) + double complex :: value + double complex :: td +!f2py intent(out) td + td = 
value + end subroutine sd +end module f90_return_complex + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t8,t16,td,s0,s8,s16,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_complex, name)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_return_integer.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_integer.py new file mode 100644 index 0000000000..22f4acfdf6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_integer.py @@ -0,0 +1,181 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +from numpy import array +from numpy.compat import long +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnInteger(util.F2PyTest): + + def check_function(self, t): + assert_(t(123) == 123, repr(t(123))) + assert_(t(123.6) == 123) + assert_(t(long(123)) == 123) + assert_(t('123') == 123) + assert_(t(-123) == -123) + assert_(t([123]) == 123) + assert_(t((123,)) == 123) + assert_(t(array(123)) == 123) + assert_(t(array([123])) == 123) + assert_(t(array([[123]])) == 123) + assert_(t(array([123], 'b')) == 123) + assert_(t(array([123], 'h')) == 123) + assert_(t(array([123], 'i')) == 123) + assert_(t(array([123], 'l')) == 123) + assert_(t(array([123], 'B')) == 123) + assert_(t(array([123], 'f')) == 123) + assert_(t(array([123], 'd')) == 123) + + #assert_raises(ValueError, t, array([123],'S3')) + assert_raises(ValueError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(Exception, t, t) + assert_raises(Exception, t, {}) + + if t.__doc__.split()[0] in ['t8', 's8']: + assert_raises(OverflowError, t, 100000000000000000000000) + assert_raises(OverflowError, t, 10000000011111111111111.23) + + +class TestF77ReturnInteger(TestReturnInteger): + code = """ + function t0(value) + integer value + integer t0 + t0 = value + end + function t1(value) + integer*1 value + integer*1 t1 + t1 = value + end + function t2(value) + integer*2 value + integer*2 t2 + t2 = value + end + function t4(value) + integer*4 value + integer*4 t4 + t4 = value + end + function t8(value) + integer*8 value + integer*8 t8 + t8 = value + end + + subroutine s0(t0,value) + integer value + integer t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + integer*1 value + integer*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s2(t2,value) + integer*2 value + integer*2 t2 +cf2py intent(out) t2 + t2 = value + end + subroutine s4(t4,value) + integer*4 value + integer*4 t4 +cf2py intent(out) t4 + t4 = value + end + subroutine s8(t8,value) + integer*8 value + integer*8 t8 +cf2py intent(out) t8 + t8 = value + end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnInteger(TestReturnInteger): + suffix = ".f90" + code = """ +module f90_return_integer + contains + function t0(value) + integer :: value + integer :: t0 + t0 = value + end function t0 + function t1(value) + integer(kind=1) :: value + integer(kind=1) :: t1 + t1 = value + end function t1 + function t2(value) + integer(kind=2) :: value + integer(kind=2) :: t2 + t2 = value + end function t2 + function t4(value) + integer(kind=4) :: value + integer(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + integer(kind=8) :: value + integer(kind=8) :: t8 + t8 = value + end function t8 
+ + subroutine s0(t0,value) + integer :: value + integer :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + integer(kind=1) :: value + integer(kind=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s2(t2,value) + integer(kind=2) :: value + integer(kind=2) :: t2 +!f2py intent(out) t2 + t2 = value + end subroutine s2 + subroutine s4(t4,value) + integer(kind=4) :: value + integer(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + integer(kind=8) :: value + integer(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 +end module f90_return_integer + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_integer, name)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_return_logical.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_logical.py new file mode 100644 index 0000000000..96f215a914 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_logical.py @@ -0,0 +1,189 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +from numpy import array +from numpy.compat import long +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnLogical(util.F2PyTest): + + def check_function(self, t): + assert_(t(True) == 1, repr(t(True))) + assert_(t(False) == 0, repr(t(False))) + assert_(t(0) == 0) + assert_(t(None) == 0) + assert_(t(0.0) == 0) + assert_(t(0j) == 0) + assert_(t(1j) == 1) + assert_(t(234) == 1) + assert_(t(234.6) == 1) + assert_(t(long(234)) == 1) + assert_(t(234.6 + 3j) == 1) + assert_(t('234') == 1) + assert_(t('aaa') == 1) + assert_(t('') == 0) + assert_(t([]) == 0) + assert_(t(()) == 0) + assert_(t({}) == 0) + assert_(t(t) == 1) + assert_(t(-234) == 1) + assert_(t(10 ** 100) == 1) + assert_(t([234]) == 1) + assert_(t((234,)) == 1) + assert_(t(array(234)) == 1) + assert_(t(array([234])) == 1) + assert_(t(array([[234]])) == 1) + assert_(t(array([234], 'b')) == 1) + assert_(t(array([234], 'h')) == 1) + assert_(t(array([234], 'i')) == 1) + assert_(t(array([234], 'l')) == 1) + assert_(t(array([234], 'f')) == 1) + assert_(t(array([234], 'd')) == 1) + assert_(t(array([234 + 3j], 'F')) == 1) + assert_(t(array([234], 'D')) == 1) + assert_(t(array(0)) == 0) + assert_(t(array([0])) == 0) + assert_(t(array([[0]])) == 0) + assert_(t(array([0j])) == 0) + assert_(t(array([1])) == 1) + assert_raises(ValueError, t, array([0, 0])) + + +class TestF77ReturnLogical(TestReturnLogical): + code = """ + function t0(value) + logical value + logical t0 + t0 = value + end + function t1(value) + logical*1 value + logical*1 t1 + t1 = value + end + function t2(value) + logical*2 value + logical*2 t2 + t2 = value + end + function t4(value) + logical*4 value + logical*4 t4 + t4 = value + end +c function t8(value) +c logical*8 value +c logical*8 t8 +c t8 = value +c end + + subroutine s0(t0,value) + logical value + logical t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s1(t1,value) + logical*1 value + logical*1 t1 +cf2py intent(out) t1 + t1 = value + end + subroutine s2(t2,value) + logical*2 value + logical*2 t2 +cf2py intent(out) t2 + t2 = value + end + subroutine s4(t4,value) + logical*4 value + logical*4 t4 +cf2py intent(out) t4 + t4 = value + end +c subroutine s8(t8,value) +c logical*8 value +c logical*8 t8 +cf2py intent(out) t8 +c t8 = value 
+c end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t1,t2,t4,s0,s1,s2,s4'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnLogical(TestReturnLogical): + suffix = ".f90" + code = """ +module f90_return_logical + contains + function t0(value) + logical :: value + logical :: t0 + t0 = value + end function t0 + function t1(value) + logical(kind=1) :: value + logical(kind=1) :: t1 + t1 = value + end function t1 + function t2(value) + logical(kind=2) :: value + logical(kind=2) :: t2 + t2 = value + end function t2 + function t4(value) + logical(kind=4) :: value + logical(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + logical(kind=8) :: value + logical(kind=8) :: t8 + t8 = value + end function t8 + + subroutine s0(t0,value) + logical :: value + logical :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s1(t1,value) + logical(kind=1) :: value + logical(kind=1) :: t1 +!f2py intent(out) t1 + t1 = value + end subroutine s1 + subroutine s2(t2,value) + logical(kind=2) :: value + logical(kind=2) :: t2 +!f2py intent(out) t2 + t2 = value + end subroutine s2 + subroutine s4(t4,value) + logical(kind=4) :: value + logical(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + logical(kind=8) :: value + logical(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 +end module f90_return_logical + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', + 't0,t1,t2,t4,t8,s0,s1,s2,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module.f90_return_logical, name)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_return_real.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_real.py new file mode 100644 index 0000000000..315cfe49b9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_return_real.py @@ -0,0 +1,210 @@ +from __future__ import division, absolute_import, print_function + +import platform +import pytest + +from numpy import array +from numpy.compat import long +from numpy.testing import assert_, assert_raises +from . import util + + +class TestReturnReal(util.F2PyTest): + + def check_function(self, t): + if t.__doc__.split()[0] in ['t0', 't4', 's0', 's4']: + err = 1e-5 + else: + err = 0.0 + assert_(abs(t(234) - 234.0) <= err) + assert_(abs(t(234.6) - 234.6) <= err) + assert_(abs(t(long(234)) - 234.0) <= err) + assert_(abs(t('234') - 234) <= err) + assert_(abs(t('234.6') - 234.6) <= err) + assert_(abs(t(-234) + 234) <= err) + assert_(abs(t([234]) - 234) <= err) + assert_(abs(t((234,)) - 234.) <= err) + assert_(abs(t(array(234)) - 234.) <= err) + assert_(abs(t(array([234])) - 234.) <= err) + assert_(abs(t(array([[234]])) - 234.) <= err) + assert_(abs(t(array([234], 'b')) + 22) <= err) + assert_(abs(t(array([234], 'h')) - 234.) <= err) + assert_(abs(t(array([234], 'i')) - 234.) <= err) + assert_(abs(t(array([234], 'l')) - 234.) <= err) + assert_(abs(t(array([234], 'B')) - 234.) <= err) + assert_(abs(t(array([234], 'f')) - 234.) <= err) + assert_(abs(t(array([234], 'd')) - 234.) 
<= err) + if t.__doc__.split()[0] in ['t0', 't4', 's0', 's4']: + assert_(t(1e200) == t(1e300)) # inf + + #assert_raises(ValueError, t, array([234], 'S1')) + assert_raises(ValueError, t, 'abc') + + assert_raises(IndexError, t, []) + assert_raises(IndexError, t, ()) + + assert_raises(Exception, t, t) + assert_raises(Exception, t, {}) + + try: + r = t(10 ** 400) + assert_(repr(r) in ['inf', 'Infinity'], repr(r)) + except OverflowError: + pass + + + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestCReturnReal(TestReturnReal): + suffix = ".pyf" + module_name = "c_ext_return_real" + code = """ +python module c_ext_return_real +usercode \'\'\' +float t4(float value) { return value; } +void s4(float *t4, float value) { *t4 = value; } +double t8(double value) { return value; } +void s8(double *t8, double value) { *t8 = value; } +\'\'\' +interface + function t4(value) + real*4 intent(c) :: t4,value + end + function t8(value) + real*8 intent(c) :: t8,value + end + subroutine s4(t4,value) + intent(c) s4 + real*4 intent(out) :: t4 + real*4 intent(c) :: value + end + subroutine s8(t8,value) + intent(c) s8 + real*8 intent(out) :: t8 + real*8 intent(c) :: value + end +end interface +end python module c_ext_return_real + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't4,t8,s4,s8'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF77ReturnReal(TestReturnReal): + code = """ + function t0(value) + real value + real t0 + t0 = value + end + function t4(value) + real*4 value + real*4 t4 + t4 = value + end + function t8(value) + real*8 value + real*8 t8 + t8 = value + end + function td(value) + double precision value + double precision td + td = value + end + + subroutine s0(t0,value) + real value + real t0 +cf2py intent(out) t0 + t0 = value + end + subroutine s4(t4,value) + real*4 value + real*4 t4 +cf2py intent(out) t4 + t4 = value + end + subroutine s8(t8,value) + real*8 value + real*8 t8 +cf2py intent(out) t8 + t8 = value + end + subroutine sd(td,value) + double precision value + double precision td +cf2py intent(out) td + td = value + end + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(',')) + def test_all(self, name): + self.check_function(getattr(self.module, name)) + + +class TestF90ReturnReal(TestReturnReal): + suffix = ".f90" + code = """ +module f90_return_real + contains + function t0(value) + real :: value + real :: t0 + t0 = value + end function t0 + function t4(value) + real(kind=4) :: value + real(kind=4) :: t4 + t4 = value + end function t4 + function t8(value) + real(kind=8) :: value + real(kind=8) :: t8 + t8 = value + end function t8 + function td(value) + double precision :: value + double precision :: td + td = value + end function td + + subroutine s0(t0,value) + real :: value + real :: t0 +!f2py intent(out) t0 + t0 = value + end subroutine s0 + subroutine s4(t4,value) + real(kind=4) :: value + real(kind=4) :: t4 +!f2py intent(out) t4 + t4 = value + end subroutine s4 + subroutine s8(t8,value) + real(kind=8) :: value + real(kind=8) :: t8 +!f2py intent(out) t8 + t8 = value + end subroutine s8 + subroutine sd(td,value) + double precision :: value + double precision :: td +!f2py intent(out) td + td = value + end subroutine sd +end module f90_return_real + """ + + @pytest.mark.slow + @pytest.mark.parametrize('name', 't0,t4,t8,td,s0,s4,s8,sd'.split(',')) + def 
test_all(self, name): + self.check_function(getattr(self.module.f90_return_real, name)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_semicolon_split.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_semicolon_split.py new file mode 100644 index 0000000000..bcd18c893f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_semicolon_split.py @@ -0,0 +1,65 @@ +from __future__ import division, absolute_import, print_function + +import platform +import pytest + +from . import util +from numpy.testing import assert_equal + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestMultiline(util.F2PyTest): + suffix = ".pyf" + module_name = "multiline" + code = """ +python module {module} + usercode ''' +void foo(int* x) {{ + char dummy = ';'; + *x = 42; +}} +''' + interface + subroutine foo(x) + intent(c) foo + integer intent(out) :: x + end subroutine foo + end interface +end python module {module} + """.format(module=module_name) + + def test_multiline(self): + assert_equal(self.module.foo(), 42) + + +@pytest.mark.skipif( + platform.system() == 'Darwin', + reason="Prone to error when run with numpy/f2py/tests on mac os, " + "but not when run in isolation") +class TestCallstatement(util.F2PyTest): + suffix = ".pyf" + module_name = "callstatement" + code = """ +python module {module} + usercode ''' +void foo(int* x) {{ +}} +''' + interface + subroutine foo(x) + intent(c) foo + integer intent(out) :: x + callprotoargument int* + callstatement {{ & + ; & + x = 42; & + }} + end subroutine foo + end interface +end python module {module} + """.format(module=module_name) + + def test_callstatement(self): + assert_equal(self.module.foo(), 42) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_size.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_size.py new file mode 100644 index 0000000000..e2af618048 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_size.py @@ -0,0 +1,51 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +from numpy.testing import assert_equal +from . 
import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestSizeSumExample(util.F2PyTest): + sources = [_path('src', 'size', 'foo.f90')] + + @pytest.mark.slow + def test_all(self): + r = self.module.foo([[]]) + assert_equal(r, [0], repr(r)) + + r = self.module.foo([[1, 2]]) + assert_equal(r, [3], repr(r)) + + r = self.module.foo([[1, 2], [3, 4]]) + assert_equal(r, [3, 7], repr(r)) + + r = self.module.foo([[1, 2], [3, 4], [5, 6]]) + assert_equal(r, [3, 7, 11], repr(r)) + + @pytest.mark.slow + def test_transpose(self): + r = self.module.trans([[]]) + assert_equal(r.T, [[]], repr(r)) + + r = self.module.trans([[1, 2]]) + assert_equal(r, [[1], [2]], repr(r)) + + r = self.module.trans([[1, 2, 3], [4, 5, 6]]) + assert_equal(r, [[1, 4], [2, 5], [3, 6]], repr(r)) + + @pytest.mark.slow + def test_flatten(self): + r = self.module.flatten([[]]) + assert_equal(r, [], repr(r)) + + r = self.module.flatten([[1, 2]]) + assert_equal(r, [1, 2], repr(r)) + + r = self.module.flatten([[1, 2, 3], [4, 5, 6]]) + assert_equal(r, [1, 2, 3, 4, 5, 6], repr(r)) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/test_string.py b/contrib/python/numpy/py2/numpy/f2py/tests/test_string.py new file mode 100644 index 0000000000..0493c99cf1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/test_string.py @@ -0,0 +1,24 @@ +from __future__ import division, absolute_import, print_function + +import os +import pytest + +from numpy.testing import assert_array_equal +import numpy as np +from . import util + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + +class TestString(util.F2PyTest): + sources = [_path('src', 'string', 'char.f90')] + + @pytest.mark.slow + def test_char(self): + strings = np.array(['ab', 'cd', 'ef'], dtype='c').T + inp, out = self.module.char_test.change_strings(strings, strings.shape[1]) + assert_array_equal(inp, strings) + expected = strings.copy() + expected[1, :] = 'AAA' + assert_array_equal(out, expected) diff --git a/contrib/python/numpy/py2/numpy/f2py/tests/util.py b/contrib/python/numpy/py2/numpy/f2py/tests/util.py new file mode 100644 index 0000000000..5fa5dadd2a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/f2py/tests/util.py @@ -0,0 +1,360 @@ +""" +Utility functions for + +- building and importing modules on test time, using a temporary location +- detecting if compilers are present + +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import subprocess +import tempfile +import shutil +import atexit +import textwrap +import re +import pytest + +from numpy.compat import asbytes, asstr +from numpy.testing import temppath +from importlib import import_module + +try: + from hashlib import md5 +except ImportError: + from md5 import new as md5 # noqa: F401 + +# +# Maintaining a temporary module directory +# + +_module_dir = None + + +def _cleanup(): + global _module_dir + if _module_dir is not None: + try: + sys.path.remove(_module_dir) + except ValueError: + pass + try: + shutil.rmtree(_module_dir) + except (IOError, OSError): + pass + _module_dir = None + + +def get_module_dir(): + global _module_dir + if _module_dir is None: + _module_dir = tempfile.mkdtemp() + atexit.register(_cleanup) + if _module_dir not in sys.path: + sys.path.insert(0, _module_dir) + return _module_dir + + +def get_temp_module_name(): + # Assume single-threaded, and the module dir usable only by this thread + d = get_module_dir() + for j in range(5403, 9999999): + name = "_test_ext_module_%d" 
% j + fn = os.path.join(d, name) + if name not in sys.modules and not os.path.isfile(fn + '.py'): + return name + raise RuntimeError("Failed to create a temporary module name") + + +def _memoize(func): + memo = {} + + def wrapper(*a, **kw): + key = repr((a, kw)) + if key not in memo: + try: + memo[key] = func(*a, **kw) + except Exception as e: + memo[key] = e + raise + ret = memo[key] + if isinstance(ret, Exception): + raise ret + return ret + wrapper.__name__ = func.__name__ + return wrapper + +# +# Building modules +# + + +@_memoize +def build_module(source_files, options=[], skip=[], only=[], module_name=None): + """ + Compile and import a f2py module, built from the given files. + + """ + + code = ("import sys; sys.path = %s; import numpy.f2py as f2py2e; " + "f2py2e.main()" % repr(sys.path)) + + d = get_module_dir() + + # Copy files + dst_sources = [] + for fn in source_files: + if not os.path.isfile(fn): + raise RuntimeError("%s is not a file" % fn) + dst = os.path.join(d, os.path.basename(fn)) + shutil.copyfile(fn, dst) + dst_sources.append(dst) + + fn = os.path.join(os.path.dirname(fn), '.f2py_f2cmap') + if os.path.isfile(fn): + dst = os.path.join(d, os.path.basename(fn)) + if not os.path.isfile(dst): + shutil.copyfile(fn, dst) + + # Prepare options + if module_name is None: + module_name = get_temp_module_name() + f2py_opts = ['-c', '-m', module_name] + options + dst_sources + if skip: + f2py_opts += ['skip:'] + skip + if only: + f2py_opts += ['only:'] + only + + # Build + cwd = os.getcwd() + try: + os.chdir(d) + cmd = [sys.executable, '-c', code] + f2py_opts + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, err = p.communicate() + if p.returncode != 0: + raise RuntimeError("Running f2py failed: %s\n%s" + % (cmd[4:], asstr(out))) + finally: + os.chdir(cwd) + + # Partial cleanup + for fn in dst_sources: + os.unlink(fn) + + # Import + return import_module(module_name) + + +@_memoize +def build_code(source_code, options=[], skip=[], only=[], suffix=None, + module_name=None): + """ + Compile and import Fortran code using f2py. + + """ + if suffix is None: + suffix = '.f' + with temppath(suffix=suffix) as path: + with open(path, 'w') as f: + f.write(source_code) + return build_module([path], options=options, skip=skip, only=only, + module_name=module_name) + +# +# Check if compilers are available at all... +# + +_compiler_status = None + + +def _get_compiler_status(): + global _compiler_status + if _compiler_status is not None: + return _compiler_status + + _compiler_status = (False, False, False) + + # XXX: this is really ugly. But I don't know how to invoke Distutils + # in a safer way... 
+ code = """ +import os +import sys +sys.path = %(syspath)s + +def configuration(parent_name='',top_path=None): + global config + from numpy.distutils.misc_util import Configuration + config = Configuration('', parent_name, top_path) + return config + +from numpy.distutils.core import setup +setup(configuration=configuration) + +config_cmd = config.get_config_cmd() +have_c = config_cmd.try_compile('void foo() {}') +print('COMPILERS:%%d,%%d,%%d' %% (have_c, + config.have_f77c(), + config.have_f90c())) +sys.exit(99) +""" + code = code % dict(syspath=repr(sys.path)) + + with temppath(suffix='.py') as script: + with open(script, 'w') as f: + f.write(code) + + cmd = [sys.executable, script, 'config'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, err = p.communicate() + + m = re.search(br'COMPILERS:(\d+),(\d+),(\d+)', out) + if m: + _compiler_status = (bool(int(m.group(1))), bool(int(m.group(2))), + bool(int(m.group(3)))) + # Finished + return _compiler_status + + +def has_c_compiler(): + return _get_compiler_status()[0] + + +def has_f77_compiler(): + return _get_compiler_status()[1] + + +def has_f90_compiler(): + return _get_compiler_status()[2] + +# +# Building with distutils +# + + +@_memoize +def build_module_distutils(source_files, config_code, module_name, **kw): + """ + Build a module via distutils and import it. + + """ + from numpy.distutils.misc_util import Configuration + from numpy.distutils.core import setup + + d = get_module_dir() + + # Copy files + dst_sources = [] + for fn in source_files: + if not os.path.isfile(fn): + raise RuntimeError("%s is not a file" % fn) + dst = os.path.join(d, os.path.basename(fn)) + shutil.copyfile(fn, dst) + dst_sources.append(dst) + + # Build script + config_code = textwrap.dedent(config_code).replace("\n", "\n ") + + code = """\ +import os +import sys +sys.path = %(syspath)s + +def configuration(parent_name='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('', parent_name, top_path) + %(config_code)s + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + setup(configuration=configuration) +""" % dict(config_code=config_code, syspath=repr(sys.path)) + + script = os.path.join(d, get_temp_module_name() + '.py') + dst_sources.append(script) + f = open(script, 'wb') + f.write(asbytes(code)) + f.close() + + # Build + cwd = os.getcwd() + try: + os.chdir(d) + cmd = [sys.executable, script, 'build_ext', '-i'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + out, err = p.communicate() + if p.returncode != 0: + raise RuntimeError("Running distutils build failed: %s\n%s" + % (cmd[4:], asstr(out))) + finally: + os.chdir(cwd) + + # Partial cleanup + for fn in dst_sources: + os.unlink(fn) + + # Import + __import__(module_name) + return sys.modules[module_name] + +# +# Unittest convenience +# + + +class F2PyTest(object): + code = None + sources = None + options = [] + skip = [] + only = [] + suffix = '.f' + module = None + module_name = None + + def setup(self): + if sys.platform == 'win32': + pytest.skip('Fails with MinGW64 Gfortran (Issue #9673)') + + if self.module is not None: + return + + # Check compiler availability first + if not has_c_compiler(): + pytest.skip("No C compiler available") + + codes = [] + if self.sources: + codes.extend(self.sources) + if self.code is not None: + codes.append(self.suffix) + + needs_f77 = False + needs_f90 = False + for fn in codes: + if fn.endswith('.f'): + needs_f77 = 
True + elif fn.endswith('.f90'): + needs_f90 = True + if needs_f77 and not has_f77_compiler(): + pytest.skip("No Fortran 77 compiler available") + if needs_f90 and not has_f90_compiler(): + pytest.skip("No Fortran 90 compiler available") + + # Build the module + if self.code is not None: + self.module = build_code(self.code, options=self.options, + skip=self.skip, only=self.only, + suffix=self.suffix, + module_name=self.module_name) + + if self.sources is not None: + self.module = build_module(self.sources, options=self.options, + skip=self.skip, only=self.only, + module_name=self.module_name) diff --git a/contrib/python/numpy/py2/numpy/fft/setup.py b/contrib/python/numpy/py2/numpy/fft/setup.py new file mode 100644 index 0000000000..cd99a82d7b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/fft/setup.py @@ -0,0 +1,19 @@ +from __future__ import division, print_function + + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('fft', parent_package, top_path) + + config.add_data_dir('tests') + + # Configure fftpack_lite + config.add_extension('fftpack_lite', + sources=['fftpack_litemodule.c', 'fftpack.c'] + ) + + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/fft/tests/__init__.py b/contrib/python/numpy/py2/numpy/fft/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/fft/tests/test_fftpack.py b/contrib/python/numpy/py2/numpy/fft/tests/test_fftpack.py new file mode 100644 index 0000000000..8d6cd84070 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/fft/tests/test_fftpack.py @@ -0,0 +1,185 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.random import random +from numpy.testing import ( + assert_array_almost_equal, assert_array_equal, assert_raises, + ) +import threading +import sys +if sys.version_info[0] >= 3: + import queue +else: + import Queue as queue + + +def fft1(x): + L = len(x) + phase = -2j*np.pi*(np.arange(L)/float(L)) + phase = np.arange(L).reshape(-1, 1) * phase + return np.sum(x*np.exp(phase), axis=1) + + +class TestFFTShift(object): + + def test_fft_n(self): + assert_raises(ValueError, np.fft.fft, [1, 2, 3], 0) + + +class TestFFT1D(object): + + def test_fft(self): + x = random(30) + 1j*random(30) + assert_array_almost_equal(fft1(x), np.fft.fft(x)) + assert_array_almost_equal(fft1(x) / np.sqrt(30), + np.fft.fft(x, norm="ortho")) + + def test_ifft(self): + x = random(30) + 1j*random(30) + assert_array_almost_equal(x, np.fft.ifft(np.fft.fft(x))) + assert_array_almost_equal( + x, np.fft.ifft(np.fft.fft(x, norm="ortho"), norm="ortho")) + + def test_fft2(self): + x = random((30, 20)) + 1j*random((30, 20)) + assert_array_almost_equal(np.fft.fft(np.fft.fft(x, axis=1), axis=0), + np.fft.fft2(x)) + assert_array_almost_equal(np.fft.fft2(x) / np.sqrt(30 * 20), + np.fft.fft2(x, norm="ortho")) + + def test_ifft2(self): + x = random((30, 20)) + 1j*random((30, 20)) + assert_array_almost_equal(np.fft.ifft(np.fft.ifft(x, axis=1), axis=0), + np.fft.ifft2(x)) + assert_array_almost_equal(np.fft.ifft2(x) * np.sqrt(30 * 20), + np.fft.ifft2(x, norm="ortho")) + + def test_fftn(self): + x = random((30, 20, 10)) + 1j*random((30, 20, 10)) + assert_array_almost_equal( + np.fft.fft(np.fft.fft(np.fft.fft(x, axis=2), axis=1), axis=0), + np.fft.fftn(x)) + assert_array_almost_equal(np.fft.fftn(x) / 
np.sqrt(30 * 20 * 10), + np.fft.fftn(x, norm="ortho")) + + def test_ifftn(self): + x = random((30, 20, 10)) + 1j*random((30, 20, 10)) + assert_array_almost_equal( + np.fft.ifft(np.fft.ifft(np.fft.ifft(x, axis=2), axis=1), axis=0), + np.fft.ifftn(x)) + assert_array_almost_equal(np.fft.ifftn(x) * np.sqrt(30 * 20 * 10), + np.fft.ifftn(x, norm="ortho")) + + def test_rfft(self): + x = random(30) + for n in [x.size, 2*x.size]: + for norm in [None, 'ortho']: + assert_array_almost_equal( + np.fft.fft(x, n=n, norm=norm)[:(n//2 + 1)], + np.fft.rfft(x, n=n, norm=norm)) + assert_array_almost_equal(np.fft.rfft(x, n=n) / np.sqrt(n), + np.fft.rfft(x, n=n, norm="ortho")) + + def test_irfft(self): + x = random(30) + assert_array_almost_equal(x, np.fft.irfft(np.fft.rfft(x))) + assert_array_almost_equal( + x, np.fft.irfft(np.fft.rfft(x, norm="ortho"), norm="ortho")) + + def test_rfft2(self): + x = random((30, 20)) + assert_array_almost_equal(np.fft.fft2(x)[:, :11], np.fft.rfft2(x)) + assert_array_almost_equal(np.fft.rfft2(x) / np.sqrt(30 * 20), + np.fft.rfft2(x, norm="ortho")) + + def test_irfft2(self): + x = random((30, 20)) + assert_array_almost_equal(x, np.fft.irfft2(np.fft.rfft2(x))) + assert_array_almost_equal( + x, np.fft.irfft2(np.fft.rfft2(x, norm="ortho"), norm="ortho")) + + def test_rfftn(self): + x = random((30, 20, 10)) + assert_array_almost_equal(np.fft.fftn(x)[:, :, :6], np.fft.rfftn(x)) + assert_array_almost_equal(np.fft.rfftn(x) / np.sqrt(30 * 20 * 10), + np.fft.rfftn(x, norm="ortho")) + + def test_irfftn(self): + x = random((30, 20, 10)) + assert_array_almost_equal(x, np.fft.irfftn(np.fft.rfftn(x))) + assert_array_almost_equal( + x, np.fft.irfftn(np.fft.rfftn(x, norm="ortho"), norm="ortho")) + + def test_hfft(self): + x = random(14) + 1j*random(14) + x_herm = np.concatenate((random(1), x, random(1))) + x = np.concatenate((x_herm, x[::-1].conj())) + assert_array_almost_equal(np.fft.fft(x), np.fft.hfft(x_herm)) + assert_array_almost_equal(np.fft.hfft(x_herm) / np.sqrt(30), + np.fft.hfft(x_herm, norm="ortho")) + + def test_ihttf(self): + x = random(14) + 1j*random(14) + x_herm = np.concatenate((random(1), x, random(1))) + x = np.concatenate((x_herm, x[::-1].conj())) + assert_array_almost_equal(x_herm, np.fft.ihfft(np.fft.hfft(x_herm))) + assert_array_almost_equal( + x_herm, np.fft.ihfft(np.fft.hfft(x_herm, norm="ortho"), + norm="ortho")) + + def test_all_1d_norm_preserving(self): + # verify that round-trip transforms are norm-preserving + x = random(30) + x_norm = np.linalg.norm(x) + n = x.size * 2 + func_pairs = [(np.fft.fft, np.fft.ifft), + (np.fft.rfft, np.fft.irfft), + # hfft: order so the first function takes x.size samples + # (necessary for comparison to x_norm above) + (np.fft.ihfft, np.fft.hfft), + ] + for forw, back in func_pairs: + for n in [x.size, 2*x.size]: + for norm in [None, 'ortho']: + tmp = forw(x, n=n, norm=norm) + tmp = back(tmp, n=n, norm=norm) + assert_array_almost_equal(x_norm, + np.linalg.norm(tmp)) + +class TestFFTThreadSafe(object): + threads = 16 + input_shape = (800, 200) + + def _test_mtsame(self, func, *args): + def worker(args, q): + q.put(func(*args)) + + q = queue.Queue() + expected = func(*args) + + # Spin off a bunch of threads to call the same function simultaneously + t = [threading.Thread(target=worker, args=(args, q)) + for i in range(self.threads)] + [x.start() for x in t] + + [x.join() for x in t] + # Make sure all threads returned the correct value + for i in range(self.threads): + assert_array_equal(q.get(timeout=5), expected, + 'Function returned 
wrong value in multithreaded context') + + def test_fft(self): + a = np.ones(self.input_shape) * 1+0j + self._test_mtsame(np.fft.fft, a) + + def test_ifft(self): + a = np.ones(self.input_shape) * 1+0j + self._test_mtsame(np.fft.ifft, a) + + def test_rfft(self): + a = np.ones(self.input_shape) + self._test_mtsame(np.fft.rfft, a) + + def test_irfft(self): + a = np.ones(self.input_shape) * 1+0j + self._test_mtsame(np.fft.irfft, a) diff --git a/contrib/python/numpy/py2/numpy/fft/tests/test_helper.py b/contrib/python/numpy/py2/numpy/fft/tests/test_helper.py new file mode 100644 index 0000000000..8d315fa020 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/fft/tests/test_helper.py @@ -0,0 +1,248 @@ +"""Test functions for fftpack.helper module + +Copied from fftpack.helper by Pearu Peterson, October 2005 + +""" +from __future__ import division, absolute_import, print_function +import numpy as np +from numpy.testing import assert_array_almost_equal, assert_equal +from numpy import fft, pi +from numpy.fft.helper import _FFTCache + + +class TestFFTShift(object): + + def test_definition(self): + x = [0, 1, 2, 3, 4, -4, -3, -2, -1] + y = [-4, -3, -2, -1, 0, 1, 2, 3, 4] + assert_array_almost_equal(fft.fftshift(x), y) + assert_array_almost_equal(fft.ifftshift(y), x) + x = [0, 1, 2, 3, 4, -5, -4, -3, -2, -1] + y = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] + assert_array_almost_equal(fft.fftshift(x), y) + assert_array_almost_equal(fft.ifftshift(y), x) + + def test_inverse(self): + for n in [1, 4, 9, 100, 211]: + x = np.random.random((n,)) + assert_array_almost_equal(fft.ifftshift(fft.fftshift(x)), x) + + def test_axes_keyword(self): + freqs = [[0, 1, 2], [3, 4, -4], [-3, -2, -1]] + shifted = [[-1, -3, -2], [2, 0, 1], [-4, 3, 4]] + assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shifted) + assert_array_almost_equal(fft.fftshift(freqs, axes=0), + fft.fftshift(freqs, axes=(0,))) + assert_array_almost_equal(fft.ifftshift(shifted, axes=(0, 1)), freqs) + assert_array_almost_equal(fft.ifftshift(shifted, axes=0), + fft.ifftshift(shifted, axes=(0,))) + + assert_array_almost_equal(fft.fftshift(freqs), shifted) + assert_array_almost_equal(fft.ifftshift(shifted), freqs) + + def test_uneven_dims(self): + """ Test 2D input, which has uneven dimension sizes """ + freqs = [ + [0, 1], + [2, 3], + [4, 5] + ] + + # shift in dimension 0 + shift_dim0 = [ + [4, 5], + [0, 1], + [2, 3] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=0), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=0), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=(0,)), shift_dim0) + assert_array_almost_equal(fft.ifftshift(shift_dim0, axes=[0]), freqs) + + # shift in dimension 1 + shift_dim1 = [ + [1, 0], + [3, 2], + [5, 4] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=1), shift_dim1) + assert_array_almost_equal(fft.ifftshift(shift_dim1, axes=1), freqs) + + # shift in both dimensions + shift_dim_both = [ + [5, 4], + [1, 0], + [3, 2] + ] + assert_array_almost_equal(fft.fftshift(freqs, axes=(0, 1)), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=(0, 1)), freqs) + assert_array_almost_equal(fft.fftshift(freqs, axes=[0, 1]), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=[0, 1]), freqs) + + # axes=None (default) shift in all dimensions + assert_array_almost_equal(fft.fftshift(freqs, axes=None), shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both, axes=None), freqs) + assert_array_almost_equal(fft.fftshift(freqs), 
shift_dim_both) + assert_array_almost_equal(fft.ifftshift(shift_dim_both), freqs) + + def test_equal_to_original(self): + """ Test that the new (>=v1.15) implementation (see #10073) is equal to the original (<=v1.14) """ + from numpy.compat import integer_types + from numpy.core import asarray, concatenate, arange, take + + def original_fftshift(x, axes=None): + """ How fftshift was implemented in v1.14""" + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, integer_types): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + def original_ifftshift(x, axes=None): + """ How ifftshift was implemented in v1.14 """ + tmp = asarray(x) + ndim = tmp.ndim + if axes is None: + axes = list(range(ndim)) + elif isinstance(axes, integer_types): + axes = (axes,) + y = tmp + for k in axes: + n = tmp.shape[k] + p2 = n - (n + 1) // 2 + mylist = concatenate((arange(p2, n), arange(p2))) + y = take(y, mylist, k) + return y + + # create possible 2d array combinations and try all possible keywords + # compare output to original functions + for i in range(16): + for j in range(16): + for axes_keyword in [0, 1, None, (0,), (0, 1)]: + inp = np.random.rand(i, j) + + assert_array_almost_equal(fft.fftshift(inp, axes_keyword), + original_fftshift(inp, axes_keyword)) + + assert_array_almost_equal(fft.ifftshift(inp, axes_keyword), + original_ifftshift(inp, axes_keyword)) + + +class TestFFTFreq(object): + + def test_definition(self): + x = [0, 1, 2, 3, 4, -4, -3, -2, -1] + assert_array_almost_equal(9*fft.fftfreq(9), x) + assert_array_almost_equal(9*pi*fft.fftfreq(9, pi), x) + x = [0, 1, 2, 3, 4, -5, -4, -3, -2, -1] + assert_array_almost_equal(10*fft.fftfreq(10), x) + assert_array_almost_equal(10*pi*fft.fftfreq(10, pi), x) + + +class TestRFFTFreq(object): + + def test_definition(self): + x = [0, 1, 2, 3, 4] + assert_array_almost_equal(9*fft.rfftfreq(9), x) + assert_array_almost_equal(9*pi*fft.rfftfreq(9, pi), x) + x = [0, 1, 2, 3, 4, 5] + assert_array_almost_equal(10*fft.rfftfreq(10), x) + assert_array_almost_equal(10*pi*fft.rfftfreq(10, pi), x) + + +class TestIRFFTN(object): + + def test_not_last_axis_success(self): + ar, ai = np.random.random((2, 16, 8, 32)) + a = ar + 1j*ai + + axes = (-2,) + + # Should not raise error + fft.irfftn(a, axes=axes) + + +class TestFFTCache(object): + + def test_basic_behaviour(self): + c = _FFTCache(max_size_in_mb=1, max_item_count=4) + + # Put + c.put_twiddle_factors(1, np.ones(2, dtype=np.float32)) + c.put_twiddle_factors(2, np.zeros(2, dtype=np.float32)) + + # Get + assert_array_almost_equal(c.pop_twiddle_factors(1), + np.ones(2, dtype=np.float32)) + assert_array_almost_equal(c.pop_twiddle_factors(2), + np.zeros(2, dtype=np.float32)) + + # Nothing should be left. + assert_equal(len(c._dict), 0) + + # Now put everything in twice so it can be retrieved once and each will + # still have one item left. + for _ in range(2): + c.put_twiddle_factors(1, np.ones(2, dtype=np.float32)) + c.put_twiddle_factors(2, np.zeros(2, dtype=np.float32)) + assert_array_almost_equal(c.pop_twiddle_factors(1), + np.ones(2, dtype=np.float32)) + assert_array_almost_equal(c.pop_twiddle_factors(2), + np.zeros(2, dtype=np.float32)) + assert_equal(len(c._dict), 2) + + def test_automatic_pruning(self): + # That's around 2600 single precision samples. 
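        # Rough arithmetic behind the figure above: max_size_in_mb=0.01 is
        # 0.01 * 1024 * 1024 = 10485.76 bytes, and one float32 sample takes
        # 4 bytes, so the budget is about 10485.76 / 4 ~= 2621 samples.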
+ c = _FFTCache(max_size_in_mb=0.01, max_item_count=4) + + c.put_twiddle_factors(1, np.ones(200, dtype=np.float32)) + c.put_twiddle_factors(2, np.ones(200, dtype=np.float32)) + assert_equal(list(c._dict.keys()), [1, 2]) + + # This is larger than the limit but should still be kept. + c.put_twiddle_factors(3, np.ones(3000, dtype=np.float32)) + assert_equal(list(c._dict.keys()), [1, 2, 3]) + # Add one more. + c.put_twiddle_factors(4, np.ones(3000, dtype=np.float32)) + # The other three should no longer exist. + assert_equal(list(c._dict.keys()), [4]) + + # Now test the max item count pruning. + c = _FFTCache(max_size_in_mb=0.01, max_item_count=2) + c.put_twiddle_factors(2, np.empty(2)) + c.put_twiddle_factors(1, np.empty(2)) + # Can still be accessed. + assert_equal(list(c._dict.keys()), [2, 1]) + + c.put_twiddle_factors(3, np.empty(2)) + # 1 and 3 can still be accessed - c[2] has been touched least recently + # and is thus evicted. + assert_equal(list(c._dict.keys()), [1, 3]) + + # One last test. We will add a single large item that is slightly + # bigger then the cache size. Some small items can still be added. + c = _FFTCache(max_size_in_mb=0.01, max_item_count=5) + c.put_twiddle_factors(1, np.ones(3000, dtype=np.float32)) + c.put_twiddle_factors(2, np.ones(2, dtype=np.float32)) + c.put_twiddle_factors(3, np.ones(2, dtype=np.float32)) + c.put_twiddle_factors(4, np.ones(2, dtype=np.float32)) + assert_equal(list(c._dict.keys()), [1, 2, 3, 4]) + + # One more big item. This time it is 6 smaller ones but they are + # counted as one big item. + for _ in range(6): + c.put_twiddle_factors(5, np.ones(500, dtype=np.float32)) + # '1' no longer in the cache. Rest still in the cache. + assert_equal(list(c._dict.keys()), [2, 3, 4, 5]) + + # Another big item - should now be the only item in the cache. 
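        # np.ones(4000, dtype=np.float32) is roughly 16 kB, which by itself
        # exceeds the ~10.5 kB budget, so adding it evicts every earlier
        # entry and only key 6 is expected to survive.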
+ c.put_twiddle_factors(6, np.ones(4000, dtype=np.float32)) + assert_equal(list(c._dict.keys()), [6]) diff --git a/contrib/python/numpy/py2/numpy/lib/setup.py b/contrib/python/numpy/py2/numpy/lib/setup.py new file mode 100644 index 0000000000..d342410b8a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/setup.py @@ -0,0 +1,12 @@ +from __future__ import division, print_function + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + + config = Configuration('lib', parent_package, top_path) + config.add_data_dir('tests') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/__init__.py b/contrib/python/numpy/py2/numpy/lib/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npy b/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npy new file mode 100644 index 0000000000..12936c92d8 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npy differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npz b/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npz new file mode 100644 index 0000000000..68a3b53a1d Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/py2-objarr.npz differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npy b/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npy new file mode 100644 index 0000000000..6776074b42 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npy differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npz b/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npz new file mode 100644 index 0000000000..05eac0b76d Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/py3-objarr.npz differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/python3.npy b/contrib/python/numpy/py2/numpy/lib/tests/data/python3.npy new file mode 100644 index 0000000000..7c6997dd69 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/python3.npy differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/data/win64python2.npy b/contrib/python/numpy/py2/numpy/lib/tests/data/win64python2.npy new file mode 100644 index 0000000000..d9bc36af73 Binary files /dev/null and b/contrib/python/numpy/py2/numpy/lib/tests/data/win64python2.npy differ diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test__datasource.py b/contrib/python/numpy/py2/numpy/lib/tests/test__datasource.py new file mode 100644 index 0000000000..8eac16b589 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test__datasource.py @@ -0,0 +1,378 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys +import pytest +from tempfile import mkdtemp, mkstemp, NamedTemporaryFile +from shutil import rmtree + +import numpy.lib._datasource as datasource +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns + ) + +if sys.version_info[0] >= 3: + import urllib.request as urllib_request + from urllib.parse import urlparse + from urllib.error import URLError +else: + import urllib2 as urllib_request + from urlparse import urlparse + from urllib2 import URLError + + +def urlopen_stub(url, data=None): + '''Stub to replace urlopen for testing.''' + if url == valid_httpurl(): 
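        # The one "valid" URL is answered with an empty NamedTemporaryFile
        # standing in for the network response; any other URL falls through
        # to URLError below, so these tests never touch the real network.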
+ tmpfile = NamedTemporaryFile(prefix='urltmp_') + return tmpfile + else: + raise URLError('Name or service not known') + +# setup and teardown +old_urlopen = None + + +def setup_module(): + global old_urlopen + + old_urlopen = urllib_request.urlopen + urllib_request.urlopen = urlopen_stub + + +def teardown_module(): + urllib_request.urlopen = old_urlopen + +# A valid website for more robust testing +http_path = 'http://www.google.com/' +http_file = 'index.html' + +http_fakepath = 'http://fake.abc.web/site/' +http_fakefile = 'fake.txt' + +malicious_files = ['/etc/shadow', '../../shadow', + '..\\system.dat', 'c:\\windows\\system.dat'] + +magic_line = b'three is the magic number' + + +# Utility functions used by many tests +def valid_textfile(filedir): + # Generate and return a valid temporary file. + fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir, text=True) + os.close(fd) + return path + + +def invalid_textfile(filedir): + # Generate and return an invalid filename. + fd, path = mkstemp(suffix='.txt', prefix='dstmp_', dir=filedir) + os.close(fd) + os.remove(path) + return path + + +def valid_httpurl(): + return http_path+http_file + + +def invalid_httpurl(): + return http_fakepath+http_fakefile + + +def valid_baseurl(): + return http_path + + +def invalid_baseurl(): + return http_fakepath + + +def valid_httpfile(): + return http_file + + +def invalid_httpfile(): + return http_fakefile + + +class TestDataSourceOpen(object): + def setup(self): + self.tmpdir = mkdtemp() + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + fh = self.ds.open(valid_httpurl()) + assert_(fh) + fh.close() + + def test_InvalidHTTP(self): + url = invalid_httpurl() + assert_raises(IOError, self.ds.open, url) + try: + self.ds.open(url) + except IOError as e: + # Regression test for bug fixed in r4342. + assert_(e.errno is None) + + def test_InvalidHTTPCacheURLError(self): + assert_raises(URLError, self.ds._cache, invalid_httpurl()) + + def test_ValidFile(self): + local_file = valid_textfile(self.tmpdir) + fh = self.ds.open(local_file) + assert_(fh) + fh.close() + + def test_InvalidFile(self): + invalid_file = invalid_textfile(self.tmpdir) + assert_raises(IOError, self.ds.open, invalid_file) + + def test_ValidGzipFile(self): + try: + import gzip + except ImportError: + # We don't have the gzip capabilities to test. + pytest.skip() + # Test datasource's internal file_opener for Gzip files. + filepath = os.path.join(self.tmpdir, 'foobar.txt.gz') + fp = gzip.open(filepath, 'w') + fp.write(magic_line) + fp.close() + fp = self.ds.open(filepath) + result = fp.readline() + fp.close() + assert_equal(magic_line, result) + + def test_ValidBz2File(self): + try: + import bz2 + except ImportError: + # We don't have the bz2 capabilities to test. + pytest.skip() + # Test datasource's internal file_opener for BZip2 files. + filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2') + fp = bz2.BZ2File(filepath, 'w') + fp.write(magic_line) + fp.close() + fp = self.ds.open(filepath) + result = fp.readline() + fp.close() + assert_equal(magic_line, result) + + @pytest.mark.skipif(sys.version_info[0] >= 3, reason="Python 2 only") + def test_Bz2File_text_mode_warning(self): + try: + import bz2 + except ImportError: + # We don't have the bz2 capabilities to test. + pytest.skip() + # Test datasource's internal file_opener for BZip2 files. 
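        # Same bz2 fixture as test_ValidBz2File above, but reopened through
        # DataSource.open in text mode ('rt'), which is expected to emit a
        # RuntimeWarning under Python 2 (hence the skipif marker).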
+ filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2') + fp = bz2.BZ2File(filepath, 'w') + fp.write(magic_line) + fp.close() + with assert_warns(RuntimeWarning): + fp = self.ds.open(filepath, 'rt') + result = fp.readline() + fp.close() + assert_equal(magic_line, result) + + +class TestDataSourceExists(object): + def setup(self): + self.tmpdir = mkdtemp() + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + assert_(self.ds.exists(valid_httpurl())) + + def test_InvalidHTTP(self): + assert_equal(self.ds.exists(invalid_httpurl()), False) + + def test_ValidFile(self): + # Test valid file in destpath + tmpfile = valid_textfile(self.tmpdir) + assert_(self.ds.exists(tmpfile)) + # Test valid local file not in destpath + localdir = mkdtemp() + tmpfile = valid_textfile(localdir) + assert_(self.ds.exists(tmpfile)) + rmtree(localdir) + + def test_InvalidFile(self): + tmpfile = invalid_textfile(self.tmpdir) + assert_equal(self.ds.exists(tmpfile), False) + + +class TestDataSourceAbspath(object): + def setup(self): + self.tmpdir = os.path.abspath(mkdtemp()) + self.ds = datasource.DataSource(self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.ds + + def test_ValidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl()) + local_path = os.path.join(self.tmpdir, netloc, + upath.strip(os.sep).strip('/')) + assert_equal(local_path, self.ds.abspath(valid_httpurl())) + + def test_ValidFile(self): + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + # Test with filename only + assert_equal(tmpfile, self.ds.abspath(tmpfilename)) + # Test filename with complete path + assert_equal(tmpfile, self.ds.abspath(tmpfile)) + + def test_InvalidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(invalid_httpurl()) + invalidhttp = os.path.join(self.tmpdir, netloc, + upath.strip(os.sep).strip('/')) + assert_(invalidhttp != self.ds.abspath(valid_httpurl())) + + def test_InvalidFile(self): + invalidfile = valid_textfile(self.tmpdir) + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + # Test with filename only + assert_(invalidfile != self.ds.abspath(tmpfilename)) + # Test filename with complete path + assert_(invalidfile != self.ds.abspath(tmpfile)) + + def test_sandboxing(self): + tmpfile = valid_textfile(self.tmpdir) + tmpfilename = os.path.split(tmpfile)[-1] + + tmp_path = lambda x: os.path.abspath(self.ds.abspath(x)) + + assert_(tmp_path(valid_httpurl()).startswith(self.tmpdir)) + assert_(tmp_path(invalid_httpurl()).startswith(self.tmpdir)) + assert_(tmp_path(tmpfile).startswith(self.tmpdir)) + assert_(tmp_path(tmpfilename).startswith(self.tmpdir)) + for fn in malicious_files: + assert_(tmp_path(http_path+fn).startswith(self.tmpdir)) + assert_(tmp_path(fn).startswith(self.tmpdir)) + + def test_windows_os_sep(self): + orig_os_sep = os.sep + try: + os.sep = '\\' + self.test_ValidHTTP() + self.test_ValidFile() + self.test_InvalidHTTP() + self.test_InvalidFile() + self.test_sandboxing() + finally: + os.sep = orig_os_sep + + +class TestRepositoryAbspath(object): + def setup(self): + self.tmpdir = os.path.abspath(mkdtemp()) + self.repos = datasource.Repository(valid_baseurl(), self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.repos + + def test_ValidHTTP(self): + scheme, netloc, upath, pms, qry, frg = urlparse(valid_httpurl()) + local_path = os.path.join(self.repos._destpath, netloc, + 
upath.strip(os.sep).strip('/')) + filepath = self.repos.abspath(valid_httpfile()) + assert_equal(local_path, filepath) + + def test_sandboxing(self): + tmp_path = lambda x: os.path.abspath(self.repos.abspath(x)) + assert_(tmp_path(valid_httpfile()).startswith(self.tmpdir)) + for fn in malicious_files: + assert_(tmp_path(http_path+fn).startswith(self.tmpdir)) + assert_(tmp_path(fn).startswith(self.tmpdir)) + + def test_windows_os_sep(self): + orig_os_sep = os.sep + try: + os.sep = '\\' + self.test_ValidHTTP() + self.test_sandboxing() + finally: + os.sep = orig_os_sep + + +class TestRepositoryExists(object): + def setup(self): + self.tmpdir = mkdtemp() + self.repos = datasource.Repository(valid_baseurl(), self.tmpdir) + + def teardown(self): + rmtree(self.tmpdir) + del self.repos + + def test_ValidFile(self): + # Create local temp file + tmpfile = valid_textfile(self.tmpdir) + assert_(self.repos.exists(tmpfile)) + + def test_InvalidFile(self): + tmpfile = invalid_textfile(self.tmpdir) + assert_equal(self.repos.exists(tmpfile), False) + + def test_RemoveHTTPFile(self): + assert_(self.repos.exists(valid_httpurl())) + + def test_CachedHTTPFile(self): + localfile = valid_httpurl() + # Create a locally cached temp file with an URL based + # directory structure. This is similar to what Repository.open + # would do. + scheme, netloc, upath, pms, qry, frg = urlparse(localfile) + local_path = os.path.join(self.repos._destpath, netloc) + os.mkdir(local_path, 0o0700) + tmpfile = valid_textfile(local_path) + assert_(self.repos.exists(tmpfile)) + + +class TestOpenFunc(object): + def setup(self): + self.tmpdir = mkdtemp() + + def teardown(self): + rmtree(self.tmpdir) + + def test_DataSourceOpen(self): + local_file = valid_textfile(self.tmpdir) + # Test case where destpath is passed in + fp = datasource.open(local_file, destpath=self.tmpdir) + assert_(fp) + fp.close() + # Test case where default destpath is used + fp = datasource.open(local_file) + assert_(fp) + fp.close() + +def test_del_attr_handling(): + # DataSource __del__ can be called + # even if __init__ fails when the + # Exception object is caught by the + # caller as happens in refguide_check + # is_deprecated() function + + ds = datasource.DataSource() + # simulate failed __init__ by removing key attribute + # produced within __init__ and expected by __del__ + del ds._istmpdest + # should not raise an AttributeError if __del__ + # gracefully handles failed __init__: + ds.__del__() diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test__iotools.py b/contrib/python/numpy/py2/numpy/lib/tests/test__iotools.py new file mode 100644 index 0000000000..e04fdc8080 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test__iotools.py @@ -0,0 +1,352 @@ +from __future__ import division, absolute_import, print_function + +import time +from datetime import date + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_allclose, assert_raises, + ) +from numpy.lib._iotools import ( + LineSplitter, NameValidator, StringConverter, + has_nested_fields, easy_dtype, flatten_dtype + ) +from numpy.compat import unicode + + +class TestLineSplitter(object): + "Tests the LineSplitter class." 
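    # LineSplitter(delimiter)(line) drops the comment part of a line and
    # splits the remainder into fields.  The cases below cover the supported
    # delimiter kinds: None/'' (any whitespace), an explicit character such
    # as ',' or '\t', a single int (constant fixed-width fields), and a
    # tuple of ints (variable fixed-width fields).  For instance,
    # LineSplitter(',')("1,2,3,4,,5") gives ['1', '2', '3', '4', '', '5'].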
+ + def test_no_delimiter(self): + "Test LineSplitter w/o delimiter" + strg = " 1 2 3 4 5 # test" + test = LineSplitter()(strg) + assert_equal(test, ['1', '2', '3', '4', '5']) + test = LineSplitter('')(strg) + assert_equal(test, ['1', '2', '3', '4', '5']) + + def test_space_delimiter(self): + "Test space delimiter" + strg = " 1 2 3 4 5 # test" + test = LineSplitter(' ')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) + test = LineSplitter(' ')(strg) + assert_equal(test, ['1 2 3 4', '5']) + + def test_tab_delimiter(self): + "Test tab delimiter" + strg = " 1\t 2\t 3\t 4\t 5 6" + test = LineSplitter('\t')(strg) + assert_equal(test, ['1', '2', '3', '4', '5 6']) + strg = " 1 2\t 3 4\t 5 6" + test = LineSplitter('\t')(strg) + assert_equal(test, ['1 2', '3 4', '5 6']) + + def test_other_delimiter(self): + "Test LineSplitter on delimiter" + strg = "1,2,3,4,,5" + test = LineSplitter(',')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) + # + strg = " 1,2,3,4,,5 # test" + test = LineSplitter(',')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) + + # gh-11028 bytes comment/delimiters should get encoded + strg = b" 1,2,3,4,,5 % test" + test = LineSplitter(delimiter=b',', comments=b'%')(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5']) + + def test_constant_fixed_width(self): + "Test LineSplitter w/ fixed-width fields" + strg = " 1 2 3 4 5 # test" + test = LineSplitter(3)(strg) + assert_equal(test, ['1', '2', '3', '4', '', '5', '']) + # + strg = " 1 3 4 5 6# test" + test = LineSplitter(20)(strg) + assert_equal(test, ['1 3 4 5 6']) + # + strg = " 1 3 4 5 6# test" + test = LineSplitter(30)(strg) + assert_equal(test, ['1 3 4 5 6']) + + def test_variable_fixed_width(self): + strg = " 1 3 4 5 6# test" + test = LineSplitter((3, 6, 6, 3))(strg) + assert_equal(test, ['1', '3', '4 5', '6']) + # + strg = " 1 3 4 5 6# test" + test = LineSplitter((6, 6, 9))(strg) + assert_equal(test, ['1', '3 4', '5 6']) + +# ----------------------------------------------------------------------------- + + +class TestNameValidator(object): + + def test_case_sensitivity(self): + "Test case sensitivity" + names = ['A', 'a', 'b', 'c'] + test = NameValidator().validate(names) + assert_equal(test, ['A', 'a', 'b', 'c']) + test = NameValidator(case_sensitive=False).validate(names) + assert_equal(test, ['A', 'A_1', 'B', 'C']) + test = NameValidator(case_sensitive='upper').validate(names) + assert_equal(test, ['A', 'A_1', 'B', 'C']) + test = NameValidator(case_sensitive='lower').validate(names) + assert_equal(test, ['a', 'a_1', 'b', 'c']) + + # check exceptions + assert_raises(ValueError, NameValidator, case_sensitive='foobar') + + def test_excludelist(self): + "Test excludelist" + names = ['dates', 'data', 'Other Data', 'mask'] + validator = NameValidator(excludelist=['dates', 'data', 'mask']) + test = validator.validate(names) + assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_']) + + def test_missing_names(self): + "Test validate missing names" + namelist = ('a', 'b', 'c') + validator = NameValidator() + assert_equal(validator(namelist), ['a', 'b', 'c']) + namelist = ('', 'b', 'c') + assert_equal(validator(namelist), ['f0', 'b', 'c']) + namelist = ('a', 'b', '') + assert_equal(validator(namelist), ['a', 'b', 'f0']) + namelist = ('', 'f0', '') + assert_equal(validator(namelist), ['f1', 'f0', 'f2']) + + def test_validate_nb_names(self): + "Test validate nb names" + namelist = ('a', 'b', 'c') + validator = NameValidator() + assert_equal(validator(namelist, nbfields=1), ('a',)) + 
assert_equal(validator(namelist, nbfields=5, defaultfmt="g%i"), + ['a', 'b', 'c', 'g0', 'g1']) + + def test_validate_wo_names(self): + "Test validate no names" + namelist = None + validator = NameValidator() + assert_(validator(namelist) is None) + assert_equal(validator(namelist, nbfields=3), ['f0', 'f1', 'f2']) + +# ----------------------------------------------------------------------------- + + +def _bytes_to_date(s): + return date(*time.strptime(s, "%Y-%m-%d")[:3]) + + +class TestStringConverter(object): + "Test StringConverter" + + def test_creation(self): + "Test creation of a StringConverter" + converter = StringConverter(int, -99999) + assert_equal(converter._status, 1) + assert_equal(converter.default, -99999) + + def test_upgrade(self): + "Tests the upgrade method." + + converter = StringConverter() + assert_equal(converter._status, 0) + + # test int + assert_equal(converter.upgrade('0'), 0) + assert_equal(converter._status, 1) + + # On systems where long defaults to 32-bit, the statuses will be + # offset by one, so we check for this here. + import numpy.core.numeric as nx + status_offset = int(nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize) + + # test int > 2**32 + assert_equal(converter.upgrade('17179869184'), 17179869184) + assert_equal(converter._status, 1 + status_offset) + + # test float + assert_allclose(converter.upgrade('0.'), 0.0) + assert_equal(converter._status, 2 + status_offset) + + # test complex + assert_equal(converter.upgrade('0j'), complex('0j')) + assert_equal(converter._status, 3 + status_offset) + + # test str + # note that the longdouble type has been skipped, so the + # _status increases by 2. Everything should succeed with + # unicode conversion (5). + for s in ['a', u'a', b'a']: + res = converter.upgrade(s) + assert_(type(res) is unicode) + assert_equal(res, u'a') + assert_equal(converter._status, 5 + status_offset) + + def test_missing(self): + "Tests the use of missing values." 
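        # Strings listed in missing_values (plus the empty string) map to
        # converter.default instead of being converted; a value that merely
        # fails to convert, such as 'miss' below, raises ValueError instead.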
+ converter = StringConverter(missing_values=('missing', + 'missed')) + converter.upgrade('0') + assert_equal(converter('0'), 0) + assert_equal(converter(''), converter.default) + assert_equal(converter('missing'), converter.default) + assert_equal(converter('missed'), converter.default) + try: + converter('miss') + except ValueError: + pass + + def test_upgrademapper(self): + "Tests updatemapper" + dateparser = _bytes_to_date + StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) + convert = StringConverter(dateparser, date(2000, 1, 1)) + test = convert('2001-01-01') + assert_equal(test, date(2001, 1, 1)) + test = convert('2009-01-01') + assert_equal(test, date(2009, 1, 1)) + test = convert('') + assert_equal(test, date(2000, 1, 1)) + + def test_string_to_object(self): + "Make sure that string-to-object functions are properly recognized" + old_mapper = StringConverter._mapper[:] # copy of list + conv = StringConverter(_bytes_to_date) + assert_equal(conv._mapper, old_mapper) + assert_(hasattr(conv, 'default')) + + def test_keep_default(self): + "Make sure we don't lose an explicit default" + converter = StringConverter(None, missing_values='', + default=-999) + converter.upgrade('3.14159265') + assert_equal(converter.default, -999) + assert_equal(converter.type, np.dtype(float)) + # + converter = StringConverter( + None, missing_values='', default=0) + converter.upgrade('3.14159265') + assert_equal(converter.default, 0) + assert_equal(converter.type, np.dtype(float)) + + def test_keep_default_zero(self): + "Check that we don't lose a default of 0" + converter = StringConverter(int, default=0, + missing_values="N/A") + assert_equal(converter.default, 0) + + def test_keep_missing_values(self): + "Check that we're not losing missing values" + converter = StringConverter(int, default=0, + missing_values="N/A") + assert_equal( + converter.missing_values, {'', 'N/A'}) + + def test_int64_dtype(self): + "Check that int64 integer types can be specified" + converter = StringConverter(np.int64, default=0) + val = "-9223372036854775807" + assert_(converter(val) == -9223372036854775807) + val = "9223372036854775807" + assert_(converter(val) == 9223372036854775807) + + def test_uint64_dtype(self): + "Check that uint64 integer types can be specified" + converter = StringConverter(np.uint64, default=0) + val = "9223372043271415339" + assert_(converter(val) == 9223372043271415339) + + +class TestMiscFunctions(object): + + def test_has_nested_dtype(self): + "Test has_nested_dtype" + ndtype = np.dtype(float) + assert_equal(has_nested_fields(ndtype), False) + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + assert_equal(has_nested_fields(ndtype), False) + ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + assert_equal(has_nested_fields(ndtype), True) + + def test_easy_dtype(self): + "Test ndtype on dtypes" + # Simple case + ndtype = float + assert_equal(easy_dtype(ndtype), np.dtype(float)) + # As string w/o names + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype), + np.dtype([('f0', "i4"), ('f1', "f8")])) + # As string w/o names but different default format + assert_equal(easy_dtype(ndtype, defaultfmt="field_%03i"), + np.dtype([('field_000', "i4"), ('field_001', "f8")])) + # As string w/ names + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype, names="a, b"), + np.dtype([('a', "i4"), ('b', "f8")])) + # As string w/ names (too many) + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([('a', "i4"), ('b', "f8")])) + # As string w/ names (not 
enough) + ndtype = "i4, f8" + assert_equal(easy_dtype(ndtype, names=", b"), + np.dtype([('f0', "i4"), ('b', "f8")])) + # ... (with different default format) + assert_equal(easy_dtype(ndtype, names="a", defaultfmt="f%02i"), + np.dtype([('a', "i4"), ('f00', "f8")])) + # As list of tuples w/o names + ndtype = [('A', int), ('B', float)] + assert_equal(easy_dtype(ndtype), np.dtype([('A', int), ('B', float)])) + # As list of tuples w/ names + assert_equal(easy_dtype(ndtype, names="a,b"), + np.dtype([('a', int), ('b', float)])) + # As list of tuples w/ not enough names + assert_equal(easy_dtype(ndtype, names="a"), + np.dtype([('a', int), ('f0', float)])) + # As list of tuples w/ too many names + assert_equal(easy_dtype(ndtype, names="a,b,c"), + np.dtype([('a', int), ('b', float)])) + # As list of types w/o names + ndtype = (int, float, float) + assert_equal(easy_dtype(ndtype), + np.dtype([('f0', int), ('f1', float), ('f2', float)])) + # As list of types w names + ndtype = (int, float, float) + assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([('a', int), ('b', float), ('c', float)])) + # As simple dtype w/ names + ndtype = np.dtype(float) + assert_equal(easy_dtype(ndtype, names="a, b, c"), + np.dtype([(_, float) for _ in ('a', 'b', 'c')])) + # As simple dtype w/o names (but multiple fields) + ndtype = np.dtype(float) + assert_equal( + easy_dtype(ndtype, names=['', '', ''], defaultfmt="f%02i"), + np.dtype([(_, float) for _ in ('f00', 'f01', 'f02')])) + + def test_flatten_dtype(self): + "Testing flatten_dtype" + # Standard dtype + dt = np.dtype([("a", "f8"), ("b", "f8")]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, float]) + # Recursive dtype + dt = np.dtype([("a", [("aa", '|S1'), ("ab", '|S2')]), ("b", int)]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [np.dtype('|S1'), np.dtype('|S2'), int]) + # dtype with shaped fields + dt = np.dtype([("a", (float, 2)), ("b", (int, 3))]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, int]) + dt_flat = flatten_dtype(dt, True) + assert_equal(dt_flat, [float] * 2 + [int] * 3) + # dtype w/ titles + dt = np.dtype([(("a", "A"), "f8"), (("b", "B"), "f8")]) + dt_flat = flatten_dtype(dt) + assert_equal(dt_flat, [float, float]) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test__version.py b/contrib/python/numpy/py2/numpy/lib/tests/test__version.py new file mode 100644 index 0000000000..8e66a0c032 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test__version.py @@ -0,0 +1,66 @@ +"""Tests for the NumpyVersion class. + +""" +from __future__ import division, absolute_import, print_function + +from numpy.testing import assert_, assert_raises +from numpy.lib import NumpyVersion + + +def test_main_versions(): + assert_(NumpyVersion('1.8.0') == '1.8.0') + for ver in ['1.9.0', '2.0.0', '1.8.1']: + assert_(NumpyVersion('1.8.0') < ver) + + for ver in ['1.7.0', '1.7.1', '0.9.9']: + assert_(NumpyVersion('1.8.0') > ver) + + +def test_version_1_point_10(): + # regression test for gh-2998. 
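    # Plain string comparison would order '1.10.0' before '1.9.0' (character
    # by character, '1' < '9'); NumpyVersion compares the numeric components,
    # which is what the asserts below verify.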
+ assert_(NumpyVersion('1.9.0') < '1.10.0') + assert_(NumpyVersion('1.11.0') < '1.11.1') + assert_(NumpyVersion('1.11.0') == '1.11.0') + assert_(NumpyVersion('1.99.11') < '1.99.12') + + +def test_alpha_beta_rc(): + assert_(NumpyVersion('1.8.0rc1') == '1.8.0rc1') + for ver in ['1.8.0', '1.8.0rc2']: + assert_(NumpyVersion('1.8.0rc1') < ver) + + for ver in ['1.8.0a2', '1.8.0b3', '1.7.2rc4']: + assert_(NumpyVersion('1.8.0rc1') > ver) + + assert_(NumpyVersion('1.8.0b1') > '1.8.0a2') + + +def test_dev_version(): + assert_(NumpyVersion('1.9.0.dev-Unknown') < '1.9.0') + for ver in ['1.9.0', '1.9.0a1', '1.9.0b2', '1.9.0b2.dev-ffffffff']: + assert_(NumpyVersion('1.9.0.dev-f16acvda') < ver) + + assert_(NumpyVersion('1.9.0.dev-f16acvda') == '1.9.0.dev-11111111') + + +def test_dev_a_b_rc_mixed(): + assert_(NumpyVersion('1.9.0a2.dev-f16acvda') == '1.9.0a2.dev-11111111') + assert_(NumpyVersion('1.9.0a2.dev-6acvda54') < '1.9.0a2') + + +def test_dev0_version(): + assert_(NumpyVersion('1.9.0.dev0+Unknown') < '1.9.0') + for ver in ['1.9.0', '1.9.0a1', '1.9.0b2', '1.9.0b2.dev0+ffffffff']: + assert_(NumpyVersion('1.9.0.dev0+f16acvda') < ver) + + assert_(NumpyVersion('1.9.0.dev0+f16acvda') == '1.9.0.dev0+11111111') + + +def test_dev0_a_b_rc_mixed(): + assert_(NumpyVersion('1.9.0a2.dev0+f16acvda') == '1.9.0a2.dev0+11111111') + assert_(NumpyVersion('1.9.0a2.dev0+6acvda54') < '1.9.0a2') + + +def test_raises(): + for ver in ['1.9', '1,9.0', '1.7.x']: + assert_raises(ValueError, NumpyVersion, ver) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_arraypad.py b/contrib/python/numpy/py2/numpy/lib/tests/test_arraypad.py new file mode 100644 index 0000000000..6620db8df3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_arraypad.py @@ -0,0 +1,1286 @@ +"""Tests for the array padding functions. 
+ +""" +from __future__ import division, absolute_import, print_function + +import pytest + +import numpy as np +from numpy.testing import (assert_array_equal, assert_raises, assert_allclose, + assert_equal) +from numpy.lib import pad +from numpy.lib.arraypad import _as_pairs + + +class TestAsPairs(object): + + def test_single_value(self): + """Test casting for a single value.""" + expected = np.array([[3, 3]] * 10) + for x in (3, [3], [[3]]): + result = _as_pairs(x, 10) + assert_equal(result, expected) + # Test with dtype=object + obj = object() + assert_equal( + _as_pairs(obj, 10), + np.array([[obj, obj]] * 10) + ) + + def test_two_values(self): + """Test proper casting for two different values.""" + # Broadcasting in the first dimension with numbers + expected = np.array([[3, 4]] * 10) + for x in ([3, 4], [[3, 4]]): + result = _as_pairs(x, 10) + assert_equal(result, expected) + # and with dtype=object + obj = object() + assert_equal( + _as_pairs(["a", obj], 10), + np.array([["a", obj]] * 10) + ) + + # Broadcasting in the second / last dimension with numbers + assert_equal( + _as_pairs([[3], [4]], 2), + np.array([[3, 3], [4, 4]]) + ) + # and with dtype=object + assert_equal( + _as_pairs([["a"], [obj]], 2), + np.array([["a", "a"], [obj, obj]]) + ) + + def test_with_none(self): + expected = ((None, None), (None, None), (None, None)) + assert_equal( + _as_pairs(None, 3, as_index=False), + expected + ) + assert_equal( + _as_pairs(None, 3, as_index=True), + expected + ) + + def test_pass_through(self): + """Test if `x` already matching desired output are passed through.""" + expected = np.arange(12).reshape((6, 2)) + assert_equal( + _as_pairs(expected, 6), + expected + ) + + def test_as_index(self): + """Test results if `as_index=True`.""" + assert_equal( + _as_pairs([2.6, 3.3], 10, as_index=True), + np.array([[3, 3]] * 10, dtype=np.intp) + ) + assert_equal( + _as_pairs([2.6, 4.49], 10, as_index=True), + np.array([[3, 4]] * 10, dtype=np.intp) + ) + for x in (-3, [-3], [[-3]], [-3, 4], [3, -4], [[-3, 4]], [[4, -3]], + [[1, 2]] * 9 + [[1, -2]]): + with pytest.raises(ValueError, match="negative values"): + _as_pairs(x, 10, as_index=True) + + def test_exceptions(self): + """Ensure faulty usage is discovered.""" + with pytest.raises(ValueError, match="more dimensions than allowed"): + _as_pairs([[[3]]], 10) + with pytest.raises(ValueError, match="could not be broadcast"): + _as_pairs([[1, 2], [3, 4]], 3) + with pytest.raises(ValueError, match="could not be broadcast"): + _as_pairs(np.ones((2, 3)), 3) + + +class TestConditionalShortcuts(object): + def test_zero_padding_shortcuts(self): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(0, 0) for axis in test.shape] + modes = ['constant', + 'edge', + 'linear_ramp', + 'maximum', + 'mean', + 'median', + 'minimum', + 'reflect', + 'symmetric', + 'wrap', + ] + for mode in modes: + assert_array_equal(test, pad(test, pad_amt, mode=mode)) + + def test_shallow_statistic_range(self): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(1, 1) for axis in test.shape] + modes = ['maximum', + 'mean', + 'median', + 'minimum', + ] + for mode in modes: + assert_array_equal(pad(test, pad_amt, mode='edge'), + pad(test, pad_amt, mode=mode, stat_length=1)) + + def test_clip_statistic_range(self): + test = np.arange(30).reshape(5, 6) + pad_amt = [(3, 3) for axis in test.shape] + modes = ['maximum', + 'mean', + 'median', + 'minimum', + ] + for mode in modes: + assert_array_equal(pad(test, pad_amt, mode=mode), + pad(test, pad_amt, mode=mode, stat_length=30)) + + 
+class TestStatistic(object): + def test_check_mean_stat_length(self): + a = np.arange(100).astype('f') + a = pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) + b = np.array( + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 98., 98., 98., 98., 98., 98., 98., 98., 98., 98., + 98., 98., 98., 98., 98., 98., 98., 98., 98., 98. + ]) + assert_array_equal(a, b) + + def test_check_maximum_1(self): + a = np.arange(100) + a = pad(a, (25, 20), 'maximum') + b = np.array( + [99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99] + ) + assert_array_equal(a, b) + + def test_check_maximum_2(self): + a = np.arange(100) + 1 + a = pad(a, (25, 20), 'maximum') + b = np.array( + [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] + ) + assert_array_equal(a, b) + + def test_check_maximum_stat_length(self): + a = np.arange(100) + 1 + a = pad(a, (25, 20), 'maximum', stat_length=10) + b = np.array( + [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] + ) + assert_array_equal(a, b) + + def test_check_minimum_1(self): + a = np.arange(100) + a = pad(a, (25, 20), 'minimum') + b = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + assert_array_equal(a, b) + + def test_check_minimum_2(self): + a = np.arange(100) + 2 + a = pad(a, (25, 20), 'minimum') + b = np.array( + [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, + + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, + + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + ) + assert_array_equal(a, b) + + def test_check_minimum_stat_length(self): + a = np.arange(100) + 1 + a = pad(a, (25, 20), 'minimum', stat_length=10) + b = np.array( + [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91] + ) + assert_array_equal(a, b) + + def test_check_median(self): + a = np.arange(100).astype('f') + a = pad(a, (25, 20), 'median') + b = np.array( + [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5] + ) + assert_array_equal(a, b) + + def test_check_median_01(self): + a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) + a = pad(a, 1, 'median') + b = np.array( + [[4, 4, 5, 4, 4], + + [3, 3, 1, 4, 3], + [5, 4, 5, 9, 5], + [8, 9, 8, 2, 8], + + [4, 4, 5, 4, 4]] + ) + assert_array_equal(a, b) + + def test_check_median_02(self): + a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) + a = pad(a.T, 1, 'median').T + b = np.array( + [[5, 4, 5, 4, 5], + + [3, 3, 1, 4, 3], + [5, 4, 5, 9, 5], + [8, 9, 8, 2, 8], + + [5, 4, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def test_check_median_stat_length(self): + a = np.arange(100).astype('f') + a[1] = 2. + a[97] = 96. 
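        # With a[1] and a[97] tweaked, the left pad becomes
        # median([0., 2., 2.]) == 2. (stat_length 3) and the right pad
        # median([95., 96., 96., 98., 99.]) == 96. (stat_length 5), matching
        # the expected array below.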
+ a = pad(a, (25, 20), 'median', stat_length=(3, 5)) + b = np.array( + [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., + + 0., 2., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 96., 98., 99., + + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96., + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96.] + ) + assert_array_equal(a, b) + + def test_check_mean_shape_one(self): + a = [[4, 5, 6]] + a = pad(a, (5, 7), 'mean', stat_length=2) + b = np.array( + [[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], + [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6]] + ) + assert_array_equal(a, b) + + def test_check_mean_2(self): + a = np.arange(100).astype('f') + a = pad(a, (25, 20), 'mean') + b = np.array( + [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, + + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., + + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, + 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5] + ) + assert_array_equal(a, b) + + @pytest.mark.parametrize("mode", [ + pytest.param("mean", marks=pytest.mark.xfail(reason="gh-11216")), + "median", + "minimum", + "maximum" + ]) + def test_same_prepend_append(self, mode): + """ Test that appended and prepended values are equal """ + # This test is constructed to trigger floating point rounding errors in + # a way that caused gh-11216 for mode=='mean' + a = np.array([-1, 2, -1]) + np.array([0, 1e-12, 0], dtype=np.float64) + a = np.pad(a, (1, 1), mode) + assert_equal(a[0], a[-1]) + + +class TestConstant(object): + def test_check_constant(self): + a = np.arange(100) + a = pad(a, (25, 20), 'constant', constant_values=(10, 20)) + b = np.array( + [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 
54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20] + ) + assert_array_equal(a, b) + + def test_check_constant_zeros(self): + a = np.arange(100) + a = pad(a, (25, 20), 'constant') + b = np.array( + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + assert_array_equal(a, b) + + def test_check_constant_float(self): + # If input array is int, but constant_values are float, the dtype of + # the array to be padded is kept + arr = np.arange(30).reshape(5, 6) + test = pad(arr, (1, 2), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [ 1, 0, 1, 2, 3, 4, 5, 1, 1], + [ 1, 6, 7, 8, 9, 10, 11, 1, 1], + [ 1, 12, 13, 14, 15, 16, 17, 1, 1], + [ 1, 18, 19, 20, 21, 22, 23, 1, 1], + [ 1, 24, 25, 26, 27, 28, 29, 1, 1], + + [ 1, 1, 1, 1, 1, 1, 1, 1, 1], + [ 1, 1, 1, 1, 1, 1, 1, 1, 1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float2(self): + # If input array is float, and constant_values are float, the dtype of + # the array to be padded is kept - here retaining the float constants + arr = np.arange(30).reshape(5, 6) + arr_float = arr.astype(np.float64) + test = pad(arr_float, ((1, 2), (1, 2)), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + + [ 1.1, 0. , 1. , 2. , 3. , 4. , 5. , 1.1, 1.1], + [ 1.1, 6. , 7. , 8. , 9. , 10. , 11. , 1.1, 1.1], + [ 1.1, 12. , 13. , 14. , 15. , 16. , 17. , 1.1, 1.1], + [ 1.1, 18. , 19. , 20. , 21. , 22. , 23. , 1.1, 1.1], + [ 1.1, 24. , 25. , 26. , 27. , 28. , 29. 
, 1.1, 1.1], + + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float3(self): + a = np.arange(100, dtype=float) + a = pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) + b = np.array( + [-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2] + ) + assert_allclose(a, b) + + def test_check_constant_odd_pad_amount(self): + arr = np.arange(30).reshape(5, 6) + test = pad(arr, ((1,), (2,)), mode='constant', + constant_values=3) + expected = np.array( + [[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], + + [ 3, 3, 0, 1, 2, 3, 4, 5, 3, 3], + [ 3, 3, 6, 7, 8, 9, 10, 11, 3, 3], + [ 3, 3, 12, 13, 14, 15, 16, 17, 3, 3], + [ 3, 3, 18, 19, 20, 21, 22, 23, 3, 3], + [ 3, 3, 24, 25, 26, 27, 28, 29, 3, 3], + + [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]] + ) + assert_allclose(test, expected) + + def test_check_constant_pad_2d(self): + arr = np.arange(4).reshape(2, 2) + test = np.lib.pad(arr, ((1, 2), (1, 3)), mode='constant', + constant_values=((1, 2), (3, 4))) + expected = np.array( + [[3, 1, 1, 4, 4, 4], + [3, 0, 1, 4, 4, 4], + [3, 2, 3, 4, 4, 4], + [3, 2, 2, 4, 4, 4], + [3, 2, 2, 4, 4, 4]] + ) + assert_allclose(test, expected) + + def test_check_large_integers(self): + uint64_max = 2 ** 64 - 1 + arr = np.full(5, uint64_max, dtype=np.uint64) + test = np.pad(arr, 1, mode="constant", constant_values=arr.min()) + expected = np.full(7, uint64_max, dtype=np.uint64) + assert_array_equal(test, expected) + + int64_max = 2 ** 63 - 1 + arr = np.full(5, int64_max, dtype=np.int64) + test = np.pad(arr, 1, mode="constant", constant_values=arr.min()) + expected = np.full(7, int64_max, dtype=np.int64) + assert_array_equal(test, expected) + + def test_check_object_array(self): + arr = np.empty(1, dtype=object) + obj_a = object() + arr[0] = obj_a + obj_b = object() + obj_c = object() + arr = np.pad(arr, pad_width=1, mode='constant', + constant_values=(obj_b, obj_c)) + + expected = np.empty((3,), dtype=object) + expected[0] = obj_b + expected[1] = obj_a + expected[2] = obj_c + + assert_array_equal(arr, expected) + + +class TestLinearRamp(object): + def test_check_simple(self): + a = np.arange(100).astype('f') + a = pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) + b = np.array( + [4.00, 3.84, 3.68, 3.52, 3.36, 3.20, 3.04, 2.88, 2.72, 2.56, + 2.40, 2.24, 2.08, 1.92, 1.76, 1.60, 1.44, 1.28, 1.12, 0.96, + 0.80, 0.64, 0.48, 0.32, 0.16, + + 0.00, 1.00, 2.00, 3.00, 4.00, 5.00, 6.00, 7.00, 8.00, 9.00, + 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, + 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, + 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, + 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, + 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, + 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, + 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 
76.0, 77.0, 78.0, 79.0, + 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, + 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, + + 94.3, 89.6, 84.9, 80.2, 75.5, 70.8, 66.1, 61.4, 56.7, 52.0, + 47.3, 42.6, 37.9, 33.2, 28.5, 23.8, 19.1, 14.4, 9.7, 5.] + ) + assert_allclose(a, b, rtol=1e-5, atol=1e-5) + + def test_check_2d(self): + arr = np.arange(20).reshape(4, 5).astype(np.float64) + test = pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) + expected = np.array( + [[0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.], + [0., 0., 0., 1., 2., 3., 4., 2., 0.], + [0., 2.5, 5., 6., 7., 8., 9., 4.5, 0.], + [0., 5., 10., 11., 12., 13., 14., 7., 0.], + [0., 7.5, 15., 16., 17., 18., 19., 9.5, 0.], + [0., 3.75, 7.5, 8., 8.5, 9., 9.5, 4.75, 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0.]]) + assert_allclose(test, expected) + + @pytest.mark.xfail(exceptions=(AssertionError,)) + def test_object_array(self): + from fractions import Fraction + arr = np.array([Fraction(1, 2), Fraction(-1, 2)]) + actual = np.pad(arr, (2, 3), mode='linear_ramp', end_values=0) + + # deliberately chosen to have a non-power-of-2 denominator such that + # rounding to floats causes a failure. + expected = np.array([ + Fraction( 0, 12), + Fraction( 3, 12), + Fraction( 6, 12), + Fraction(-6, 12), + Fraction(-4, 12), + Fraction(-2, 12), + Fraction(-0, 12), + ]) + assert_equal(actual, expected) + + @pytest.mark.parametrize("dtype", ( + np.sctypes["uint"] + + np.sctypes["int"] + + np.sctypes["float"] + + np.sctypes["complex"] + )) + def test_negative_difference(self, dtype): + """ + Check correct behavior of unsigned dtypes if there is a negative + difference between the edge to pad and `end_values`. Check both cases + to be independent of implementation. Test behavior for all other dtypes + in case dtype casting interferes with complex dtypes. See gh-14191. 
+ """ + x = np.array([3], dtype=dtype) + result = np.pad(x, 3, mode="linear_ramp", end_values=0) + expected = np.array([0, 1, 2, 3, 2, 1, 0], dtype=dtype) + assert_equal(result, expected) + + x = np.array([0], dtype=dtype) + result = np.pad(x, 3, mode="linear_ramp", end_values=3) + expected = np.array([3, 2, 1, 0, 1, 2, 3], dtype=dtype) + assert_equal(result, expected) + + + +class TestReflect(object): + def test_check_simple(self): + a = np.arange(100) + a = pad(a, (25, 20), 'reflect') + b = np.array( + [25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, + 5, 4, 3, 2, 1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, + 88, 87, 86, 85, 84, 83, 82, 81, 80, 79] + ) + assert_array_equal(a, b) + + def test_check_odd_method(self): + a = np.arange(100) + a = pad(a, (25, 20), 'reflect', reflect_type='odd') + b = np.array( + [-25, -24, -23, -22, -21, -20, -19, -18, -17, -16, + -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, + -5, -4, -3, -2, -1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = [[4, 5, 6], [6, 7, 8]] + a = pad(a, (5, 7), 'reflect') + b = np.array( + [[7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def test_check_shape(self): + a = [[4, 5, 6]] + a = pad(a, (5, 7), 'reflect') + b = np.array( + [[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], + [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5]] + ) + assert_array_equal(a, b) + + def 
test_check_01(self): + a = pad([1, 2, 3], 2, 'reflect') + b = np.array([3, 2, 1, 2, 3, 2, 1]) + assert_array_equal(a, b) + + def test_check_02(self): + a = pad([1, 2, 3], 3, 'reflect') + b = np.array([2, 3, 2, 1, 2, 3, 2, 1, 2]) + assert_array_equal(a, b) + + def test_check_03(self): + a = pad([1, 2, 3], 4, 'reflect') + b = np.array([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3]) + assert_array_equal(a, b) + + def test_check_padding_an_empty_array(self): + a = pad(np.zeros((0, 3)), ((0,), (1,)), mode='reflect') + b = np.zeros((0, 5)) + assert_array_equal(a, b) + + +class TestSymmetric(object): + def test_check_simple(self): + a = np.arange(100) + a = pad(a, (25, 20), 'symmetric') + b = np.array( + [24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, + 4, 3, 2, 1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, + 89, 88, 87, 86, 85, 84, 83, 82, 81, 80] + ) + assert_array_equal(a, b) + + def test_check_odd_method(self): + a = np.arange(100) + a = pad(a, (25, 20), 'symmetric', reflect_type='odd') + b = np.array( + [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15, + -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, + -4, -3, -2, -1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = [[4, 5, 6], [6, 7, 8]] + a = pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + + assert_array_equal(a, b) + + def test_check_large_pad_odd(self): + a = [[4, 5, 6], [6, 7, 8]] + a = pad(a, (5, 7), 'symmetric', reflect_type='odd') + b = np.array( + [[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 
11, 12, 12, 13, 14, 14], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18]] + ) + assert_array_equal(a, b) + + def test_check_shape(self): + a = [[4, 5, 6]] + a = pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + assert_array_equal(a, b) + + def test_check_01(self): + a = pad([1, 2, 3], 2, 'symmetric') + b = np.array([2, 1, 1, 2, 3, 3, 2]) + assert_array_equal(a, b) + + def test_check_02(self): + a = pad([1, 2, 3], 3, 'symmetric') + b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1]) + assert_array_equal(a, b) + + def test_check_03(self): + a = pad([1, 2, 3], 6, 'symmetric') + b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3]) + assert_array_equal(a, b) + + +class TestWrap(object): + def test_check_simple(self): + a = np.arange(100) + a = pad(a, (25, 20), 'wrap') + b = np.array( + [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = np.arange(12) + a = np.reshape(a, (3, 4)) + a = pad(a, (10, 12), 'wrap') + b = np.array( + [[10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 
9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11], + [2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, + 3, 0, 1, 2, 3, 0, 1, 2, 3], + [6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, + 7, 4, 5, 6, 7, 4, 5, 6, 7], + [10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, + 11, 8, 9, 10, 11, 8, 9, 10, 11]] + ) + assert_array_equal(a, b) + + def test_check_01(self): + a = pad([1, 2, 3], 3, 'wrap') + b = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + assert_array_equal(a, b) + + def test_check_02(self): + a = pad([1, 2, 3], 4, 'wrap') + b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1]) + assert_array_equal(a, b) + + def test_pad_with_zero(self): + a = np.ones((3, 5)) + b = np.pad(a, (0, 5), mode="wrap") + assert_array_equal(a, b[:-5, :-5]) + + +class TestStatLen(object): + def test_check_simple(self): + a = np.arange(30) + a = np.reshape(a, (6, 5)) + a = pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) + b = np.array( + [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + + [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], + [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], + + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] + ) + assert_array_equal(a, b) + + +class TestEdge(object): + def test_check_simple(self): + a = np.arange(12) + a = np.reshape(a, (4, 3)) + a = pad(a, ((2, 3), (3, 2)), 'edge') + b = np.array( + [[0, 0, 0, 0, 1, 2, 2, 2], + [0, 0, 0, 0, 1, 2, 2, 2], + + [0, 0, 0, 0, 1, 2, 2, 2], + [3, 3, 3, 3, 4, 5, 5, 5], + [6, 6, 6, 6, 7, 8, 8, 8], + [9, 9, 9, 9, 10, 11, 11, 11], + + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11]] + ) + assert_array_equal(a, b) + + def test_check_width_shape_1_2(self): + # Check a pad_width of the form ((1, 2),). + # Regression test for issue gh-7808. 
+ a = np.array([1, 2, 3]) + padded = pad(a, ((1, 2),), 'edge') + expected = np.array([1, 1, 2, 3, 3, 3]) + assert_array_equal(padded, expected) + + a = np.array([[1, 2, 3], [4, 5, 6]]) + padded = pad(a, ((1, 2),), 'edge') + expected = pad(a, ((1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + + a = np.arange(24).reshape(2, 3, 4) + padded = pad(a, ((1, 2),), 'edge') + expected = pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + + +class TestZeroPadWidth(object): + def test_zero_pad_width(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + for pad_width in (0, (0, 0), ((0, 0), (0, 0))): + assert_array_equal(arr, pad(arr, pad_width, mode='constant')) + + +class TestLegacyVectorFunction(object): + def test_legacy_vector_functionality(self): + def _padwithtens(vector, pad_width, iaxis, kwargs): + vector[:pad_width[0]] = 10 + vector[-pad_width[1]:] = 10 + return vector + + a = np.arange(6).reshape(2, 3) + a = pad(a, 2, _padwithtens) + b = np.array( + [[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]] + ) + assert_array_equal(a, b) + + +class TestNdarrayPadWidth(object): + def test_check_simple(self): + a = np.arange(12) + a = np.reshape(a, (4, 3)) + a = pad(a, np.array(((2, 3), (3, 2))), 'edge') + b = np.array( + [[0, 0, 0, 0, 1, 2, 2, 2], + [0, 0, 0, 0, 1, 2, 2, 2], + + [0, 0, 0, 0, 1, 2, 2, 2], + [3, 3, 3, 3, 4, 5, 5, 5], + [6, 6, 6, 6, 7, 8, 8, 8], + [9, 9, 9, 9, 10, 11, 11, 11], + + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11]] + ) + assert_array_equal(a, b) + + +class TestUnicodeInput(object): + def test_unicode_mode(self): + constant_mode = u'constant' + a = np.pad([1], 2, mode=constant_mode) + b = np.array([0, 0, 1, 0, 0]) + assert_array_equal(a, b) + + +class TestObjectInput(object): + def test_object_input(self): + # Regression test for issue gh-11395. 
+ a = np.full((4, 3), None) + pad_amt = ((2, 3), (3, 2)) + b = np.full((9, 8), None) + modes = ['edge', + 'symmetric', + 'reflect', + 'wrap', + ] + for mode in modes: + assert_array_equal(pad(a, pad_amt, mode=mode), b) + + +class TestValueError1(object): + def test_check_simple(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + kwargs = dict(mode='mean', stat_length=(3, )) + assert_raises(ValueError, pad, arr, ((2, 3), (3, 2), (4, 5)), + **kwargs) + + def test_check_negative_stat_length(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + kwargs = dict(mode='mean', stat_length=(-3, )) + assert_raises(ValueError, pad, arr, ((2, 3), (3, 2)), + **kwargs) + + def test_check_negative_pad_width(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + kwargs = dict(mode='mean', stat_length=(3, )) + assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), + **kwargs) + + def test_check_empty_array(self): + assert_raises(ValueError, pad, [], 4, mode='reflect') + assert_raises(ValueError, pad, np.ndarray(0), 4, mode='reflect') + assert_raises(ValueError, pad, np.zeros((0, 3)), ((1,), (0,)), + mode='reflect') + + +class TestValueError2(object): + def test_check_negative_pad_amount(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + kwargs = dict(mode='mean', stat_length=(3, )) + assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), + **kwargs) + + +class TestValueError3(object): + def test_check_kwarg_not_allowed(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, 4, mode='mean', + reflect_type='odd') + + def test_mode_not_set(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(TypeError, pad, arr, 4) + + def test_malformed_pad_amount(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, (4, 5, 6, 7), mode='constant') + + def test_malformed_pad_amount2(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, ((3, 4, 5), (0, 1, 2)), + mode='constant') + + def test_pad_too_many_axes(self): + arr = np.arange(30).reshape(5, 6) + + # Attempt to pad using a 3D array equivalent + bad_shape = (((3,), (4,), (5,)), ((0,), (1,), (2,))) + assert_raises(ValueError, pad, arr, bad_shape, + mode='constant') + + +class TestTypeError1(object): + def test_float(self): + arr = np.arange(30) + assert_raises(TypeError, pad, arr, ((-2.1, 3), (3, 2))) + assert_raises(TypeError, pad, arr, np.array(((-2.1, 3), (3, 2)))) + + def test_str(self): + arr = np.arange(30) + assert_raises(TypeError, pad, arr, 'foo') + assert_raises(TypeError, pad, arr, np.array('foo')) + + def test_object(self): + class FooBar(object): + pass + arr = np.arange(30) + assert_raises(TypeError, pad, arr, FooBar()) + + def test_complex(self): + arr = np.arange(30) + assert_raises(TypeError, pad, arr, complex(1, -1)) + assert_raises(TypeError, pad, arr, np.array(complex(1, -1))) + + def test_check_wrong_pad_amount(self): + arr = np.arange(30) + arr = np.reshape(arr, (6, 5)) + kwargs = dict(mode='mean', stat_length=(3, )) + assert_raises(TypeError, pad, arr, ((2, 3, 4), (3, 2)), + **kwargs) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_arraysetops.py b/contrib/python/numpy/py2/numpy/lib/tests/test_arraysetops.py new file mode 100644 index 0000000000..93d4b279f3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_arraysetops.py @@ -0,0 +1,623 @@ +"""Test functions for 1D array set operations. 
+ +""" +from __future__ import division, absolute_import, print_function + +import numpy as np + +from numpy.testing import (assert_array_equal, assert_equal, + assert_raises, assert_raises_regex) +from numpy.lib.arraysetops import ( + ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin + ) +import pytest + + + +class TestSetOps(object): + + def test_intersect1d(self): + # unique inputs + a = np.array([5, 7, 1, 2]) + b = np.array([2, 4, 3, 1, 5]) + + ec = np.array([1, 2, 5]) + c = intersect1d(a, b, assume_unique=True) + assert_array_equal(c, ec) + + # non-unique inputs + a = np.array([5, 5, 7, 1, 2]) + b = np.array([2, 1, 4, 3, 3, 1, 5]) + + ed = np.array([1, 2, 5]) + c = intersect1d(a, b) + assert_array_equal(c, ed) + assert_array_equal([], intersect1d([], [])) + + def test_intersect1d_array_like(self): + # See gh-11772 + class Test(object): + def __array__(self): + return np.arange(3) + + a = Test() + res = intersect1d(a, a) + assert_array_equal(res, a) + res = intersect1d([1, 2, 3], [1, 2, 3]) + assert_array_equal(res, [1, 2, 3]) + + def test_intersect1d_indices(self): + # unique inputs + a = np.array([1, 2, 3, 4]) + b = np.array([2, 1, 4, 6]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ee = np.array([1, 2, 4]) + assert_array_equal(c, ee) + assert_array_equal(a[i1], ee) + assert_array_equal(b[i2], ee) + + # non-unique inputs + a = np.array([1, 2, 2, 3, 4, 3, 2]) + b = np.array([1, 8, 4, 2, 2, 3, 2, 3]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ef = np.array([1, 2, 3, 4]) + assert_array_equal(c, ef) + assert_array_equal(a[i1], ef) + assert_array_equal(b[i2], ef) + + # non1d, unique inputs + a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]]) + b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]]) + c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 6, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + + # non1d, not assumed to be uniqueinputs + a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]]) + b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]]) + c, i1, i2 = intersect1d(a, b, return_indices=True) + ui1 = np.unravel_index(i1, a.shape) + ui2 = np.unravel_index(i2, b.shape) + ea = np.array([2, 7, 8]) + assert_array_equal(ea, a[ui1]) + assert_array_equal(ea, b[ui2]) + + def test_setxor1d(self): + a = np.array([5, 7, 1, 2]) + b = np.array([2, 4, 3, 1, 5]) + + ec = np.array([3, 4, 7]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + a = np.array([1, 2, 3]) + b = np.array([6, 5, 4]) + + ec = np.array([1, 2, 3, 4, 5, 6]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + a = np.array([1, 8, 2, 3]) + b = np.array([6, 5, 4, 8]) + + ec = np.array([1, 2, 3, 4, 5, 6]) + c = setxor1d(a, b) + assert_array_equal(c, ec) + + assert_array_equal([], setxor1d([], [])) + + def test_ediff1d(self): + zero_elem = np.array([]) + one_elem = np.array([1]) + two_elem = np.array([1, 2]) + + assert_array_equal([], ediff1d(zero_elem)) + assert_array_equal([0], ediff1d(zero_elem, to_begin=0)) + assert_array_equal([0], ediff1d(zero_elem, to_end=0)) + assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0)) + assert_array_equal([], ediff1d(one_elem)) + assert_array_equal([1], ediff1d(two_elem)) + assert_array_equal([7,1,9], ediff1d(two_elem, to_begin=7, to_end=9)) + assert_array_equal([5,6,1,7,8], ediff1d(two_elem, to_begin=[5,6], to_end=[7,8])) + assert_array_equal([1,9], ediff1d(two_elem, to_end=9)) + 
assert_array_equal([1,7,8], ediff1d(two_elem, to_end=[7,8])) + assert_array_equal([7,1], ediff1d(two_elem, to_begin=7)) + assert_array_equal([5,6,1], ediff1d(two_elem, to_begin=[5,6])) + + @pytest.mark.parametrize("ary, prepend, append", [ + # should fail because trying to cast + # np.nan standard floating point value + # into an integer array: + (np.array([1, 2, 3], dtype=np.int64), + None, + np.nan), + # should fail because attempting + # to downcast to smaller int type: + (np.array([1, 2, 3], dtype=np.int16), + np.array([5, 1<<20, 2], dtype=np.int32), + None), + # should fail because attempting to cast + # two special floating point values + # to integers (on both sides of ary): + (np.array([1., 3., 9.], dtype=np.int8), + np.nan, + np.nan), + ]) + def test_ediff1d_forbidden_type_casts(self, ary, prepend, append): + # verify resolution of gh-11490 + + # specifically, raise an appropriate + # Exception when attempting to append or + # prepend with an incompatible type + msg = 'cannot convert' + with assert_raises_regex(ValueError, msg): + ediff1d(ary=ary, + to_end=append, + to_begin=prepend) + + @pytest.mark.parametrize("ary," + "prepend," + "append," + "expected", [ + (np.array([1, 2, 3], dtype=np.int16), + 0, + None, + np.array([0, 1, 1], dtype=np.int16)), + (np.array([1, 2, 3], dtype=np.int32), + 0, + 0, + np.array([0, 1, 1, 0], dtype=np.int32)), + (np.array([1, 2, 3], dtype=np.int64), + 3, + -9, + np.array([3, 1, 1, -9], dtype=np.int64)), + ]) + def test_ediff1d_scalar_handling(self, + ary, + prepend, + append, + expected): + # maintain backwards-compatibility + # of scalar prepend / append behavior + # in ediff1d following fix for gh-11490 + actual = np.ediff1d(ary=ary, + to_end=append, + to_begin=prepend) + assert_equal(actual, expected) + + + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. + def _isin_slow(a, b): + b = np.asarray(b).flatten().tolist() + return a in b + isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1}) + def assert_isin_equal(a, b): + x = isin(a, b) + y = isin_slow(a, b) + assert_array_equal(x, y) + + #multidimensional arrays in both arguments + a = np.arange(24).reshape([2, 3, 4]) + b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]]) + assert_isin_equal(a, b) + + #array-likes as both arguments + c = [(9, 8), (7, 6)] + d = (9, 7) + assert_isin_equal(c, d) + + #zero-d array: + f = np.array(3) + assert_isin_equal(f, b) + assert_isin_equal(a, f) + assert_isin_equal(f, f) + + #scalar: + assert_isin_equal(5, b) + assert_isin_equal(a, 6) + assert_isin_equal(5, 6) + + #empty array-like: + x = [] + assert_isin_equal(x, b) + assert_isin_equal(a, x) + assert_isin_equal(x, x) + + def test_in1d(self): + # we use two different sizes for the b array here to test the + # two different paths in in1d(). 
+        for mult in (1, 10):
+            # One check without np.array to make sure lists are handled correctly
+            a = [5, 7, 1, 2]
+            b = [2, 4, 3, 1, 5] * mult
+            ec = np.array([True, False, True, True])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a[0] = 8
+            ec = np.array([False, False, True, True])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a[0], a[3] = 4, 8
+            ec = np.array([True, False, True, False])
+            c = in1d(a, b, assume_unique=True)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
+            b = [2, 3, 4] * mult
+            ec = [False, True, False, True, True, True, True, True, True,
+                  False, True, False, False, False]
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            b = b + [5, 5, 4] * mult
+            ec = [True, True, True, True, True, True, True, True, True, True,
+                  True, False, True, True]
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 7, 1, 2])
+            b = np.array([2, 4, 3, 1, 5] * mult)
+            ec = np.array([True, False, True, True])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 7, 1, 1, 2])
+            b = np.array([2, 4, 3, 3, 1, 5] * mult)
+            ec = np.array([True, False, True, True, True])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5, 5])
+            b = np.array([2, 2] * mult)
+            ec = np.array([False, False])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+            a = np.array([5])
+            b = np.array([2])
+            ec = np.array([False])
+            c = in1d(a, b)
+            assert_array_equal(c, ec)
+
+        assert_array_equal(in1d([], []), [])
+
+    def test_in1d_char_array(self):
+        a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
+        b = np.array(['a', 'c'])
+
+        ec = np.array([True, False, True, False, False, True, False, False])
+        c = in1d(a, b)
+
+        assert_array_equal(c, ec)
+
+    def test_in1d_invert(self):
+        "Test in1d's invert parameter"
+        # We use two different sizes for the b array here to test the
+        # two different paths in in1d().
+        for mult in (1, 10):
+            a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
+            b = [2, 3, 4] * mult
+            assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
+
+    def test_in1d_ravel(self):
+        # Test that in1d ravels its input arrays. This is not documented
+        # behavior however. The test is to ensure consistency.
+ a = np.arange(6).reshape(2, 3) + b = np.arange(3, 9).reshape(3, 2) + long_b = np.arange(3, 63).reshape(30, 2) + ec = np.array([False, False, False, True, True, True]) + + assert_array_equal(in1d(a, b, assume_unique=True), ec) + assert_array_equal(in1d(a, b, assume_unique=False), ec) + assert_array_equal(in1d(a, long_b, assume_unique=True), ec) + assert_array_equal(in1d(a, long_b, assume_unique=False), ec) + + def test_in1d_first_array_is_object(self): + ar1 = [None] + ar2 = np.array([1]*10) + expected = np.array([False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_second_array_is_object(self): + ar1 = 1 + ar2 = np.array([None]*10) + expected = np.array([False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_both_arrays_are_object(self): + ar1 = [None] + ar2 = np.array([None]*10) + expected = np.array([True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_in1d_both_arrays_have_structured_dtype(self): + # Test arrays of a structured data type containing an integer field + # and a field of dtype `object` allowing for arbitrary Python objects + dt = np.dtype([('field1', int), ('field2', object)]) + ar1 = np.array([(1, None)], dtype=dt) + ar2 = np.array([(1, None)]*10, dtype=dt) + expected = np.array([True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + + def test_union1d(self): + a = np.array([5, 4, 7, 1, 2]) + b = np.array([2, 4, 3, 3, 2, 1, 5]) + + ec = np.array([1, 2, 3, 4, 5, 7]) + c = union1d(a, b) + assert_array_equal(c, ec) + + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = np.array([[0, 1, 2], [3, 4, 5]]) + y = np.array([0, 1, 2, 3, 4]) + ez = np.array([0, 1, 2, 3, 4, 5]) + z = union1d(x, y) + assert_array_equal(z, ez) + + assert_array_equal([], union1d([], [])) + + def test_setdiff1d(self): + a = np.array([6, 5, 4, 7, 1, 2, 7, 4]) + b = np.array([2, 4, 3, 3, 2, 1, 5]) + + ec = np.array([6, 7]) + c = setdiff1d(a, b) + assert_array_equal(c, ec) + + a = np.arange(21) + b = np.arange(19) + ec = np.array([19, 20]) + c = setdiff1d(a, b) + assert_array_equal(c, ec) + + assert_array_equal([], setdiff1d([], [])) + a = np.array((), np.uint32) + assert_equal(setdiff1d(a, []).dtype, np.uint32) + + def test_setdiff1d_unique(self): + a = np.array([3, 2, 1]) + b = np.array([7, 5, 2]) + expected = np.array([3, 1]) + actual = setdiff1d(a, b, assume_unique=True) + assert_equal(actual, expected) + + def test_setdiff1d_char_array(self): + a = np.array(['a', 'b', 'c']) + b = np.array(['a', 'b', 's']) + assert_array_equal(setdiff1d(a, b), np.array(['c'])) + + def test_manyways(self): + a = np.array([5, 7, 1, 2, 8]) + b = np.array([9, 8, 2, 4, 3, 1, 5]) + + c1 = setxor1d(a, b) + aux1 = intersect1d(a, b) + aux2 = union1d(a, b) + c2 = setdiff1d(aux2, aux1) + assert_array_equal(c1, c2) + + +class TestUnique(object): + + def test_unique_1d(self): + + def check_all(a, b, i1, i2, c, dt): + base_msg = 'check {0} failed for type {1}' + + msg = base_msg.format('values', dt) + v = unique(a) + assert_array_equal(v, b, msg) + + msg = base_msg.format('return_index', dt) + v, j = unique(a, 1, 0, 0) + assert_array_equal(v, b, msg) + assert_array_equal(j, i1, msg) + + msg = base_msg.format('return_inverse', dt) + v, j = unique(a, 0, 1, 0) + assert_array_equal(v, b, msg) + assert_array_equal(j, i2, msg) + + msg = base_msg.format('return_counts', dt) + v, j = unique(a, 0, 0, 1) + assert_array_equal(v, b, msg) + assert_array_equal(j, c, 
msg) + + msg = base_msg.format('return_index and return_inverse', dt) + v, j1, j2 = unique(a, 1, 1, 0) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, i2, msg) + + msg = base_msg.format('return_index and return_counts', dt) + v, j1, j2 = unique(a, 1, 0, 1) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, c, msg) + + msg = base_msg.format('return_inverse and return_counts', dt) + v, j1, j2 = unique(a, 0, 1, 1) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i2, msg) + assert_array_equal(j2, c, msg) + + msg = base_msg.format(('return_index, return_inverse ' + 'and return_counts'), dt) + v, j1, j2, j3 = unique(a, 1, 1, 1) + assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, i2, msg) + assert_array_equal(j3, c, msg) + + a = [5, 7, 1, 2, 1, 5, 7]*10 + b = [1, 2, 5, 7] + i1 = [2, 3, 0, 1] + i2 = [2, 3, 0, 1, 0, 2, 3]*10 + c = np.multiply([2, 1, 2, 2], 10) + + # test for numeric arrays + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + for dt in types: + aa = np.array(a, dt) + bb = np.array(b, dt) + check_all(aa, bb, i1, i2, c, dt) + + # test for object arrays + dt = 'O' + aa = np.empty(len(a), dt) + aa[:] = a + bb = np.empty(len(b), dt) + bb[:] = b + check_all(aa, bb, i1, i2, c, dt) + + # test for structured arrays + dt = [('', 'i'), ('', 'i')] + aa = np.array(list(zip(a, a)), dt) + bb = np.array(list(zip(b, b)), dt) + check_all(aa, bb, i1, i2, c, dt) + + # test for ticket #2799 + aa = [1. + 0.j, 1 - 1.j, 1] + assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j]) + + # test for ticket #4785 + a = [(1, 2), (1, 2), (2, 3)] + unq = [1, 2, 3] + inv = [0, 1, 0, 1, 1, 2] + a1 = unique(a) + assert_array_equal(a1, unq) + a2, a2_inv = unique(a, return_inverse=True) + assert_array_equal(a2, unq) + assert_array_equal(a2_inv, inv) + + # test for chararrays with return_inverse (gh-5099) + a = np.chararray(5) + a[...] 
= '' + a2, a2_inv = np.unique(a, return_inverse=True) + assert_array_equal(a2_inv, np.zeros(5)) + + # test for ticket #9137 + a = [] + a1_idx = np.unique(a, return_index=True)[1] + a2_inv = np.unique(a, return_inverse=True)[1] + a3_idx, a3_inv = np.unique(a, return_index=True, return_inverse=True)[1:] + assert_equal(a1_idx.dtype, np.intp) + assert_equal(a2_inv.dtype, np.intp) + assert_equal(a3_idx.dtype, np.intp) + assert_equal(a3_inv.dtype, np.intp) + + def test_unique_axis_errors(self): + assert_raises(TypeError, self._run_axis_tests, object) + assert_raises(TypeError, self._run_axis_tests, + [('a', int), ('b', object)]) + + assert_raises(np.AxisError, unique, np.arange(10), axis=2) + assert_raises(np.AxisError, unique, np.arange(10), axis=-2) + + def test_unique_axis_list(self): + msg = "Unique failed on list of lists" + inp = [[0, 1, 0], [0, 1, 0]] + inp_arr = np.asarray(inp) + assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg) + assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg) + + def test_unique_axis(self): + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + types.append([('a', int), ('b', int)]) + types.append([('a', int), ('b', float)]) + + for dtype in types: + self._run_axis_tests(dtype) + + msg = 'Non-bitwise-equal booleans test failed' + data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) + result = np.array([[False, True], [True, True]], dtype=bool) + assert_array_equal(unique(data, axis=0), result, msg) + + msg = 'Negative zero equality test failed' + data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) + result = np.array([[-0.0, 0.0]]) + assert_array_equal(unique(data, axis=0), result, msg) + + def test_unique_masked(self): + # issue 8664 + x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0], dtype='uint8') + y = np.ma.masked_equal(x, 0) + + v = np.unique(y) + v2, i, c = np.unique(y, return_index=True, return_counts=True) + + msg = 'Unique returned different results when asked for index' + assert_array_equal(v.data, v2.data, msg) + assert_array_equal(v.mask, v2.mask, msg) + + def test_unique_sort_order_with_axis(self): + # These tests fail if sorting along axis is done by treating subarrays + # as unsigned byte strings. See gh-10495. 
+ fmt = "sort order incorrect for integer type '%s'" + for dt in 'bhilq': + a = np.array([[-1],[0]], dt) + b = np.unique(a, axis=0) + assert_array_equal(a, b, fmt % dt) + + def _run_axis_tests(self, dtype): + data = np.array([[0, 1, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [1, 0, 0, 0]]).astype(dtype) + + msg = 'Unique with 1d array and axis=0 failed' + result = np.array([0, 1]) + assert_array_equal(unique(data), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=0 failed' + result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]]) + assert_array_equal(unique(data, axis=0), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=1 failed' + result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]]) + assert_array_equal(unique(data, axis=1), result.astype(dtype), msg) + + msg = 'Unique with 3d array and axis=2 failed' + data3d = np.dstack([data] * 3) + result = data3d[..., :1] + assert_array_equal(unique(data3d, axis=2), result, msg) + + uniq, idx, inv, cnt = unique(data, axis=0, return_index=True, + return_inverse=True, return_counts=True) + msg = "Unique's return_index=True failed with axis=0" + assert_array_equal(data[idx], uniq, msg) + msg = "Unique's return_inverse=True failed with axis=0" + assert_array_equal(uniq[inv], data) + msg = "Unique's return_counts=True failed with axis=0" + assert_array_equal(cnt, np.array([2, 2]), msg) + + uniq, idx, inv, cnt = unique(data, axis=1, return_index=True, + return_inverse=True, return_counts=True) + msg = "Unique's return_index=True failed with axis=1" + assert_array_equal(data[:, idx], uniq) + msg = "Unique's return_inverse=True failed with axis=1" + assert_array_equal(uniq[:, inv], data) + msg = "Unique's return_counts=True failed with axis=1" + assert_array_equal(cnt, np.array([2, 1, 1]), msg) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_arrayterator.py b/contrib/python/numpy/py2/numpy/lib/tests/test_arrayterator.py new file mode 100644 index 0000000000..2ce4456a5b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_arrayterator.py @@ -0,0 +1,48 @@ +from __future__ import division, absolute_import, print_function + +from operator import mul +from functools import reduce + +import numpy as np +from numpy.random import randint +from numpy.lib import Arrayterator +from numpy.testing import assert_ + + +def test(): + np.random.seed(np.arange(10)) + + # Create a random array + ndims = randint(5)+1 + shape = tuple(randint(10)+1 for dim in range(ndims)) + els = reduce(mul, shape) + a = np.arange(els) + a.shape = shape + + buf_size = randint(2*els) + b = Arrayterator(a, buf_size) + + # Check that each block has at most ``buf_size`` elements + for block in b: + assert_(len(block.flat) <= (buf_size or els)) + + # Check that all elements are iterated correctly + assert_(list(b.flat) == list(a.flat)) + + # Slice arrayterator + start = [randint(dim) for dim in shape] + stop = [randint(dim)+1 for dim in shape] + step = [randint(dim)+1 for dim in shape] + slice_ = tuple(slice(*t) for t in zip(start, stop, step)) + c = b[slice_] + d = a[slice_] + + # Check that each block has at most ``buf_size`` elements + for block in c: + assert_(len(block.flat) <= (buf_size or els)) + + # Check that the arrayterator is sliced correctly + assert_(np.all(c.__array__() == d)) + + # Check that all elements are iterated correctly + assert_(list(c.flat) == list(d.flat)) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_financial.py b/contrib/python/numpy/py2/numpy/lib/tests/test_financial.py new file mode 100644 
index 0000000000..5249150411 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_financial.py @@ -0,0 +1,340 @@ +from __future__ import division, absolute_import, print_function + +from decimal import Decimal + +import numpy as np +from numpy.testing import ( + assert_, assert_almost_equal, assert_allclose, assert_equal, assert_raises + ) + + +class TestFinancial(object): + def test_rate(self): + assert_almost_equal( + np.rate(10, 0, -3500, 10000), + 0.1107, 4) + + def test_rate_decimal(self): + rate = np.rate(Decimal('10'), Decimal('0'), Decimal('-3500'), Decimal('10000')) + assert_equal(Decimal('0.1106908537142689284704528100'), rate) + + def test_irr(self): + v = [-150000, 15000, 25000, 35000, 45000, 60000] + assert_almost_equal(np.irr(v), 0.0524, 2) + v = [-100, 0, 0, 74] + assert_almost_equal(np.irr(v), -0.0955, 2) + v = [-100, 39, 59, 55, 20] + assert_almost_equal(np.irr(v), 0.28095, 2) + v = [-100, 100, 0, -7] + assert_almost_equal(np.irr(v), -0.0833, 2) + v = [-100, 100, 0, 7] + assert_almost_equal(np.irr(v), 0.06206, 2) + v = [-5, 10.5, 1, -8, 1] + assert_almost_equal(np.irr(v), 0.0886, 2) + + # Test that if there is no solution then np.irr returns nan + # Fixes gh-6744 + v = [-1, -2, -3] + assert_equal(np.irr(v), np.nan) + + def test_pv(self): + assert_almost_equal(np.pv(0.07, 20, 12000, 0), -127128.17, 2) + + def test_pv_decimal(self): + assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')), + Decimal('-127128.1709461939327295222005')) + + def test_fv(self): + assert_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.362673042924) + + def test_fv_decimal(self): + assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), 0, 0), + Decimal('86609.36267304300040536731624')) + + def test_pmt(self): + res = np.pmt(0.08 / 12, 5 * 12, 15000) + tgt = -304.145914 + assert_allclose(res, tgt) + # Test the edge case where rate == 0.0 + res = np.pmt(0.0, 5 * 12, 15000) + tgt = -250.0 + assert_allclose(res, tgt) + # Test the case where we use broadcast and + # the arguments passed in are arrays. + res = np.pmt([[0.0, 0.8], [0.3, 0.8]], [12, 3], [2000, 20000]) + tgt = np.array([[-166.66667, -19311.258], [-626.90814, -19311.258]]) + assert_allclose(res, tgt) + + def test_pmt_decimal(self): + res = np.pmt(Decimal('0.08') / Decimal('12'), 5 * 12, 15000) + tgt = Decimal('-304.1459143262052370338701494') + assert_equal(res, tgt) + # Test the edge case where rate == 0.0 + res = np.pmt(Decimal('0'), Decimal('60'), Decimal('15000')) + tgt = -250 + assert_equal(res, tgt) + # Test the case where we use broadcast and + # the arguments passed in are arrays. 
+ res = np.pmt([[Decimal('0'), Decimal('0.8')], [Decimal('0.3'), Decimal('0.8')]], + [Decimal('12'), Decimal('3')], [Decimal('2000'), Decimal('20000')]) + tgt = np.array([[Decimal('-166.6666666666666666666666667'), Decimal('-19311.25827814569536423841060')], + [Decimal('-626.9081401700757748402586600'), Decimal('-19311.25827814569536423841060')]]) + + # Cannot use the `assert_allclose` because it uses isfinite under the covers + # which does not support the Decimal type + # See issue: https://github.com/numpy/numpy/issues/9954 + assert_equal(res[0][0], tgt[0][0]) + assert_equal(res[0][1], tgt[0][1]) + assert_equal(res[1][0], tgt[1][0]) + assert_equal(res[1][1], tgt[1][1]) + + def test_ppmt(self): + assert_equal(np.round(np.ppmt(0.1 / 12, 1, 60, 55000), 2), -710.25) + + def test_ppmt_decimal(self): + assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000')), + Decimal('-710.2541257864217612489830917')) + + # Two tests showing how Decimal is actually getting at a more exact result + # .23 / 12 does not come out nicely as a float but does as a decimal + def test_ppmt_special_rate(self): + assert_equal(np.round(np.ppmt(0.23 / 12, 1, 60, 10000000000), 8), -90238044.232277036) + + def test_ppmt_special_rate_decimal(self): + # When rounded out to 8 decimal places like the float based test, this should not equal the same value + # as the float, substituted for the decimal + def raise_error_because_not_equal(): + assert_equal( + round(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')), 8), + Decimal('-90238044.232277036')) + + assert_raises(AssertionError, raise_error_because_not_equal) + assert_equal(np.ppmt(Decimal('0.23') / Decimal('12'), 1, 60, Decimal('10000000000')), + Decimal('-90238044.2322778884413969909')) + + def test_ipmt(self): + assert_almost_equal(np.round(np.ipmt(0.1 / 12, 1, 24, 2000), 2), -16.67) + + def test_ipmt_decimal(self): + result = np.ipmt(Decimal('0.1') / Decimal('12'), 1, 24, 2000) + assert_equal(result.flat[0], Decimal('-16.66666666666666666666666667')) + + def test_nper(self): + assert_almost_equal(np.nper(0.075, -2000, 0, 100000.), + 21.54, 2) + + def test_nper2(self): + assert_almost_equal(np.nper(0.0, -2000, 0, 100000.), + 50.0, 1) + + def test_npv(self): + assert_almost_equal( + np.npv(0.05, [-15000, 1500, 2500, 3500, 4500, 6000]), + 122.89, 2) + + def test_npv_decimal(self): + assert_equal( + np.npv(Decimal('0.05'), [-15000, 1500, 2500, 3500, 4500, 6000]), + Decimal('122.894854950942692161628715')) + + def test_mirr(self): + val = [-4500, -800, 800, 800, 600, 600, 800, 800, 700, 3000] + assert_almost_equal(np.mirr(val, 0.08, 0.055), 0.0666, 4) + + val = [-120000, 39000, 30000, 21000, 37000, 46000] + assert_almost_equal(np.mirr(val, 0.10, 0.12), 0.126094, 6) + + val = [100, 200, -50, 300, -200] + assert_almost_equal(np.mirr(val, 0.05, 0.06), 0.3428, 4) + + val = [39000, 30000, 21000, 37000, 46000] + assert_(np.isnan(np.mirr(val, 0.10, 0.12))) + + def test_mirr_decimal(self): + val = [Decimal('-4500'), Decimal('-800'), Decimal('800'), Decimal('800'), + Decimal('600'), Decimal('600'), Decimal('800'), Decimal('800'), + Decimal('700'), Decimal('3000')] + assert_equal(np.mirr(val, Decimal('0.08'), Decimal('0.055')), + Decimal('0.066597175031553548874239618')) + + val = [Decimal('-120000'), Decimal('39000'), Decimal('30000'), + Decimal('21000'), Decimal('37000'), Decimal('46000')] + assert_equal(np.mirr(val, Decimal('0.10'), Decimal('0.12')), Decimal('0.126094130365905145828421880')) + + val = 
[Decimal('100'), Decimal('200'), Decimal('-50'), + Decimal('300'), Decimal('-200')] + assert_equal(np.mirr(val, Decimal('0.05'), Decimal('0.06')), Decimal('0.342823387842176663647819868')) + + val = [Decimal('39000'), Decimal('30000'), Decimal('21000'), Decimal('37000'), Decimal('46000')] + assert_(np.isnan(np.mirr(val, Decimal('0.10'), Decimal('0.12')))) + + def test_when(self): + # begin + assert_equal(np.rate(10, 20, -3500, 10000, 1), + np.rate(10, 20, -3500, 10000, 'begin')) + # end + assert_equal(np.rate(10, 20, -3500, 10000), + np.rate(10, 20, -3500, 10000, 'end')) + assert_equal(np.rate(10, 20, -3500, 10000, 0), + np.rate(10, 20, -3500, 10000, 'end')) + + # begin + assert_equal(np.pv(0.07, 20, 12000, 0, 1), + np.pv(0.07, 20, 12000, 0, 'begin')) + # end + assert_equal(np.pv(0.07, 20, 12000, 0), + np.pv(0.07, 20, 12000, 0, 'end')) + assert_equal(np.pv(0.07, 20, 12000, 0, 0), + np.pv(0.07, 20, 12000, 0, 'end')) + + # begin + assert_equal(np.fv(0.075, 20, -2000, 0, 1), + np.fv(0.075, 20, -2000, 0, 'begin')) + # end + assert_equal(np.fv(0.075, 20, -2000, 0), + np.fv(0.075, 20, -2000, 0, 'end')) + assert_equal(np.fv(0.075, 20, -2000, 0, 0), + np.fv(0.075, 20, -2000, 0, 'end')) + + # begin + assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0, 1), + np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'begin')) + # end + assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0), + np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'end')) + assert_equal(np.pmt(0.08 / 12, 5 * 12, 15000., 0, 0), + np.pmt(0.08 / 12, 5 * 12, 15000., 0, 'end')) + + # begin + assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0, 1), + np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'begin')) + # end + assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0), + np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'end')) + assert_equal(np.ppmt(0.1 / 12, 1, 60, 55000, 0, 0), + np.ppmt(0.1 / 12, 1, 60, 55000, 0, 'end')) + + # begin + assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0, 1), + np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'begin')) + # end + assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0), + np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'end')) + assert_equal(np.ipmt(0.1 / 12, 1, 24, 2000, 0, 0), + np.ipmt(0.1 / 12, 1, 24, 2000, 0, 'end')) + + # begin + assert_equal(np.nper(0.075, -2000, 0, 100000., 1), + np.nper(0.075, -2000, 0, 100000., 'begin')) + # end + assert_equal(np.nper(0.075, -2000, 0, 100000.), + np.nper(0.075, -2000, 0, 100000., 'end')) + assert_equal(np.nper(0.075, -2000, 0, 100000., 0), + np.nper(0.075, -2000, 0, 100000., 'end')) + + def test_decimal_with_when(self): + """Test that decimals are still supported if the when argument is passed""" + # begin + assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), Decimal('1')), + np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'begin')) + # end + assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000')), + np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'end')) + assert_equal(np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), Decimal('0')), + np.rate(Decimal('10'), Decimal('20'), Decimal('-3500'), Decimal('10000'), 'end')) + + # begin + assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), Decimal('1')), + np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'begin')) + # end + assert_equal(np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0')), + np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'end')) + assert_equal(np.pv(Decimal('0.07'), 
Decimal('20'), Decimal('12000'), Decimal('0'), Decimal('0')), + np.pv(Decimal('0.07'), Decimal('20'), Decimal('12000'), Decimal('0'), 'end')) + + # begin + assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), Decimal('1')), + np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'begin')) + # end + assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0')), + np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'end')) + assert_equal(np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), Decimal('0')), + np.fv(Decimal('0.075'), Decimal('20'), Decimal('-2000'), Decimal('0'), 'end')) + + # begin + assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0'), Decimal('1')), + np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0'), 'begin')) + # end + assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0')), + np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0'), 'end')) + assert_equal(np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0'), Decimal('0')), + np.pmt(Decimal('0.08') / Decimal('12'), Decimal('5') * Decimal('12'), Decimal('15000.'), + Decimal('0'), 'end')) + + # begin + assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0'), Decimal('1')), + np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0'), 'begin')) + # end + assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0')), + np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0'), 'end')) + assert_equal(np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0'), Decimal('0')), + np.ppmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('60'), Decimal('55000'), + Decimal('0'), 'end')) + + # begin + assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0'), Decimal('1')).flat[0], + np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0'), 'begin').flat[0]) + # end + assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0')).flat[0], + np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0'), 'end').flat[0]) + assert_equal(np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0'), Decimal('0')).flat[0], + np.ipmt(Decimal('0.1') / Decimal('12'), Decimal('1'), Decimal('24'), Decimal('2000'), + Decimal('0'), 'end').flat[0]) + + def test_broadcast(self): + assert_almost_equal(np.nper(0.075, -2000, 0, 100000., [0, 1]), + [21.5449442, 20.76156441], 4) + + assert_almost_equal(np.ipmt(0.1 / 12, list(range(5)), 24, 2000), + [-17.29165168, -16.66666667, -16.03647345, + -15.40102862, -14.76028842], 4) + + assert_almost_equal(np.ppmt(0.1 / 12, list(range(5)), 24, 2000), + [-74.998201, -75.62318601, -76.25337923, + -76.88882405, -77.52956425], 4) + + assert_almost_equal(np.ppmt(0.1 / 12, list(range(5)), 24, 2000, 0, + [0, 0, 1, 'end', 'begin']), + [-74.998201, -75.62318601, -75.62318601, + 
-76.88882405, -76.88882405], 4) + + def test_broadcast_decimal(self): + # Use almost equal because precision is tested in the explicit tests, this test is to ensure + # broadcast with Decimal is not broken. + assert_almost_equal(np.ipmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000')), + [Decimal('-17.29165168'), Decimal('-16.66666667'), Decimal('-16.03647345'), + Decimal('-15.40102862'), Decimal('-14.76028842')], 4) + + assert_almost_equal(np.ppmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000')), + [Decimal('-74.998201'), Decimal('-75.62318601'), Decimal('-76.25337923'), + Decimal('-76.88882405'), Decimal('-77.52956425')], 4) + + assert_almost_equal(np.ppmt(Decimal('0.1') / Decimal('12'), list(range(5)), Decimal('24'), Decimal('2000'), + Decimal('0'), [Decimal('0'), Decimal('0'), Decimal('1'), 'end', 'begin']), + [Decimal('-74.998201'), Decimal('-75.62318601'), Decimal('-75.62318601'), + Decimal('-76.88882405'), Decimal('-76.88882405')], 4) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_format.py b/contrib/python/numpy/py2/numpy/lib/tests/test_format.py new file mode 100644 index 0000000000..4a3fbdf571 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_format.py @@ -0,0 +1,940 @@ +from __future__ import division, absolute_import, print_function + +# doctest +r''' Test the .npy file format. + +Set up: + + >>> import sys + >>> from io import BytesIO + >>> from numpy.lib import format + >>> + >>> scalars = [ + ... np.uint8, + ... np.int8, + ... np.uint16, + ... np.int16, + ... np.uint32, + ... np.int32, + ... np.uint64, + ... np.int64, + ... np.float32, + ... np.float64, + ... np.complex64, + ... np.complex128, + ... object, + ... ] + >>> + >>> basic_arrays = [] + >>> + >>> for scalar in scalars: + ... for endian in '<>': + ... dtype = np.dtype(scalar).newbyteorder(endian) + ... basic = np.arange(15).astype(dtype) + ... basic_arrays.extend([ + ... np.array([], dtype=dtype), + ... np.array(10, dtype=dtype), + ... basic, + ... basic.reshape((3,5)), + ... basic.reshape((3,5)).T, + ... basic.reshape((3,5))[::-1,::2], + ... ]) + ... + >>> + >>> Pdescr = [ + ... ('x', 'i4', (2,)), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> PbufferT = [ + ... ([3,2], [[6.,4.],[6.,4.]], 8), + ... ([4,3], [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> Ndescr = [ + ... ('x', 'i4', (2,)), + ... ('Info', [ + ... ('value', 'c16'), + ... ('y2', 'f8'), + ... ('Info2', [ + ... ('name', 'S2'), + ... ('value', 'c16', (2,)), + ... ('y3', 'f8', (2,)), + ... ('z3', 'u4', (2,))]), + ... ('name', 'S2'), + ... ('z2', 'b1')]), + ... ('color', 'S2'), + ... ('info', [ + ... ('Name', 'U8'), + ... ('Value', 'c16')]), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> NbufferT = [ + ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8), + ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> record_arrays = [ + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + ... ] + +Test the magic string writing. 
+ + >>> format.magic(1, 0) + '\x93NUMPY\x01\x00' + >>> format.magic(0, 0) + '\x93NUMPY\x00\x00' + >>> format.magic(255, 255) + '\x93NUMPY\xff\xff' + >>> format.magic(2, 5) + '\x93NUMPY\x02\x05' + +Test the magic string reading. + + >>> format.read_magic(BytesIO(format.magic(1, 0))) + (1, 0) + >>> format.read_magic(BytesIO(format.magic(0, 0))) + (0, 0) + >>> format.read_magic(BytesIO(format.magic(255, 255))) + (255, 255) + >>> format.read_magic(BytesIO(format.magic(2, 5))) + (2, 5) + +Test the header writing. + + >>> for arr in basic_arrays + record_arrays: + ... f = BytesIO() + ... format.write_array_header_1_0(f, arr) # XXX: arr is not a dict, items gets called on it + ... print(repr(f.getvalue())) + ... + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<u2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i2', 
'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<u4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<u8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n" + 
"F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<f4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<f8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<c8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<c16', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': 
False, 'shape': (0,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n" + "v\x00{'descr': [('x', '<i4', (2,)), ('y', '<f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "\x16\x02{'descr': [('x', '<i4', (2,)),\n ('Info',\n [('value', '<c16'),\n ('y2', '<f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '<c16', (2,)),\n ('y3', '<f8', (2,)),\n ('z3', '<u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '<U8'), ('Value', '<c16')]),\n ('y', '<f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" +''' +import sys +import os +import shutil +import tempfile +import warnings +import pytest +from io import BytesIO + +import numpy as np +from numpy.testing import ( + assert_, assert_array_equal, assert_raises, assert_raises_regex, + ) +from numpy.lib import format + + +tempdir = None + +# Module-level setup. + + +def setup_module(): + global tempdir + tempdir = tempfile.mkdtemp() + + +def teardown_module(): + global tempdir + if tempdir is not None and os.path.isdir(tempdir): + shutil.rmtree(tempdir) + tempdir = None + + +# Generate some basic arrays to test with. +scalars = [ + np.uint8, + np.int8, + np.uint16, + np.int16, + np.uint32, + np.int32, + np.uint64, + np.int64, + np.float32, + np.float64, + np.complex64, + np.complex128, + object, +] +basic_arrays = [] +for scalar in scalars: + for endian in '<>': + dtype = np.dtype(scalar).newbyteorder(endian) + basic = np.arange(1500).astype(dtype) + basic_arrays.extend([ + # Empty + np.array([], dtype=dtype), + # Rank-0 + np.array(10, dtype=dtype), + # 1-D + basic, + # 2-D C-contiguous + basic.reshape((30, 50)), + # 2-D F-contiguous + basic.reshape((30, 50)).T, + # 2-D non-contiguous + basic.reshape((30, 50))[::-1, ::2], + ]) + +# More complicated record arrays. 
+# This is the structure of the table used for plain objects: +# +# +-+-+-+ +# |x|y|z| +# +-+-+-+ + +# Structure of a plain array description: +Pdescr = [ + ('x', 'i4', (2,)), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +# A plain list of tuples with values for testing: +PbufferT = [ + # x y z + ([3, 2], [[6., 4.], [6., 4.]], 8), + ([4, 3], [[7., 5.], [7., 5.]], 9), + ] + + +# This is the structure of the table used for nested objects (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# + +# The corresponding nested array description: +Ndescr = [ + ('x', 'i4', (2,)), + ('Info', [ + ('value', 'c16'), + ('y2', 'f8'), + ('Info2', [ + ('name', 'S2'), + ('value', 'c16', (2,)), + ('y3', 'f8', (2,)), + ('z3', 'u4', (2,))]), + ('name', 'S2'), + ('z2', 'b1')]), + ('color', 'S2'), + ('info', [ + ('Name', 'U8'), + ('Value', 'c16')]), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +NbufferT = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ([3, 2], (6j, 6., ('nn', [6j, 4j], [6., 4.], [1, 2]), 'NN', True), + 'cc', ('NN', 6j), [[6., 4.], [6., 4.]], 8), + ([4, 3], (7j, 7., ('oo', [7j, 5j], [7., 5.], [2, 1]), 'OO', False), + 'dd', ('OO', 7j), [[7., 5.], [7., 5.]], 9), + ] + +record_arrays = [ + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + np.zeros(1, dtype=[('c', ('<f8', (5,)), (2,))]) +] + + +#BytesIO that reads a random number of bytes at a time +class BytesIOSRandomSize(BytesIO): + def read(self, size=None): + import random + size = random.randint(1, size) + return super(BytesIOSRandomSize, self).read(size) + + +def roundtrip(arr): + f = BytesIO() + format.write_array(f, arr) + f2 = BytesIO(f.getvalue()) + arr2 = format.read_array(f2, allow_pickle=True) + return arr2 + + +def roundtrip_randsize(arr): + f = BytesIO() + format.write_array(f, arr) + f2 = BytesIOSRandomSize(f.getvalue()) + arr2 = format.read_array(f2) + return arr2 + + +def roundtrip_truncated(arr): + f = BytesIO() + format.write_array(f, arr) + #BytesIO is one byte short + f2 = BytesIO(f.getvalue()[0:-1]) + arr2 = format.read_array(f2) + return arr2 + + +def assert_equal_(o1, o2): + assert_(o1 == o2) + + +def test_roundtrip(): + for arr in basic_arrays + record_arrays: + arr2 = roundtrip(arr) + assert_array_equal(arr, arr2) + + +def test_roundtrip_randsize(): + for arr in basic_arrays + record_arrays: + if arr.dtype != object: + arr2 = roundtrip_randsize(arr) + assert_array_equal(arr, arr2) + + +def test_roundtrip_truncated(): + for arr in basic_arrays: + if arr.dtype != object: + assert_raises(ValueError, roundtrip_truncated, arr) + + +def test_long_str(): + # check items larger than internal buffer size, gh-4027 + long_str_arr = np.ones(1, dtype=np.dtype((str, format.BUFFER_SIZE + 1))) + long_str_arr2 = roundtrip(long_str_arr) + assert_array_equal(long_str_arr, long_str_arr2) + + +@pytest.mark.slow +def test_memmap_roundtrip(): + # Fixme: used to crash on windows + if not (sys.platform == 'win32' or sys.platform == 'cygwin'): + for arr in basic_arrays + record_arrays: + if arr.dtype.hasobject: 
+ # Skip these since they can't be mmap'ed. + continue + # Write it out normally and through mmap. + nfn = os.path.join(tempdir, 'normal.npy') + mfn = os.path.join(tempdir, 'memmap.npy') + fp = open(nfn, 'wb') + try: + format.write_array(fp, arr) + finally: + fp.close() + + fortran_order = ( + arr.flags.f_contiguous and not arr.flags.c_contiguous) + ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype, + shape=arr.shape, fortran_order=fortran_order) + ma[...] = arr + del ma + + # Check that both of these files' contents are the same. + fp = open(nfn, 'rb') + normal_bytes = fp.read() + fp.close() + fp = open(mfn, 'rb') + memmap_bytes = fp.read() + fp.close() + assert_equal_(normal_bytes, memmap_bytes) + + # Check that reading the file using memmap works. + ma = format.open_memmap(nfn, mode='r') + del ma + + +def test_compressed_roundtrip(): + arr = np.random.rand(200, 200) + npz_file = os.path.join(tempdir, 'compressed.npz') + np.savez_compressed(npz_file, arr=arr) + arr1 = np.load(npz_file)['arr'] + assert_array_equal(arr, arr1) + + +# aligned +dt1 = np.dtype('i1, i4, i1', align=True) +# non-aligned, explicit offsets +dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'], + 'offsets': [1, 6]}) +# nested struct-in-struct +dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]}) +# field with '' name +dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4']*3}) +# titles +dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'], + 'offsets': [1, 6], 'titles': ['aa', 'bb']}) + +@pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5]) +def test_load_padded_dtype(dt): + arr = np.zeros(3, dt) + for i in range(3): + arr[i] = i + 5 + npz_file = os.path.join(tempdir, 'aligned.npz') + np.savez(npz_file, arr=arr) + arr1 = np.load(npz_file)['arr'] + assert_array_equal(arr, arr1) + + +def test_python2_python3_interoperability(): + if sys.version_info[0] >= 3: + fname = 'win64python2.npy' + else: + fname = 'python3.npy' + path = os.path.join(os.path.dirname(__file__), 'data', fname) + data = np.load(path) + assert_array_equal(data, np.ones(2)) + +def test_pickle_python2_python3(): + # Test that loading object arrays saved on Python 2 works both on + # Python 2 and Python 3 and vice versa + data_dir = os.path.join(os.path.dirname(__file__), 'data') + + if sys.version_info[0] >= 3: + xrange = range + else: + import __builtin__ + xrange = __builtin__.xrange + + expected = np.array([None, xrange, u'\u512a\u826f', + b'\xe4\xb8\x8d\xe8\x89\xaf'], + dtype=object) + + for fname in ['py2-objarr.npy', 'py2-objarr.npz', + 'py3-objarr.npy', 'py3-objarr.npz']: + path = os.path.join(data_dir, fname) + + for encoding in ['bytes', 'latin1']: + data_f = np.load(path, allow_pickle=True, encoding=encoding) + if fname.endswith('.npz'): + data = data_f['x'] + data_f.close() + else: + data = data_f + + if sys.version_info[0] >= 3: + if encoding == 'latin1' and fname.startswith('py2'): + assert_(isinstance(data[3], str)) + assert_array_equal(data[:-1], expected[:-1]) + # mojibake occurs + assert_array_equal(data[-1].encode(encoding), expected[-1]) + else: + assert_(isinstance(data[3], bytes)) + assert_array_equal(data, expected) + else: + assert_array_equal(data, expected) + + if sys.version_info[0] >= 3: + if fname.startswith('py2'): + if fname.endswith('.npz'): + data = np.load(path, allow_pickle=True) + assert_raises(UnicodeError, data.__getitem__, 'x') + data.close() + data = np.load(path, allow_pickle=True, fix_imports=False, + encoding='latin1') + assert_raises(ImportError, data.__getitem__, 'x') + 
data.close() + else: + assert_raises(UnicodeError, np.load, path, + allow_pickle=True) + assert_raises(ImportError, np.load, path, + allow_pickle=True, fix_imports=False, + encoding='latin1') + + +def test_pickle_disallow(): + data_dir = os.path.join(os.path.dirname(__file__), 'data') + + path = os.path.join(data_dir, 'py2-objarr.npy') + assert_raises(ValueError, np.load, path, + allow_pickle=False, encoding='latin1') + + path = os.path.join(data_dir, 'py2-objarr.npz') + f = np.load(path, allow_pickle=False, encoding='latin1') + assert_raises(ValueError, f.__getitem__, 'x') + + path = os.path.join(tempdir, 'pickle-disabled.npy') + assert_raises(ValueError, np.save, path, np.array([None], dtype=object), + allow_pickle=False) + +@pytest.mark.parametrize('dt', [ + np.dtype(np.dtype([('a', np.int8), + ('b', np.int16), + ('c', np.int32), + ], align=True), + (3,)), + np.dtype([('x', np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8, + }, + (3,)), + (4,), + )]), + np.dtype([('x', + ('<f8', (5,)), + (2,), + )]), + np.dtype([('x', np.dtype(( + np.dtype(( + np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8}), + (3,) + )), + (4,) + ))) + ]), + np.dtype([ + ('a', np.dtype(( + np.dtype(( + np.dtype(( + np.dtype([ + ('a', int), + ('b', np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8})), + ]), + (3,), + )), + (4,), + )), + (5,), + ))) + ]), + ]) + +def test_descr_to_dtype(dt): + dt1 = format.descr_to_dtype(dt.descr) + assert_equal_(dt1, dt) + arr1 = np.zeros(3, dt) + arr2 = roundtrip(arr1) + assert_array_equal(arr1, arr2) + +def test_version_2_0(): + f = BytesIO() + # requires more than 2 byte for header + dt = [(("%d" % i) * 100, float) for i in range(500)] + d = np.ones(1000, dtype=dt) + + format.write_array(f, d, version=(2, 0)) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', UserWarning) + format.write_array(f, d) + assert_(w[0].category is UserWarning) + + # check alignment of data portion + f.seek(0) + header = f.readline() + assert_(len(header) % format.ARRAY_ALIGN == 0) + + f.seek(0) + n = format.read_array(f) + assert_array_equal(d, n) + + # 1.0 requested but data cannot be saved this way + assert_raises(ValueError, format.write_array, f, d, (1, 0)) + + +@pytest.mark.slow +def test_version_2_0_memmap(): + # requires more than 2 byte for header + dt = [(("%d" % i) * 100, float) for i in range(500)] + d = np.ones(1000, dtype=dt) + tf = tempfile.mktemp('', 'mmap', dir=tempdir) + + # 1.0 requested but data cannot be saved this way + assert_raises(ValueError, format.open_memmap, tf, mode='w+', dtype=d.dtype, + shape=d.shape, version=(1, 0)) + + ma = format.open_memmap(tf, mode='w+', dtype=d.dtype, + shape=d.shape, version=(2, 0)) + ma[...] = d + del ma + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', UserWarning) + ma = format.open_memmap(tf, mode='w+', dtype=d.dtype, + shape=d.shape, version=None) + assert_(w[0].category is UserWarning) + ma[...] = d + del ma + + ma = format.open_memmap(tf, mode='r') + assert_array_equal(ma, d) + + +def test_write_version(): + f = BytesIO() + arr = np.arange(1) + # These should pass. + format.write_array(f, arr, version=(1, 0)) + format.write_array(f, arr) + + format.write_array(f, arr, version=None) + format.write_array(f, arr) + + format.write_array(f, arr, version=(2, 0)) + format.write_array(f, arr) + + # These should all fail. 
+ bad_versions = [ + (1, 1), + (0, 0), + (0, 1), + (2, 2), + (255, 255), + ] + for version in bad_versions: + with assert_raises_regex(ValueError, + 'we only support format version.*'): + format.write_array(f, arr, version=version) + + +bad_version_magic = [ + b'\x93NUMPY\x01\x01', + b'\x93NUMPY\x00\x00', + b'\x93NUMPY\x00\x01', + b'\x93NUMPY\x02\x00', + b'\x93NUMPY\x02\x02', + b'\x93NUMPY\xff\xff', +] +malformed_magic = [ + b'\x92NUMPY\x01\x00', + b'\x00NUMPY\x01\x00', + b'\x93numpy\x01\x00', + b'\x93MATLB\x01\x00', + b'\x93NUMPY\x01', + b'\x93NUMPY', + b'', +] + +def test_read_magic(): + s1 = BytesIO() + s2 = BytesIO() + + arr = np.ones((3, 6), dtype=float) + + format.write_array(s1, arr, version=(1, 0)) + format.write_array(s2, arr, version=(2, 0)) + + s1.seek(0) + s2.seek(0) + + version1 = format.read_magic(s1) + version2 = format.read_magic(s2) + + assert_(version1 == (1, 0)) + assert_(version2 == (2, 0)) + + assert_(s1.tell() == format.MAGIC_LEN) + assert_(s2.tell() == format.MAGIC_LEN) + +def test_read_magic_bad_magic(): + for magic in malformed_magic: + f = BytesIO(magic) + assert_raises(ValueError, format.read_array, f) + + +def test_read_version_1_0_bad_magic(): + for magic in bad_version_magic + malformed_magic: + f = BytesIO(magic) + assert_raises(ValueError, format.read_array, f) + + +def test_bad_magic_args(): + assert_raises(ValueError, format.magic, -1, 1) + assert_raises(ValueError, format.magic, 256, 1) + assert_raises(ValueError, format.magic, 1, -1) + assert_raises(ValueError, format.magic, 1, 256) + + +def test_large_header(): + s = BytesIO() + d = {'a': 1, 'b': 2} + format.write_array_header_1_0(s, d) + + s = BytesIO() + d = {'a': 1, 'b': 2, 'c': 'x'*256*256} + assert_raises(ValueError, format.write_array_header_1_0, s, d) + + +def test_read_array_header_1_0(): + s = BytesIO() + + arr = np.ones((3, 6), dtype=float) + format.write_array(s, arr, version=(1, 0)) + + s.seek(format.MAGIC_LEN) + shape, fortran, dtype = format.read_array_header_1_0(s) + + assert_(s.tell() % format.ARRAY_ALIGN == 0) + assert_((shape, fortran, dtype) == ((3, 6), False, float)) + + +def test_read_array_header_2_0(): + s = BytesIO() + + arr = np.ones((3, 6), dtype=float) + format.write_array(s, arr, version=(2, 0)) + + s.seek(format.MAGIC_LEN) + shape, fortran, dtype = format.read_array_header_2_0(s) + + assert_(s.tell() % format.ARRAY_ALIGN == 0) + assert_((shape, fortran, dtype) == ((3, 6), False, float)) + + +def test_bad_header(): + # header of length less than 2 should fail + s = BytesIO() + assert_raises(ValueError, format.read_array_header_1_0, s) + s = BytesIO(b'1') + assert_raises(ValueError, format.read_array_header_1_0, s) + + # header shorter than indicated size should fail + s = BytesIO(b'\x01\x00') + assert_raises(ValueError, format.read_array_header_1_0, s) + + # headers without the exact keys required should fail + d = {"shape": (1, 2), + "descr": "x"} + s = BytesIO() + format.write_array_header_1_0(s, d) + assert_raises(ValueError, format.read_array_header_1_0, s) + + d = {"shape": (1, 2), + "fortran_order": False, + "descr": "x", + "extrakey": -1} + s = BytesIO() + format.write_array_header_1_0(s, d) + assert_raises(ValueError, format.read_array_header_1_0, s) + + +def test_large_file_support(): + if (sys.platform == 'win32' or sys.platform == 'cygwin'): + pytest.skip("Unknown if Windows has sparse filesystems") + # try creating a large sparse file + tf_name = os.path.join(tempdir, 'sparse_file') + try: + # seek past end would work too, but linux truncate somewhat + # increases 
the chances that we have a sparse filesystem and can + # avoid actually writing 5GB + import subprocess as sp + sp.check_call(["truncate", "-s", "5368709120", tf_name]) + except Exception: + pytest.skip("Could not create 5GB large file") + # write a small array to the end + with open(tf_name, "wb") as f: + f.seek(5368709120) + d = np.arange(5) + np.save(f, d) + # read it back + with open(tf_name, "rb") as f: + f.seek(5368709120) + r = np.load(f) + assert_array_equal(r, d) + + +@pytest.mark.skipif(np.dtype(np.intp).itemsize < 8, + reason="test requires 64-bit system") +@pytest.mark.slow +def test_large_archive(): + # Regression test for product of saving arrays with dimensions of array + # having a product that doesn't fit in int32. See gh-7598 for details. + try: + a = np.empty((2**30, 2), dtype=np.uint8) + except MemoryError: + pytest.skip("Could not create large file") + + fname = os.path.join(tempdir, "large_archive") + + with open(fname, "wb") as f: + np.savez(f, arr=a) + + with open(fname, "rb") as f: + new_a = np.load(f)["arr"] + + assert_(a.shape == new_a.shape) + + +def test_empty_npz(): + # Test for gh-9989 + fname = os.path.join(tempdir, "nothing.npz") + np.savez(fname) + np.load(fname) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_function_base.py b/contrib/python/numpy/py2/numpy/lib/tests/test_function_base.py new file mode 100644 index 0000000000..088ca2baeb --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_function_base.py @@ -0,0 +1,3141 @@ +from __future__ import division, absolute_import, print_function + +import operator +import warnings +import sys +import decimal +import pytest + +import numpy as np +from numpy import ma +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_allclose, IS_PYPY, + assert_warns, assert_raises_regex, suppress_warnings, HAS_REFCOUNT, + ) +import numpy.lib.function_base as nfb +from numpy.random import rand +from numpy.lib import ( + add_newdoc_ufunc, angle, average, bartlett, blackman, corrcoef, cov, + delete, diff, digitize, extract, flipud, gradient, hamming, hanning, + i0, insert, interp, kaiser, meshgrid, msort, piecewise, place, rot90, + select, setxor1d, sinc, trapz, trim_zeros, unwrap, unique, vectorize + ) + +from numpy.compat import long + + +def get_mat(n): + data = np.arange(n) + data = np.add.outer(data, data) + return data + + +class TestRot90(object): + def test_basic(self): + assert_raises(ValueError, rot90, np.ones(4)) + assert_raises(ValueError, rot90, np.ones((2,2,2)), axes=(0,1,2)) + assert_raises(ValueError, rot90, np.ones((2,2)), axes=(0,2)) + assert_raises(ValueError, rot90, np.ones((2,2)), axes=(1,1)) + assert_raises(ValueError, rot90, np.ones((2,2,2)), axes=(-2,1)) + + a = [[0, 1, 2], + [3, 4, 5]] + b1 = [[2, 5], + [1, 4], + [0, 3]] + b2 = [[5, 4, 3], + [2, 1, 0]] + b3 = [[3, 0], + [4, 1], + [5, 2]] + b4 = [[0, 1, 2], + [3, 4, 5]] + + for k in range(-3, 13, 4): + assert_equal(rot90(a, k=k), b1) + for k in range(-2, 13, 4): + assert_equal(rot90(a, k=k), b2) + for k in range(-1, 13, 4): + assert_equal(rot90(a, k=k), b3) + for k in range(0, 13, 4): + assert_equal(rot90(a, k=k), b4) + + assert_equal(rot90(rot90(a, axes=(0,1)), axes=(1,0)), a) + assert_equal(rot90(a, k=1, axes=(1,0)), rot90(a, k=-1, axes=(0,1))) + + def test_axes(self): + a = np.ones((50, 40, 3)) + assert_equal(rot90(a).shape, (40, 50, 3)) + assert_equal(rot90(a, axes=(0,2)), rot90(a, axes=(0,-1))) + assert_equal(rot90(a, axes=(1,2)), 
rot90(a, axes=(-2,-1))) + + def test_rotation_axes(self): + a = np.arange(8).reshape((2,2,2)) + + a_rot90_01 = [[[2, 3], + [6, 7]], + [[0, 1], + [4, 5]]] + a_rot90_12 = [[[1, 3], + [0, 2]], + [[5, 7], + [4, 6]]] + a_rot90_20 = [[[4, 0], + [6, 2]], + [[5, 1], + [7, 3]]] + a_rot90_10 = [[[4, 5], + [0, 1]], + [[6, 7], + [2, 3]]] + + assert_equal(rot90(a, axes=(0, 1)), a_rot90_01) + assert_equal(rot90(a, axes=(1, 0)), a_rot90_10) + assert_equal(rot90(a, axes=(1, 2)), a_rot90_12) + + for k in range(1,5): + assert_equal(rot90(a, k=k, axes=(2, 0)), + rot90(a_rot90_20, k=k-1, axes=(2, 0))) + + +class TestFlip(object): + + def test_axes(self): + assert_raises(np.AxisError, np.flip, np.ones(4), axis=1) + assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=2) + assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=-3) + assert_raises(np.AxisError, np.flip, np.ones((4, 4)), axis=(0, 3)) + + def test_basic_lr(self): + a = get_mat(4) + b = a[:, ::-1] + assert_equal(np.flip(a, 1), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[2, 1, 0], + [5, 4, 3]] + assert_equal(np.flip(a, 1), b) + + def test_basic_ud(self): + a = get_mat(4) + b = a[::-1, :] + assert_equal(np.flip(a, 0), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[3, 4, 5], + [0, 1, 2]] + assert_equal(np.flip(a, 0), b) + + def test_3d_swap_axis0(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[4, 5], + [6, 7]], + [[0, 1], + [2, 3]]]) + + assert_equal(np.flip(a, 0), b) + + def test_3d_swap_axis1(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[2, 3], + [0, 1]], + [[6, 7], + [4, 5]]]) + + assert_equal(np.flip(a, 1), b) + + def test_3d_swap_axis2(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[1, 0], + [3, 2]], + [[5, 4], + [7, 6]]]) + + assert_equal(np.flip(a, 2), b) + + def test_4d(self): + a = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5) + for i in range(a.ndim): + assert_equal(np.flip(a, i), + np.flipud(a.swapaxes(0, i)).swapaxes(i, 0)) + + def test_default_axis(self): + a = np.array([[1, 2, 3], + [4, 5, 6]]) + b = np.array([[6, 5, 4], + [3, 2, 1]]) + assert_equal(np.flip(a), b) + + def test_multiple_axes(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + assert_equal(np.flip(a, axis=()), a) + + b = np.array([[[5, 4], + [7, 6]], + [[1, 0], + [3, 2]]]) + + assert_equal(np.flip(a, axis=(0, 2)), b) + + c = np.array([[[3, 2], + [1, 0]], + [[7, 6], + [5, 4]]]) + + assert_equal(np.flip(a, axis=(1, 2)), c) + + +class TestAny(object): + + def test_basic(self): + y1 = [0, 0, 1, 0] + y2 = [0, 0, 0, 0] + y3 = [1, 0, 1, 0] + assert_(np.any(y1)) + assert_(np.any(y3)) + assert_(not np.any(y2)) + + def test_nd(self): + y1 = [[0, 0, 0], [0, 1, 0], [1, 1, 0]] + assert_(np.any(y1)) + assert_array_equal(np.sometrue(y1, axis=0), [1, 1, 0]) + assert_array_equal(np.sometrue(y1, axis=1), [0, 1, 1]) + + +class TestAll(object): + + def test_basic(self): + y1 = [0, 1, 1, 0] + y2 = [0, 0, 0, 0] + y3 = [1, 1, 1, 1] + assert_(not np.all(y1)) + assert_(np.all(y3)) + assert_(not np.all(y2)) + assert_(np.all(~np.array(y2))) + + def test_nd(self): + y1 = [[0, 0, 1], [0, 1, 1], [1, 1, 1]] + assert_(not np.all(y1)) + assert_array_equal(np.alltrue(y1, axis=0), [0, 0, 1]) + assert_array_equal(np.alltrue(y1, axis=1), [0, 0, 1]) + + +class TestCopy(object): + + def test_basic(self): + a = np.array([[1, 2], [3, 4]]) + a_copy = np.copy(a) + assert_array_equal(a, a_copy) + a_copy[0, 0] = 10 + assert_equal(a[0, 0], 1) + assert_equal(a_copy[0, 0], 10) + + def 
test_order(self): + # It turns out that people rely on np.copy() preserving order by + # default; changing this broke scikit-learn: + # github.com/scikit-learn/scikit-learn/commit/7842748cf777412c506a8c0ed28090711d3a3783 # noqa + a = np.array([[1, 2], [3, 4]]) + assert_(a.flags.c_contiguous) + assert_(not a.flags.f_contiguous) + a_fort = np.array([[1, 2], [3, 4]], order="F") + assert_(not a_fort.flags.c_contiguous) + assert_(a_fort.flags.f_contiguous) + a_copy = np.copy(a) + assert_(a_copy.flags.c_contiguous) + assert_(not a_copy.flags.f_contiguous) + a_fort_copy = np.copy(a_fort) + assert_(not a_fort_copy.flags.c_contiguous) + assert_(a_fort_copy.flags.f_contiguous) + + +class TestAverage(object): + + def test_basic(self): + y1 = np.array([1, 2, 3]) + assert_(average(y1, axis=0) == 2.) + y2 = np.array([1., 2., 3.]) + assert_(average(y2, axis=0) == 2.) + y3 = [0., 0., 0.] + assert_(average(y3, axis=0) == 0.) + + y4 = np.ones((4, 4)) + y4[0, 1] = 0 + y4[1, 0] = 2 + assert_almost_equal(y4.mean(0), average(y4, 0)) + assert_almost_equal(y4.mean(1), average(y4, 1)) + + y5 = rand(5, 5) + assert_almost_equal(y5.mean(0), average(y5, 0)) + assert_almost_equal(y5.mean(1), average(y5, 1)) + + def test_weights(self): + y = np.arange(10) + w = np.arange(10) + actual = average(y, weights=w) + desired = (np.arange(10) ** 2).sum() * 1. / np.arange(10).sum() + assert_almost_equal(actual, desired) + + y1 = np.array([[1, 2, 3], [4, 5, 6]]) + w0 = [1, 2] + actual = average(y1, weights=w0, axis=0) + desired = np.array([3., 4., 5.]) + assert_almost_equal(actual, desired) + + w1 = [0, 0, 1] + actual = average(y1, weights=w1, axis=1) + desired = np.array([3., 6.]) + assert_almost_equal(actual, desired) + + # This should raise an error. Can we test for that ? + # assert_equal(average(y1, weights=w1), 9./2.) + + # 2D Case + w2 = [[0, 0, 1], [0, 0, 2]] + desired = np.array([3., 6.]) + assert_array_equal(average(y1, weights=w2, axis=1), desired) + assert_equal(average(y1, weights=w2), 5.) + + y3 = rand(5).astype(np.float32) + w3 = rand(5).astype(np.float64) + + assert_(np.average(y3, weights=w3).dtype == np.result_type(y3, w3)) + + def test_returned(self): + y = np.array([[1, 2, 3], [4, 5, 6]]) + + # No weights + avg, scl = average(y, returned=True) + assert_equal(scl, 6.) 
+ + avg, scl = average(y, 0, returned=True) + assert_array_equal(scl, np.array([2., 2., 2.])) + + avg, scl = average(y, 1, returned=True) + assert_array_equal(scl, np.array([3., 3.])) + + # With weights + w0 = [1, 2] + avg, scl = average(y, weights=w0, axis=0, returned=True) + assert_array_equal(scl, np.array([3., 3., 3.])) + + w1 = [1, 2, 3] + avg, scl = average(y, weights=w1, axis=1, returned=True) + assert_array_equal(scl, np.array([6., 6.])) + + w2 = [[0, 0, 1], [1, 2, 3]] + avg, scl = average(y, weights=w2, axis=1, returned=True) + assert_array_equal(scl, np.array([1., 6.])) + + def test_subclasses(self): + class subclass(np.ndarray): + pass + a = np.array([[1,2],[3,4]]).view(subclass) + w = np.array([[1,2],[3,4]]).view(subclass) + + assert_equal(type(np.average(a)), subclass) + assert_equal(type(np.average(a, weights=w)), subclass) + + def test_upcasting(self): + types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), + ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] + for at, wt, rt in types: + a = np.array([[1,2],[3,4]], dtype=at) + w = np.array([[1,2],[3,4]], dtype=wt) + assert_equal(np.average(a, weights=w).dtype, np.dtype(rt)) + + def test_object_dtype(self): + a = np.array([decimal.Decimal(x) for x in range(10)]) + w = np.array([decimal.Decimal(1) for _ in range(10)]) + w /= w.sum() + assert_almost_equal(a.mean(0), average(a, weights=w)) + +class TestSelect(object): + choices = [np.array([1, 2, 3]), + np.array([4, 5, 6]), + np.array([7, 8, 9])] + conditions = [np.array([False, False, False]), + np.array([False, True, False]), + np.array([False, False, True])] + + def _select(self, cond, values, default=0): + output = [] + for m in range(len(cond)): + output += [V[m] for V, C in zip(values, cond) if C[m]] or [default] + return output + + def test_basic(self): + choices = self.choices + conditions = self.conditions + assert_array_equal(select(conditions, choices, default=15), + self._select(conditions, choices, default=15)) + + assert_equal(len(choices), 3) + assert_equal(len(conditions), 3) + + def test_broadcasting(self): + conditions = [np.array(True), np.array([False, True, False])] + choices = [1, np.arange(12).reshape(4, 3)] + assert_array_equal(select(conditions, choices), np.ones((4, 3))) + # default can broadcast too: + assert_equal(select([True], [0], default=[0]).shape, (1,)) + + def test_return_dtype(self): + assert_equal(select(self.conditions, self.choices, 1j).dtype, + np.complex_) + # But the conditions need to be stronger then the scalar default + # if it is scalar. 
+ choices = [choice.astype(np.int8) for choice in self.choices] + assert_equal(select(self.conditions, choices).dtype, np.int8) + + d = np.array([1, 2, 3, np.nan, 5, 7]) + m = np.isnan(d) + assert_equal(select([m], [d]), [0, 0, 0, np.nan, 0, 0]) + + def test_deprecated_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + assert_equal(select([], [], 3j), 3j) + + with warnings.catch_warnings(): + warnings.simplefilter("always") + assert_warns(DeprecationWarning, select, [], []) + warnings.simplefilter("error") + assert_raises(DeprecationWarning, select, [], []) + + def test_non_bool_deprecation(self): + choices = self.choices + conditions = self.conditions[:] + with warnings.catch_warnings(): + warnings.filterwarnings("always") + conditions[0] = conditions[0].astype(np.int_) + assert_warns(DeprecationWarning, select, conditions, choices) + conditions[0] = conditions[0].astype(np.uint8) + assert_warns(DeprecationWarning, select, conditions, choices) + warnings.filterwarnings("error") + assert_raises(DeprecationWarning, select, conditions, choices) + + def test_many_arguments(self): + # This used to be limited by NPY_MAXARGS == 32 + conditions = [np.array([False])] * 100 + choices = [np.array([1])] * 100 + select(conditions, choices) + + +class TestInsert(object): + + def test_basic(self): + a = [1, 2, 3] + assert_equal(insert(a, 0, 1), [1, 1, 2, 3]) + assert_equal(insert(a, 3, 1), [1, 2, 3, 1]) + assert_equal(insert(a, [1, 1, 1], [1, 2, 3]), [1, 1, 2, 3, 2, 3]) + assert_equal(insert(a, 1, [1, 2, 3]), [1, 1, 2, 3, 2, 3]) + assert_equal(insert(a, [1, -1, 3], 9), [1, 9, 2, 9, 3, 9]) + assert_equal(insert(a, slice(-1, None, -1), 9), [9, 1, 9, 2, 9, 3]) + assert_equal(insert(a, [-1, 1, 3], [7, 8, 9]), [1, 8, 2, 7, 3, 9]) + b = np.array([0, 1], dtype=np.float64) + assert_equal(insert(b, 0, b[0]), [0., 0., 1.]) + assert_equal(insert(b, [], []), b) + # Bools will be treated differently in the future: + # assert_equal(insert(a, np.array([True]*4), 9), [9, 1, 9, 2, 9, 3, 9]) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', FutureWarning) + assert_equal( + insert(a, np.array([True] * 4), 9), [1, 9, 9, 9, 9, 2, 3]) + assert_(w[0].category is FutureWarning) + + def test_multidim(self): + a = [[1, 1, 1]] + r = [[2, 2, 2], + [1, 1, 1]] + assert_equal(insert(a, 0, [1]), [1, 1, 1, 1]) + assert_equal(insert(a, 0, [2, 2, 2], axis=0), r) + assert_equal(insert(a, 0, 2, axis=0), r) + assert_equal(insert(a, 2, 2, axis=1), [[1, 1, 2, 1]]) + + a = np.array([[1, 1], [2, 2], [3, 3]]) + b = np.arange(1, 4).repeat(3).reshape(3, 3) + c = np.concatenate( + (a[:, 0:1], np.arange(1, 4).repeat(3).reshape(3, 3).T, + a[:, 1:2]), axis=1) + assert_equal(insert(a, [1], [[1], [2], [3]], axis=1), b) + assert_equal(insert(a, [1], [1, 2, 3], axis=1), c) + # scalars behave differently, in this case exactly opposite: + assert_equal(insert(a, 1, [1, 2, 3], axis=1), b) + assert_equal(insert(a, 1, [[1], [2], [3]], axis=1), c) + + a = np.arange(4).reshape(2, 2) + assert_equal(insert(a[:, :1], 1, a[:, 1], axis=1), a) + assert_equal(insert(a[:1,:], 1, a[1,:], axis=0), a) + + # negative axis value + a = np.arange(24).reshape((2, 3, 4)) + assert_equal(insert(a, 1, a[:,:, 3], axis=-1), + insert(a, 1, a[:,:, 3], axis=2)) + assert_equal(insert(a, 1, a[:, 2,:], axis=-2), + insert(a, 1, a[:, 2,:], axis=1)) + + # invalid axis value + assert_raises(np.AxisError, insert, a, 1, a[:, 2, :], axis=3) + assert_raises(np.AxisError, insert, a, 1, a[:, 2, :], axis=-4) + + # 
negative axis value + a = np.arange(24).reshape((2, 3, 4)) + assert_equal(insert(a, 1, a[:, :, 3], axis=-1), + insert(a, 1, a[:, :, 3], axis=2)) + assert_equal(insert(a, 1, a[:, 2, :], axis=-2), + insert(a, 1, a[:, 2, :], axis=1)) + + def test_0d(self): + # This is an error in the future + a = np.array(1) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', DeprecationWarning) + assert_equal(insert(a, [], 2, axis=0), np.array(2)) + assert_(w[0].category is DeprecationWarning) + + def test_subclass(self): + class SubClass(np.ndarray): + pass + a = np.arange(10).view(SubClass) + assert_(isinstance(np.insert(a, 0, [0]), SubClass)) + assert_(isinstance(np.insert(a, [], []), SubClass)) + assert_(isinstance(np.insert(a, [0, 1], [1, 2]), SubClass)) + assert_(isinstance(np.insert(a, slice(1, 2), [1, 2]), SubClass)) + assert_(isinstance(np.insert(a, slice(1, -2, -1), []), SubClass)) + # This is an error in the future: + a = np.array(1).view(SubClass) + assert_(isinstance(np.insert(a, 0, [0]), SubClass)) + + def test_index_array_copied(self): + x = np.array([1, 1, 1]) + np.insert([0, 1, 2], x, [3, 4, 5]) + assert_equal(x, np.array([1, 1, 1])) + + def test_structured_array(self): + a = np.array([(1, 'a'), (2, 'b'), (3, 'c')], + dtype=[('foo', 'i'), ('bar', 'a1')]) + val = (4, 'd') + b = np.insert(a, 0, val) + assert_array_equal(b[0], np.array(val, dtype=b.dtype)) + val = [(4, 'd')] * 2 + b = np.insert(a, [0, 2], val) + assert_array_equal(b[[0, 3]], np.array(val, dtype=b.dtype)) + + +class TestAmax(object): + + def test_basic(self): + a = [3, 4, 5, 10, -3, -5, 6.0] + assert_equal(np.amax(a), 10.0) + b = [[3, 6.0, 9.0], + [4, 10.0, 5.0], + [8, 3.0, 2.0]] + assert_equal(np.amax(b, axis=0), [8.0, 10.0, 9.0]) + assert_equal(np.amax(b, axis=1), [9.0, 10.0, 8.0]) + + +class TestAmin(object): + + def test_basic(self): + a = [3, 4, 5, 10, -3, -5, 6.0] + assert_equal(np.amin(a), -5.0) + b = [[3, 6.0, 9.0], + [4, 10.0, 5.0], + [8, 3.0, 2.0]] + assert_equal(np.amin(b, axis=0), [3.0, 3.0, 2.0]) + assert_equal(np.amin(b, axis=1), [3.0, 4.0, 2.0]) + + +class TestPtp(object): + + def test_basic(self): + a = np.array([3, 4, 5, 10, -3, -5, 6.0]) + assert_equal(a.ptp(axis=0), 15.0) + b = np.array([[3, 6.0, 9.0], + [4, 10.0, 5.0], + [8, 3.0, 2.0]]) + assert_equal(b.ptp(axis=0), [5.0, 7.0, 7.0]) + assert_equal(b.ptp(axis=-1), [6.0, 6.0, 6.0]) + + assert_equal(b.ptp(axis=0, keepdims=True), [[5.0, 7.0, 7.0]]) + assert_equal(b.ptp(axis=(0,1), keepdims=True), [[8.0]]) + + +class TestCumsum(object): + + def test_basic(self): + ba = [1, 2, 10, 11, 6, 5, 4] + ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] + for ctype in [np.int8, np.uint8, np.int16, np.uint16, np.int32, + np.uint32, np.float32, np.float64, np.complex64, + np.complex128]: + a = np.array(ba, ctype) + a2 = np.array(ba2, ctype) + + tgt = np.array([1, 3, 13, 24, 30, 35, 39], ctype) + assert_array_equal(np.cumsum(a, axis=0), tgt) + + tgt = np.array( + [[1, 2, 3, 4], [6, 8, 10, 13], [16, 11, 14, 18]], ctype) + assert_array_equal(np.cumsum(a2, axis=0), tgt) + + tgt = np.array( + [[1, 3, 6, 10], [5, 11, 18, 27], [10, 13, 17, 22]], ctype) + assert_array_equal(np.cumsum(a2, axis=1), tgt) + + +class TestProd(object): + + def test_basic(self): + ba = [1, 2, 10, 11, 6, 5, 4] + ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] + for ctype in [np.int16, np.uint16, np.int32, np.uint32, + np.float32, np.float64, np.complex64, np.complex128]: + a = np.array(ba, ctype) + a2 = np.array(ba2, ctype) + if ctype in ['1', 'b']: + 
assert_raises(ArithmeticError, np.prod, a) + assert_raises(ArithmeticError, np.prod, a2, 1) + else: + assert_equal(a.prod(axis=0), 26400) + assert_array_equal(a2.prod(axis=0), + np.array([50, 36, 84, 180], ctype)) + assert_array_equal(a2.prod(axis=-1), + np.array([24, 1890, 600], ctype)) + + +class TestCumprod(object): + + def test_basic(self): + ba = [1, 2, 10, 11, 6, 5, 4] + ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] + for ctype in [np.int16, np.uint16, np.int32, np.uint32, + np.float32, np.float64, np.complex64, np.complex128]: + a = np.array(ba, ctype) + a2 = np.array(ba2, ctype) + if ctype in ['1', 'b']: + assert_raises(ArithmeticError, np.cumprod, a) + assert_raises(ArithmeticError, np.cumprod, a2, 1) + assert_raises(ArithmeticError, np.cumprod, a) + else: + assert_array_equal(np.cumprod(a, axis=-1), + np.array([1, 2, 20, 220, + 1320, 6600, 26400], ctype)) + assert_array_equal(np.cumprod(a2, axis=0), + np.array([[1, 2, 3, 4], + [5, 12, 21, 36], + [50, 36, 84, 180]], ctype)) + assert_array_equal(np.cumprod(a2, axis=-1), + np.array([[1, 2, 6, 24], + [5, 30, 210, 1890], + [10, 30, 120, 600]], ctype)) + + +class TestDiff(object): + + def test_basic(self): + x = [1, 4, 6, 7, 12] + out = np.array([3, 2, 1, 5]) + out2 = np.array([-1, -1, 4]) + out3 = np.array([0, 5]) + assert_array_equal(diff(x), out) + assert_array_equal(diff(x, n=2), out2) + assert_array_equal(diff(x, n=3), out3) + + x = [1.1, 2.2, 3.0, -0.2, -0.1] + out = np.array([1.1, 0.8, -3.2, 0.1]) + assert_almost_equal(diff(x), out) + + x = [True, True, False, False] + out = np.array([False, True, False]) + out2 = np.array([True, True]) + assert_array_equal(diff(x), out) + assert_array_equal(diff(x, n=2), out2) + + def test_axis(self): + x = np.zeros((10, 20, 30)) + x[:, 1::2, :] = 1 + exp = np.ones((10, 19, 30)) + exp[:, 1::2, :] = -1 + assert_array_equal(diff(x), np.zeros((10, 20, 29))) + assert_array_equal(diff(x, axis=-1), np.zeros((10, 20, 29))) + assert_array_equal(diff(x, axis=0), np.zeros((9, 20, 30))) + assert_array_equal(diff(x, axis=1), exp) + assert_array_equal(diff(x, axis=-2), exp) + assert_raises(np.AxisError, diff, x, axis=3) + assert_raises(np.AxisError, diff, x, axis=-4) + + def test_nd(self): + x = 20 * rand(10, 20, 30) + out1 = x[:, :, 1:] - x[:, :, :-1] + out2 = out1[:, :, 1:] - out1[:, :, :-1] + out3 = x[1:, :, :] - x[:-1, :, :] + out4 = out3[1:, :, :] - out3[:-1, :, :] + assert_array_equal(diff(x), out1) + assert_array_equal(diff(x, n=2), out2) + assert_array_equal(diff(x, axis=0), out3) + assert_array_equal(diff(x, n=2, axis=0), out4) + + def test_n(self): + x = list(range(3)) + assert_raises(ValueError, diff, x, n=-1) + output = [diff(x, n=n) for n in range(1, 5)] + expected = [[1, 1], [0], [], []] + assert_(diff(x, n=0) is x) + for n, (expected, out) in enumerate(zip(expected, output), start=1): + assert_(type(out) is np.ndarray) + assert_array_equal(out, expected) + assert_equal(out.dtype, np.int_) + assert_equal(len(out), max(0, len(x) - n)) + + def test_times(self): + x = np.arange('1066-10-13', '1066-10-16', dtype=np.datetime64) + expected = [ + np.array([1, 1], dtype='timedelta64[D]'), + np.array([0], dtype='timedelta64[D]'), + ] + expected.extend([np.array([], dtype='timedelta64[D]')] * 3) + for n, exp in enumerate(expected, start=1): + out = diff(x, n=n) + assert_array_equal(out, exp) + assert_equal(out.dtype, exp.dtype) + + def test_subclass(self): + x = ma.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], + mask=[[False, False], [True, False], + [False, True], [True, True], [False, False]]) 
+ out = diff(x) + assert_array_equal(out.data, [[1], [1], [1], [1], [1]]) + assert_array_equal(out.mask, [[False], [True], + [True], [True], [False]]) + assert_(type(out) is type(x)) + + out3 = diff(x, n=3) + assert_array_equal(out3.data, [[], [], [], [], []]) + assert_array_equal(out3.mask, [[], [], [], [], []]) + assert_(type(out3) is type(x)) + + def test_prepend(self): + x = np.arange(5) + 1 + assert_array_equal(diff(x, prepend=0), np.ones(5)) + assert_array_equal(diff(x, prepend=[0]), np.ones(5)) + assert_array_equal(np.cumsum(np.diff(x, prepend=0)), x) + assert_array_equal(diff(x, prepend=[-1, 0]), np.ones(6)) + + x = np.arange(4).reshape(2, 2) + result = np.diff(x, axis=1, prepend=0) + expected = [[0, 1], [2, 1]] + assert_array_equal(result, expected) + result = np.diff(x, axis=1, prepend=[[0], [0]]) + assert_array_equal(result, expected) + + result = np.diff(x, axis=0, prepend=0) + expected = [[0, 1], [2, 2]] + assert_array_equal(result, expected) + result = np.diff(x, axis=0, prepend=[[0, 0]]) + assert_array_equal(result, expected) + + assert_raises(ValueError, np.diff, x, prepend=np.zeros((3,3))) + + assert_raises(np.AxisError, diff, x, prepend=0, axis=3) + + def test_append(self): + x = np.arange(5) + result = diff(x, append=0) + expected = [1, 1, 1, 1, -4] + assert_array_equal(result, expected) + result = diff(x, append=[0]) + assert_array_equal(result, expected) + result = diff(x, append=[0, 2]) + expected = expected + [2] + assert_array_equal(result, expected) + + x = np.arange(4).reshape(2, 2) + result = np.diff(x, axis=1, append=0) + expected = [[1, -1], [1, -3]] + assert_array_equal(result, expected) + result = np.diff(x, axis=1, append=[[0], [0]]) + assert_array_equal(result, expected) + + result = np.diff(x, axis=0, append=0) + expected = [[2, 2], [-2, -3]] + assert_array_equal(result, expected) + result = np.diff(x, axis=0, append=[[0, 0]]) + assert_array_equal(result, expected) + + assert_raises(ValueError, np.diff, x, append=np.zeros((3,3))) + + assert_raises(np.AxisError, diff, x, append=0, axis=3) + + +class TestDelete(object): + + def setup(self): + self.a = np.arange(5) + self.nd_a = np.arange(5).repeat(2).reshape(1, 5, 2) + + def _check_inverse_of_slicing(self, indices): + a_del = delete(self.a, indices) + nd_a_del = delete(self.nd_a, indices, axis=1) + msg = 'Delete failed for obj: %r' % indices + # NOTE: The cast should be removed after warning phase for bools + if not isinstance(indices, (slice, int, long, np.integer)): + indices = np.asarray(indices, dtype=np.intp) + indices = indices[(indices >= 0) & (indices < 5)] + assert_array_equal(setxor1d(a_del, self.a[indices, ]), self.a, + err_msg=msg) + xor = setxor1d(nd_a_del[0,:, 0], self.nd_a[0, indices, 0]) + assert_array_equal(xor, self.nd_a[0,:, 0], err_msg=msg) + + def test_slices(self): + lims = [-6, -2, 0, 1, 2, 4, 5] + steps = [-3, -1, 1, 3] + for start in lims: + for stop in lims: + for step in steps: + s = slice(start, stop, step) + self._check_inverse_of_slicing(s) + + def test_fancy(self): + # Deprecation/FutureWarning tests should be kept after change. 
+ self._check_inverse_of_slicing(np.array([[0, 1], [2, 1]])) + with warnings.catch_warnings(): + warnings.filterwarnings('error', category=DeprecationWarning) + assert_raises(DeprecationWarning, delete, self.a, [100]) + assert_raises(DeprecationWarning, delete, self.a, [-100]) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', category=FutureWarning) + self._check_inverse_of_slicing([0, -1, 2, 2]) + obj = np.array([True, False, False], dtype=bool) + self._check_inverse_of_slicing(obj) + assert_(w[0].category is FutureWarning) + assert_(w[1].category is FutureWarning) + + def test_single(self): + self._check_inverse_of_slicing(0) + self._check_inverse_of_slicing(-4) + + def test_0d(self): + a = np.array(1) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', DeprecationWarning) + assert_equal(delete(a, [], axis=0), a) + assert_(w[0].category is DeprecationWarning) + + def test_subclass(self): + class SubClass(np.ndarray): + pass + a = self.a.view(SubClass) + assert_(isinstance(delete(a, 0), SubClass)) + assert_(isinstance(delete(a, []), SubClass)) + assert_(isinstance(delete(a, [0, 1]), SubClass)) + assert_(isinstance(delete(a, slice(1, 2)), SubClass)) + assert_(isinstance(delete(a, slice(1, -2)), SubClass)) + + def test_array_order_preserve(self): + # See gh-7113 + k = np.arange(10).reshape(2, 5, order='F') + m = delete(k, slice(60, None), axis=1) + + # 'k' is Fortran ordered, and 'm' should have the + # same ordering as 'k' and NOT become C ordered + assert_equal(m.flags.c_contiguous, k.flags.c_contiguous) + assert_equal(m.flags.f_contiguous, k.flags.f_contiguous) + + +class TestGradient(object): + + def test_basic(self): + v = [[1, 1], [3, 4]] + x = np.array(v) + dx = [np.array([[2., 3.], [2., 3.]]), + np.array([[0., 0.], [1., 1.]])] + assert_array_equal(gradient(x), dx) + assert_array_equal(gradient(v), dx) + + def test_args(self): + dx = np.cumsum(np.ones(5)) + dx_uneven = [1., 2., 5., 9., 11.] + f_2d = np.arange(25).reshape(5, 5) + + # distances must be scalars or have size equal to gradient[axis] + gradient(np.arange(5), 3.) 
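# Editor's aside (minimal sketch of the spacing forms np.gradient accepts; the
# sample values are assumptions for illustration): each vararg may be a scalar
# step or a 1-D coordinate array for the corresponding axis.
import numpy as np
y = np.array([0., 1., 4., 9., 16.])               # y = x**2 sampled at x = 0..4
np.gradient(y, 1.0)                                # scalar spacing
np.gradient(y, np.array([0., 1., 2., 3., 4.]))     # equivalent coordinate array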
+ gradient(np.arange(5), np.array(3.)) + gradient(np.arange(5), dx) + # dy is set equal to dx because scalar + gradient(f_2d, 1.5) + gradient(f_2d, np.array(1.5)) + + gradient(f_2d, dx_uneven, dx_uneven) + # mix between even and uneven spaces and + # mix between scalar and vector + gradient(f_2d, dx, 2) + + # 2D but axis specified + gradient(f_2d, dx, axis=1) + + # 2d coordinate arguments are not yet allowed + assert_raises_regex(ValueError, '.*scalars or 1d', + gradient, f_2d, np.stack([dx]*2, axis=-1), 1) + + def test_badargs(self): + f_2d = np.arange(25).reshape(5, 5) + x = np.cumsum(np.ones(5)) + + # wrong sizes + assert_raises(ValueError, gradient, f_2d, x, np.ones(2)) + assert_raises(ValueError, gradient, f_2d, 1, np.ones(2)) + assert_raises(ValueError, gradient, f_2d, np.ones(2), np.ones(2)) + # wrong number of arguments + assert_raises(TypeError, gradient, f_2d, x) + assert_raises(TypeError, gradient, f_2d, x, axis=(0,1)) + assert_raises(TypeError, gradient, f_2d, x, x, x) + assert_raises(TypeError, gradient, f_2d, 1, 1, 1) + assert_raises(TypeError, gradient, f_2d, x, x, axis=1) + assert_raises(TypeError, gradient, f_2d, 1, 1, axis=1) + + def test_datetime64(self): + # Make sure gradient() can handle special types like datetime64 + x = np.array( + ['1910-08-16', '1910-08-11', '1910-08-10', '1910-08-12', + '1910-10-12', '1910-12-12', '1912-12-12'], + dtype='datetime64[D]') + dx = np.array( + [-5, -3, 0, 31, 61, 396, 731], + dtype='timedelta64[D]') + assert_array_equal(gradient(x), dx) + assert_(dx.dtype == np.dtype('timedelta64[D]')) + + def test_masked(self): + # Make sure that gradient supports subclasses like masked arrays + x = np.ma.array([[1, 1], [3, 4]], + mask=[[False, False], [False, False]]) + out = gradient(x)[0] + assert_equal(type(out), type(x)) + # And make sure that the output and input don't have aliased mask + # arrays + assert_(x.mask is not out.mask) + # Also check that edge_order=2 doesn't alter the original mask + x2 = np.ma.arange(5) + x2[2] = np.ma.masked + np.gradient(x2, edge_order=2) + assert_array_equal(x2.mask, [False, False, True, False, False]) + + def test_second_order_accurate(self): + # Testing that the relative numerical error is less that 3% for + # this example problem. This corresponds to second order + # accurate finite differences for all interior and boundary + # points. + x = np.linspace(0, 1, 10) + dx = x[1] - x[0] + y = 2 * x ** 3 + 4 * x ** 2 + 2 * x + analytical = 6 * x ** 2 + 8 * x + 2 + num_error = np.abs((np.gradient(y, dx, edge_order=2) / analytical) - 1) + assert_(np.all(num_error < 0.03) == True) + + # test with unevenly spaced + np.random.seed(0) + x = np.sort(np.random.random(10)) + y = 2 * x ** 3 + 4 * x ** 2 + 2 * x + analytical = 6 * x ** 2 + 8 * x + 2 + num_error = np.abs((np.gradient(y, x, edge_order=2) / analytical) - 1) + assert_(np.all(num_error < 0.03) == True) + + def test_spacing(self): + f = np.array([0, 2., 3., 4., 5., 5.]) + f = np.tile(f, (6,1)) + f.reshape(-1, 1) + x_uneven = np.array([0., 0.5, 1., 3., 5., 7.]) + x_even = np.arange(6.) 
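# Editor's note (hedged): edge_order only changes the boundary stencil used by
# np.gradient -- 1 means simple one-sided differences at the two ends, 2 means
# a three-point second-order formula; interior points always use central (or
# nonuniform-grid) differences. The fdx_* tables below encode the expected
# results for both orders on the even and uneven grids defined above.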
+ + fdx_even_ord1 = np.tile([2., 1.5, 1., 1., 0.5, 0.], (6,1)) + fdx_even_ord2 = np.tile([2.5, 1.5, 1., 1., 0.5, -0.5], (6,1)) + fdx_uneven_ord1 = np.tile([4., 3., 1.7, 0.5, 0.25, 0.], (6,1)) + fdx_uneven_ord2 = np.tile([5., 3., 1.7, 0.5, 0.25, -0.25], (6,1)) + + # evenly spaced + for edge_order, exp_res in [(1, fdx_even_ord1), (2, fdx_even_ord2)]: + res1 = gradient(f, 1., axis=(0,1), edge_order=edge_order) + res2 = gradient(f, x_even, x_even, + axis=(0,1), edge_order=edge_order) + res3 = gradient(f, x_even, x_even, + axis=None, edge_order=edge_order) + assert_array_equal(res1, res2) + assert_array_equal(res2, res3) + assert_almost_equal(res1[0], exp_res.T) + assert_almost_equal(res1[1], exp_res) + + res1 = gradient(f, 1., axis=0, edge_order=edge_order) + res2 = gradient(f, x_even, axis=0, edge_order=edge_order) + assert_(res1.shape == res2.shape) + assert_almost_equal(res2, exp_res.T) + + res1 = gradient(f, 1., axis=1, edge_order=edge_order) + res2 = gradient(f, x_even, axis=1, edge_order=edge_order) + assert_(res1.shape == res2.shape) + assert_array_equal(res2, exp_res) + + # unevenly spaced + for edge_order, exp_res in [(1, fdx_uneven_ord1), (2, fdx_uneven_ord2)]: + res1 = gradient(f, x_uneven, x_uneven, + axis=(0,1), edge_order=edge_order) + res2 = gradient(f, x_uneven, x_uneven, + axis=None, edge_order=edge_order) + assert_array_equal(res1, res2) + assert_almost_equal(res1[0], exp_res.T) + assert_almost_equal(res1[1], exp_res) + + res1 = gradient(f, x_uneven, axis=0, edge_order=edge_order) + assert_almost_equal(res1, exp_res.T) + + res1 = gradient(f, x_uneven, axis=1, edge_order=edge_order) + assert_almost_equal(res1, exp_res) + + # mixed + res1 = gradient(f, x_even, x_uneven, axis=(0,1), edge_order=1) + res2 = gradient(f, x_uneven, x_even, axis=(1,0), edge_order=1) + assert_array_equal(res1[0], res2[1]) + assert_array_equal(res1[1], res2[0]) + assert_almost_equal(res1[0], fdx_even_ord1.T) + assert_almost_equal(res1[1], fdx_uneven_ord1) + + res1 = gradient(f, x_even, x_uneven, axis=(0,1), edge_order=2) + res2 = gradient(f, x_uneven, x_even, axis=(1,0), edge_order=2) + assert_array_equal(res1[0], res2[1]) + assert_array_equal(res1[1], res2[0]) + assert_almost_equal(res1[0], fdx_even_ord2.T) + assert_almost_equal(res1[1], fdx_uneven_ord2) + + def test_specific_axes(self): + # Testing that gradient can work on a given axis only + v = [[1, 1], [3, 4]] + x = np.array(v) + dx = [np.array([[2., 3.], [2., 3.]]), + np.array([[0., 0.], [1., 1.]])] + assert_array_equal(gradient(x, axis=0), dx[0]) + assert_array_equal(gradient(x, axis=1), dx[1]) + assert_array_equal(gradient(x, axis=-1), dx[1]) + assert_array_equal(gradient(x, axis=(1, 0)), [dx[1], dx[0]]) + + # test axis=None which means all axes + assert_almost_equal(gradient(x, axis=None), [dx[0], dx[1]]) + # and is the same as no axis keyword given + assert_almost_equal(gradient(x, axis=None), gradient(x)) + + # test vararg order + assert_array_equal(gradient(x, 2, 3, axis=(1, 0)), + [dx[1]/2.0, dx[0]/3.0]) + # test maximal number of varargs + assert_raises(TypeError, gradient, x, 1, 2, axis=1) + + assert_raises(np.AxisError, gradient, x, axis=3) + assert_raises(np.AxisError, gradient, x, axis=-3) + # assert_raises(TypeError, gradient, x, axis=[1,]) + + def test_timedelta64(self): + # Make sure gradient() can handle special types like timedelta64 + x = np.array( + [-5, -3, 10, 12, 61, 321, 300], + dtype='timedelta64[D]') + dx = np.array( + [2, 7, 7, 25, 154, 119, -21], + dtype='timedelta64[D]') + assert_array_equal(gradient(x), dx) + 
assert_(dx.dtype == np.dtype('timedelta64[D]')) + + def test_inexact_dtypes(self): + for dt in [np.float16, np.float32, np.float64]: + # dtypes should not be promoted in a different way to what diff does + x = np.array([1, 2, 3], dtype=dt) + assert_equal(gradient(x).dtype, np.diff(x).dtype) + + def test_values(self): + # needs at least 2 points for edge_order ==1 + gradient(np.arange(2), edge_order=1) + # needs at least 3 points for edge_order ==1 + gradient(np.arange(3), edge_order=2) + + assert_raises(ValueError, gradient, np.arange(0), edge_order=1) + assert_raises(ValueError, gradient, np.arange(0), edge_order=2) + assert_raises(ValueError, gradient, np.arange(1), edge_order=1) + assert_raises(ValueError, gradient, np.arange(1), edge_order=2) + assert_raises(ValueError, gradient, np.arange(2), edge_order=2) + + +class TestAngle(object): + + def test_basic(self): + x = [1 + 3j, np.sqrt(2) / 2.0 + 1j * np.sqrt(2) / 2, + 1, 1j, -1, -1j, 1 - 3j, -1 + 3j] + y = angle(x) + yo = [ + np.arctan(3.0 / 1.0), + np.arctan(1.0), 0, np.pi / 2, np.pi, -np.pi / 2.0, + -np.arctan(3.0 / 1.0), np.pi - np.arctan(3.0 / 1.0)] + z = angle(x, deg=1) + zo = np.array(yo) * 180 / np.pi + assert_array_almost_equal(y, yo, 11) + assert_array_almost_equal(z, zo, 11) + + def test_subclass(self): + x = np.ma.array([1 + 3j, 1, np.sqrt(2)/2 * (1 + 1j)]) + x[1] = np.ma.masked + expected = np.ma.array([np.arctan(3.0 / 1.0), 0, np.arctan(1.0)]) + expected[1] = np.ma.masked + actual = angle(x) + assert_equal(type(actual), type(expected)) + assert_equal(actual.mask, expected.mask) + assert_equal(actual, expected) + + +class TestTrimZeros(object): + + """ + Only testing for integer splits. + + """ + + def test_basic(self): + a = np.array([0, 0, 1, 2, 3, 4, 0]) + res = trim_zeros(a) + assert_array_equal(res, np.array([1, 2, 3, 4])) + + def test_leading_skip(self): + a = np.array([0, 0, 1, 0, 2, 3, 4, 0]) + res = trim_zeros(a) + assert_array_equal(res, np.array([1, 0, 2, 3, 4])) + + def test_trailing_skip(self): + a = np.array([0, 0, 1, 0, 2, 3, 0, 4, 0]) + res = trim_zeros(a) + assert_array_equal(res, np.array([1, 0, 2, 3, 0, 4])) + + +class TestExtins(object): + + def test_basic(self): + a = np.array([1, 3, 2, 1, 2, 3, 3]) + b = extract(a > 1, a) + assert_array_equal(b, [3, 2, 2, 3, 3]) + + def test_place(self): + # Make sure that non-np.ndarray objects + # raise an error instead of doing nothing + assert_raises(TypeError, place, [1, 2, 3], [True, False], [0, 1]) + + a = np.array([1, 4, 3, 2, 5, 8, 7]) + place(a, [0, 1, 0, 1, 0, 1, 0], [2, 4, 6]) + assert_array_equal(a, [1, 2, 3, 4, 5, 6, 7]) + + place(a, np.zeros(7), []) + assert_array_equal(a, np.arange(1, 8)) + + place(a, [1, 0, 1, 0, 1, 0, 1], [8, 9]) + assert_array_equal(a, [8, 2, 9, 4, 8, 6, 9]) + assert_raises_regex(ValueError, "Cannot insert from an empty array", + lambda: place(a, [0, 0, 0, 0, 0, 1, 0], [])) + + # See Issue #6974 + a = np.array(['12', '34']) + place(a, [0, 1], '9') + assert_array_equal(a, ['12', '9']) + + def test_both(self): + a = rand(10) + mask = a > 0.5 + ac = a.copy() + c = extract(mask, a) + place(a, mask, 0) + place(a, mask, c) + assert_array_equal(a, ac) + + +class TestVectorize(object): + + def test_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract) + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_scalar(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract) + r = f([0, 3, 6, 9], 
5) + assert_array_equal(r, [5, 8, 1, 4]) + + def test_large(self): + x = np.linspace(-3, 2, 10000) + f = vectorize(lambda x: x) + y = f(x) + assert_array_equal(y, x) + + def test_ufunc(self): + import math + f = vectorize(math.cos) + args = np.array([0, 0.5 * np.pi, np.pi, 1.5 * np.pi, 2 * np.pi]) + r1 = f(args) + r2 = np.cos(args) + assert_array_almost_equal(r1, r2) + + def test_keywords(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo) + args = np.array([1, 2, 3]) + r1 = f(args) + r2 = np.array([2, 3, 4]) + assert_array_equal(r1, r2) + r1 = f(args, 2) + r2 = np.array([3, 4, 5]) + assert_array_equal(r1, r2) + + def test_keywords_no_func_code(self): + # This needs to test a function that has keywords but + # no func_code attribute, since otherwise vectorize will + # inspect the func_code. + import random + try: + vectorize(random.randrange) # Should succeed + except Exception: + raise AssertionError() + + def test_keywords2_ticket_2100(self): + # Test kwarg support: enhancement ticket 2100 + + def foo(a, b=1): + return a + b + + f = vectorize(foo) + args = np.array([1, 2, 3]) + r1 = f(a=args) + r2 = np.array([2, 3, 4]) + assert_array_equal(r1, r2) + r1 = f(b=1, a=args) + assert_array_equal(r1, r2) + r1 = f(args, b=2) + r2 = np.array([3, 4, 5]) + assert_array_equal(r1, r2) + + def test_keywords3_ticket_2100(self): + # Test excluded with mixed positional and kwargs: ticket 2100 + def mypolyval(x, p): + _p = list(p) + res = _p.pop(0) + while _p: + res = res * x + _p.pop(0) + return res + + vpolyval = np.vectorize(mypolyval, excluded=['p', 1]) + ans = [3, 6] + assert_array_equal(ans, vpolyval(x=[0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], [1, 2, 3])) + + def test_keywords4_ticket_2100(self): + # Test vectorizing function with no positional args. + @vectorize + def f(**kw): + res = 1.0 + for _k in kw: + res *= kw[_k] + return res + + assert_array_equal(f(a=[1, 2], b=[3, 4]), [3, 8]) + + def test_keywords5_ticket_2100(self): + # Test vectorizing function with no kwargs args. + @vectorize + def f(*v): + return np.prod(v) + + assert_array_equal(f([1, 2], [3, 4]), [3, 8]) + + def test_coverage1_ticket_2100(self): + def foo(): + return 1 + + f = vectorize(foo) + assert_array_equal(f(), 1) + + def test_assigning_docstring(self): + def foo(x): + """Original documentation""" + return x + + f = vectorize(foo) + assert_equal(f.__doc__, foo.__doc__) + + doc = "Provided documentation" + f = vectorize(foo, doc=doc) + assert_equal(f.__doc__, doc) + + def test_UnboundMethod_ticket_1156(self): + # Regression test for issue 1156 + class Foo: + b = 2 + + def bar(self, a): + return a ** self.b + + assert_array_equal(vectorize(Foo().bar)(np.arange(9)), + np.arange(9) ** 2) + assert_array_equal(vectorize(Foo.bar)(Foo(), np.arange(9)), + np.arange(9) ** 2) + + def test_execution_order_ticket_1487(self): + # Regression test for dependence on execution order: issue 1487 + f1 = vectorize(lambda x: x) + res1a = f1(np.arange(3)) + res1b = f1(np.arange(0.1, 3)) + f2 = vectorize(lambda x: x) + res2b = f2(np.arange(0.1, 3)) + res2a = f2(np.arange(3)) + assert_equal(res1a, res2a) + assert_equal(res1b, res2b) + + def test_string_ticket_1892(self): + # Test vectorization over strings: issue 1892. + f = np.vectorize(lambda x: x) + s = '0123456789' * 10 + assert_equal(s, f(s)) + + def test_cache(self): + # Ensure that vectorized func called exactly once per argument. 
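# Editor's note (hedged): when otypes is not given, np.vectorize probes the
# first element once just to determine the output dtype; cache=True memoises
# that probe so every argument is evaluated exactly once, which is what the
# call counter below verifies.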
+ _calls = [0] + + @vectorize + def f(x): + _calls[0] += 1 + return x ** 2 + + f.cache = True + x = np.arange(5) + assert_array_equal(f(x), x * x) + assert_equal(_calls[0], len(x)) + + def test_otypes(self): + f = np.vectorize(lambda x: x) + f.otypes = 'i' + x = np.arange(5) + assert_array_equal(f(x), x) + + def test_parse_gufunc_signature(self): + assert_equal(nfb._parse_gufunc_signature('(x)->()'), ([('x',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x,y)->()'), + ([('x', 'y')], [()])) + assert_equal(nfb._parse_gufunc_signature('(x),(y)->()'), + ([('x',), ('y',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y)'), + ([('x',)], [('y',)])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y),()'), + ([('x',)], [('y',), ()])) + assert_equal(nfb._parse_gufunc_signature('(),(a,b,c),(d)->(d,e)'), + ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')])) + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x)(y)->()') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x),(y)->') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('((x))->(x)') + + def test_signature_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract, signature='(),()->()') + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_signature_mean_last(self): + def mean(a): + return a.mean() + + f = vectorize(mean, signature='(n)->()') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [2, 3]) + + def test_signature_center(self): + def center(a): + return a - a.mean() + + f = vectorize(center, signature='(n)->(n)') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [[-1, 1], [-1, 1]]) + + def test_signature_two_outputs(self): + f = vectorize(lambda x: (x, x), signature='()->(),()') + r = f([1, 2, 3]) + assert_(isinstance(r, tuple) and len(r) == 2) + assert_array_equal(r[0], [1, 2, 3]) + assert_array_equal(r[1], [1, 2, 3]) + + def test_signature_outer(self): + f = vectorize(np.outer, signature='(a),(b)->(a,b)') + r = f([1, 2], [1, 2, 3]) + assert_array_equal(r, [[1, 2, 3], [2, 4, 6]]) + + r = f([[[1, 2]]], [1, 2, 3]) + assert_array_equal(r, [[[[1, 2, 3], [2, 4, 6]]]]) + + r = f([[1, 0], [2, 0]], [1, 2, 3]) + assert_array_equal(r, [[[1, 2, 3], [0, 0, 0]], + [[2, 4, 6], [0, 0, 0]]]) + + r = f([1, 2], [[1, 2, 3], [0, 0, 0]]) + assert_array_equal(r, [[[1, 2, 3], [2, 4, 6]], + [[0, 0, 0], [0, 0, 0]]]) + + def test_signature_computed_size(self): + f = vectorize(lambda x: x[:-1], signature='(n)->(m)') + r = f([1, 2, 3]) + assert_array_equal(r, [1, 2]) + + r = f([[1, 2, 3], [2, 3, 4]]) + assert_array_equal(r, [[1, 2], [2, 3]]) + + def test_signature_excluded(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo, signature='()->()', excluded={'b'}) + assert_array_equal(f([1, 2, 3]), [2, 3, 4]) + assert_array_equal(f([1, 2, 3], b=0), [1, 2, 3]) + + def test_signature_otypes(self): + f = vectorize(lambda x: x, signature='(n)->(n)', otypes=['float64']) + r = f([1, 2, 3]) + assert_equal(r.dtype, np.dtype('float64')) + assert_array_equal(r, [1, 2, 3]) + + def test_signature_invalid_inputs(self): + f = vectorize(operator.add, signature='(n),(n)->(n)') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f([1, 2]) + with assert_raises_regex( + ValueError, 'does not have enough dimensions'): + f(1, 2) + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2], [1, 2, 3]) + + f = vectorize(operator.add, signature='()->()') + with 
assert_raises_regex(TypeError, 'wrong number of positional'): + f(1, 2) + + def test_signature_invalid_outputs(self): + + f = vectorize(lambda x: x[:-1], signature='(n)->(n)') + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2, 3]) + + f = vectorize(lambda x: x, signature='()->(),()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f(1) + + f = vectorize(lambda x: (x, x), signature='()->()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f([1, 2]) + + def test_size_zero_output(self): + # see issue 5868 + f = np.vectorize(lambda x: x) + x = np.zeros([0, 5], dtype=int) + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f.otypes = 'i' + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='()->()') + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f = np.vectorize(lambda x: x, signature='()->()', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)') + assert_array_equal(f(x.T), x.T) + + f = np.vectorize(lambda x: [x], signature='()->(n)', otypes='i') + with assert_raises_regex(ValueError, 'new output dimensions'): + f(x) + + +class TestDigitize(object): + + def test_forward(self): + x = np.arange(-6, 5) + bins = np.arange(-5, 5) + assert_array_equal(digitize(x, bins), np.arange(11)) + + def test_reverse(self): + x = np.arange(5, -6, -1) + bins = np.arange(5, -5, -1) + assert_array_equal(digitize(x, bins), np.arange(11)) + + def test_random(self): + x = rand(10) + bin = np.linspace(x.min(), x.max(), 10) + assert_(np.all(digitize(x, bin) != 0)) + + def test_right_basic(self): + x = [1, 5, 4, 10, 8, 11, 0] + bins = [1, 5, 10] + default_answer = [1, 2, 1, 3, 2, 3, 0] + assert_array_equal(digitize(x, bins), default_answer) + right_answer = [0, 1, 1, 2, 2, 3, 0] + assert_array_equal(digitize(x, bins, True), right_answer) + + def test_right_open(self): + x = np.arange(-6, 5) + bins = np.arange(-6, 4) + assert_array_equal(digitize(x, bins, True), np.arange(11)) + + def test_right_open_reverse(self): + x = np.arange(5, -6, -1) + bins = np.arange(4, -6, -1) + assert_array_equal(digitize(x, bins, True), np.arange(11)) + + def test_right_open_random(self): + x = rand(10) + bins = np.linspace(x.min(), x.max(), 10) + assert_(np.all(digitize(x, bins, True) != 10)) + + def test_monotonic(self): + x = [-1, 0, 1, 2] + bins = [0, 0, 1] + assert_array_equal(digitize(x, bins, False), [0, 2, 3, 3]) + assert_array_equal(digitize(x, bins, True), [0, 0, 2, 3]) + bins = [1, 1, 0] + assert_array_equal(digitize(x, bins, False), [3, 2, 0, 0]) + assert_array_equal(digitize(x, bins, True), [3, 3, 2, 0]) + bins = [1, 1, 1, 1] + assert_array_equal(digitize(x, bins, False), [0, 0, 4, 4]) + assert_array_equal(digitize(x, bins, True), [0, 0, 0, 4]) + bins = [0, 0, 1, 0] + assert_raises(ValueError, digitize, x, bins) + bins = [1, 1, 0, 1] + assert_raises(ValueError, digitize, x, bins) + + def test_casting_error(self): + x = [1, 2, 3 + 1.j] + bins = [1, 2, 3] + assert_raises(TypeError, digitize, x, bins) + x, bins = bins, x + assert_raises(TypeError, digitize, x, bins) + + def test_return_type(self): + # Functions returning indices should always return base ndarrays + class A(np.ndarray): + pass + a = np.arange(5).view(A) + b = np.arange(1, 3).view(A) + assert_(not isinstance(digitize(b, a, False), A)) + assert_(not isinstance(digitize(b, a, True), A)) + + def 
test_large_integers_increasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x - 1, x + 1]), 1) + + @pytest.mark.xfail( + reason="gh-11022: np.core.multiarray._monoticity loses precision") + def test_large_integers_decreasing(self): + # gh-11022 + x = 2**54 # loses precision in a float + assert_equal(np.digitize(x, [x + 1, x - 1]), 1) + + +class TestUnwrap(object): + + def test_simple(self): + # check that unwrap removes jumps greater that 2*pi + assert_array_equal(unwrap([1, 1 + 2 * np.pi]), [1, 1]) + # check that unwrap maintains continuity + assert_(np.all(diff(unwrap(rand(10) * 100)) < np.pi)) + + +class TestFilterwindows(object): + + def test_hanning(self): + # check symmetry + w = hanning(10) + assert_array_almost_equal(w, flipud(w), 7) + # check known value + assert_almost_equal(np.sum(w, axis=0), 4.500, 4) + + def test_hamming(self): + # check symmetry + w = hamming(10) + assert_array_almost_equal(w, flipud(w), 7) + # check known value + assert_almost_equal(np.sum(w, axis=0), 4.9400, 4) + + def test_bartlett(self): + # check symmetry + w = bartlett(10) + assert_array_almost_equal(w, flipud(w), 7) + # check known value + assert_almost_equal(np.sum(w, axis=0), 4.4444, 4) + + def test_blackman(self): + # check symmetry + w = blackman(10) + assert_array_almost_equal(w, flipud(w), 7) + # check known value + assert_almost_equal(np.sum(w, axis=0), 3.7800, 4) + + +class TestTrapz(object): + + def test_simple(self): + x = np.arange(-10, 10, .1) + r = trapz(np.exp(-.5 * x ** 2) / np.sqrt(2 * np.pi), dx=0.1) + # check integral of normal equals 1 + assert_almost_equal(r, 1, 7) + + def test_ndim(self): + x = np.linspace(0, 1, 3) + y = np.linspace(0, 2, 8) + z = np.linspace(0, 3, 13) + + wx = np.ones_like(x) * (x[1] - x[0]) + wx[0] /= 2 + wx[-1] /= 2 + wy = np.ones_like(y) * (y[1] - y[0]) + wy[0] /= 2 + wy[-1] /= 2 + wz = np.ones_like(z) * (z[1] - z[0]) + wz[0] /= 2 + wz[-1] /= 2 + + q = x[:, None, None] + y[None,:, None] + z[None, None,:] + + qx = (q * wx[:, None, None]).sum(axis=0) + qy = (q * wy[None, :, None]).sum(axis=1) + qz = (q * wz[None, None, :]).sum(axis=2) + + # n-d `x` + r = trapz(q, x=x[:, None, None], axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y[None,:, None], axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z[None, None,:], axis=2) + assert_almost_equal(r, qz) + + # 1-d `x` + r = trapz(q, x=x, axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y, axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z, axis=2) + assert_almost_equal(r, qz) + + def test_masked(self): + # Testing that masked arrays behave as if the function is 0 where + # masked + x = np.arange(5) + y = x * x + mask = x == 2 + ym = np.ma.array(y, mask=mask) + r = 13.0 # sum(0.5 * (0 + 1) * 1.0 + 0.5 * (9 + 16)) + assert_almost_equal(trapz(ym, x), r) + + xm = np.ma.array(x, mask=mask) + assert_almost_equal(trapz(ym, xm), r) + + xm = np.ma.array(x, mask=mask) + assert_almost_equal(trapz(y, xm), r) + + +class TestSinc(object): + + def test_simple(self): + assert_(sinc(0) == 1) + w = sinc(np.linspace(-1, 1, 100)) + # check symmetry + assert_array_almost_equal(w, flipud(w), 7) + + def test_array_like(self): + x = [0, 0.5] + y1 = sinc(np.array(x)) + y2 = sinc(list(x)) + y3 = sinc(tuple(x)) + assert_array_equal(y1, y2) + assert_array_equal(y1, y3) + + +class TestUnique(object): + + def test_simple(self): + x = np.array([4, 3, 2, 1, 1, 2, 3, 4, 0]) + assert_(np.all(unique(x) == [0, 1, 2, 3, 4])) + assert_(unique(np.array([1, 1, 1, 1, 1])) == np.array([1])) + x 
= ['widget', 'ham', 'foo', 'bar', 'foo', 'ham'] + assert_(np.all(unique(x) == ['bar', 'foo', 'ham', 'widget'])) + x = np.array([5 + 6j, 1 + 1j, 1 + 10j, 10, 5 + 6j]) + assert_(np.all(unique(x) == [1 + 1j, 1 + 10j, 5 + 6j, 10])) + + +class TestCheckFinite(object): + + def test_simple(self): + a = [1, 2, 3] + b = [1, 2, np.inf] + c = [1, 2, np.nan] + np.lib.asarray_chkfinite(a) + assert_raises(ValueError, np.lib.asarray_chkfinite, b) + assert_raises(ValueError, np.lib.asarray_chkfinite, c) + + def test_dtype_order(self): + # Regression test for missing dtype and order arguments + a = [1, 2, 3] + a = np.lib.asarray_chkfinite(a, order='F', dtype=np.float64) + assert_(a.dtype == np.float64) + + +class TestCorrCoef(object): + A = np.array( + [[0.15391142, 0.18045767, 0.14197213], + [0.70461506, 0.96474128, 0.27906989], + [0.9297531, 0.32296769, 0.19267156]]) + B = np.array( + [[0.10377691, 0.5417086, 0.49807457], + [0.82872117, 0.77801674, 0.39226705], + [0.9314666, 0.66800209, 0.03538394]]) + res1 = np.array( + [[1., 0.9379533, -0.04931983], + [0.9379533, 1., 0.30007991], + [-0.04931983, 0.30007991, 1.]]) + res2 = np.array( + [[1., 0.9379533, -0.04931983, 0.30151751, 0.66318558, 0.51532523], + [0.9379533, 1., 0.30007991, -0.04781421, 0.88157256, 0.78052386], + [-0.04931983, 0.30007991, 1., -0.96717111, 0.71483595, 0.83053601], + [0.30151751, -0.04781421, -0.96717111, 1., -0.51366032, -0.66173113], + [0.66318558, 0.88157256, 0.71483595, -0.51366032, 1., 0.98317823], + [0.51532523, 0.78052386, 0.83053601, -0.66173113, 0.98317823, 1.]]) + + def test_non_array(self): + assert_almost_equal(np.corrcoef([0, 1, 0], [1, 0, 1]), + [[1., -1.], [-1., 1.]]) + + def test_simple(self): + tgt1 = corrcoef(self.A) + assert_almost_equal(tgt1, self.res1) + assert_(np.all(np.abs(tgt1) <= 1.0)) + + tgt2 = corrcoef(self.A, self.B) + assert_almost_equal(tgt2, self.res2) + assert_(np.all(np.abs(tgt2) <= 1.0)) + + def test_ddof(self): + # ddof raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, ddof=-1) + sup.filter(DeprecationWarning) + # ddof has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2) + assert_almost_equal(corrcoef(self.A, ddof=3), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=3), self.res2) + + def test_bias(self): + # bias raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, self.B, 1, 0) + assert_warns(DeprecationWarning, corrcoef, self.A, bias=0) + sup.filter(DeprecationWarning) + # bias has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, bias=1), self.res1) + + def test_complex(self): + x = np.array([[1, 2, 3], [1j, 2j, 3j]]) + res = corrcoef(x) + tgt = np.array([[1., -1.j], [1.j, 1.]]) + assert_allclose(res, tgt) + assert_(np.all(np.abs(res) <= 1.0)) + + def test_xy(self): + x = np.array([[1, 2, 3]]) + y = np.array([[1j, 2j, 3j]]) + assert_allclose(np.corrcoef(x, y), np.array([[1., -1.j], [1.j, 1.]])) + + def test_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(corrcoef(np.array([])), np.nan) + assert_array_equal(corrcoef(np.array([]).reshape(0, 2)), + np.array([]).reshape(0, 0)) + assert_array_equal(corrcoef(np.array([]).reshape(2, 0)), + np.array([[np.nan, np.nan], 
[np.nan, np.nan]])) + + def test_extreme(self): + x = [[1e-100, 1e100], [1e100, 1e-100]] + with np.errstate(all='raise'): + c = corrcoef(x) + assert_array_almost_equal(c, np.array([[1., -1.], [-1., 1.]])) + assert_(np.all(np.abs(c) <= 1.0)) + + +class TestCov(object): + x1 = np.array([[0, 2], [1, 1], [2, 0]]).T + res1 = np.array([[1., -1.], [-1., 1.]]) + x2 = np.array([0.0, 1.0, 2.0], ndmin=2) + frequencies = np.array([1, 4, 1]) + x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T + res2 = np.array([[0.4, -0.4], [-0.4, 0.4]]) + unit_frequencies = np.ones(3, dtype=np.integer) + weights = np.array([1.0, 4.0, 1.0]) + res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. / 3.]]) + unit_weights = np.ones(3) + x3 = np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964]) + + def test_basic(self): + assert_allclose(cov(self.x1), self.res1) + + def test_complex(self): + x = np.array([[1, 2, 3], [1j, 2j, 3j]]) + res = np.array([[1., -1.j], [1.j, 1.]]) + assert_allclose(cov(x), res) + assert_allclose(cov(x, aweights=np.ones(3)), res) + + def test_xy(self): + x = np.array([[1, 2, 3]]) + y = np.array([[1j, 2j, 3j]]) + assert_allclose(cov(x, y), np.array([[1., -1.j], [1.j, 1.]])) + + def test_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(cov(np.array([])), np.nan) + assert_array_equal(cov(np.array([]).reshape(0, 2)), + np.array([]).reshape(0, 0)) + assert_array_equal(cov(np.array([]).reshape(2, 0)), + np.array([[np.nan, np.nan], [np.nan, np.nan]])) + + def test_wrong_ddof(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter('always', RuntimeWarning) + assert_array_equal(cov(self.x1, ddof=5), + np.array([[np.inf, -np.inf], + [-np.inf, np.inf]])) + + def test_1D_rowvar(self): + assert_allclose(cov(self.x3), cov(self.x3, rowvar=0)) + y = np.array([0.0780, 0.3107, 0.2111, 0.0334, 0.8501]) + assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=0)) + + def test_1D_variance(self): + assert_allclose(cov(self.x3, ddof=1), np.var(self.x3, ddof=1)) + + def test_fweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies), + self.res1) + nonint = self.frequencies + 0.5 + assert_raises(TypeError, cov, self.x1, fweights=nonint) + f = np.ones((2, 3), dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = np.ones(2, dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = -1 * np.ones(3, dtype=np.integer) + assert_raises(ValueError, cov, self.x1, fweights=f) + + def test_aweights(self): + assert_allclose(cov(self.x1, aweights=self.weights), self.res3) + assert_allclose(cov(self.x1, aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, aweights=self.unit_weights), self.res1) + w = np.ones((2, 3)) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = np.ones(2) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = -1.0 * np.ones(3) + assert_raises(ValueError, cov, self.x1, aweights=w) + + def test_unit_fweights_and_aweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies, + aweights=self.unit_weights), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies, + aweights=self.unit_weights), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + 
aweights=self.unit_weights), + self.res1) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.weights), + self.res3) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) + + +class Test_I0(object): + + def test_simple(self): + assert_almost_equal( + i0(0.5), + np.array(1.0634833707413234)) + + A = np.array([0.49842636, 0.6969809, 0.22011976, 0.0155549]) + assert_almost_equal( + i0(A), + np.array([1.06307822, 1.12518299, 1.01214991, 1.00006049])) + + B = np.array([[0.827002, 0.99959078], + [0.89694769, 0.39298162], + [0.37954418, 0.05206293], + [0.36465447, 0.72446427], + [0.48164949, 0.50324519]]) + assert_almost_equal( + i0(B), + np.array([[1.17843223, 1.26583466], + [1.21147086, 1.03898290], + [1.03633899, 1.00067775], + [1.03352052, 1.13557954], + [1.05884290, 1.06432317]])) + + +class TestKaiser(object): + + def test_simple(self): + assert_(np.isfinite(kaiser(1, 1.0))) + assert_almost_equal(kaiser(0, 1.0), + np.array([])) + assert_almost_equal(kaiser(2, 1.0), + np.array([0.78984831, 0.78984831])) + assert_almost_equal(kaiser(5, 1.0), + np.array([0.78984831, 0.94503323, 1., + 0.94503323, 0.78984831])) + assert_almost_equal(kaiser(5, 1.56789), + np.array([0.58285404, 0.88409679, 1., + 0.88409679, 0.58285404])) + + def test_int_beta(self): + kaiser(3, 4) + + +class TestMsort(object): + + def test_simple(self): + A = np.array([[0.44567325, 0.79115165, 0.54900530], + [0.36844147, 0.37325583, 0.96098397], + [0.64864341, 0.52929049, 0.39172155]]) + assert_almost_equal( + msort(A), + np.array([[0.36844147, 0.37325583, 0.39172155], + [0.44567325, 0.52929049, 0.54900530], + [0.64864341, 0.79115165, 0.96098397]])) + + +class TestMeshgrid(object): + + def test_simple(self): + [X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7]) + assert_array_equal(X, np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]])) + assert_array_equal(Y, np.array([[4, 4, 4], + [5, 5, 5], + [6, 6, 6], + [7, 7, 7]])) + + def test_single_input(self): + [X] = meshgrid([1, 2, 3, 4]) + assert_array_equal(X, np.array([1, 2, 3, 4])) + + def test_no_input(self): + args = [] + assert_array_equal([], meshgrid(*args)) + assert_array_equal([], meshgrid(*args, copy=False)) + + def test_indexing(self): + x = [1, 2, 3] + y = [4, 5, 6, 7] + [X, Y] = meshgrid(x, y, indexing='ij') + assert_array_equal(X, np.array([[1, 1, 1, 1], + [2, 2, 2, 2], + [3, 3, 3, 3]])) + assert_array_equal(Y, np.array([[4, 5, 6, 7], + [4, 5, 6, 7], + [4, 5, 6, 7]])) + + # Test expected shapes: + z = [8, 9] + assert_(meshgrid(x, y)[0].shape == (4, 3)) + assert_(meshgrid(x, y, indexing='ij')[0].shape == (3, 4)) + assert_(meshgrid(x, y, z)[0].shape == (4, 3, 2)) + assert_(meshgrid(x, y, z, indexing='ij')[0].shape == (3, 4, 2)) + + assert_raises(ValueError, meshgrid, x, y, indexing='notvalid') + + def test_sparse(self): + [X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7], sparse=True) + assert_array_equal(X, np.array([[1, 2, 3]])) + assert_array_equal(Y, np.array([[4], [5], [6], [7]])) + + def test_invalid_arguments(self): + # Test that meshgrid complains about invalid arguments + # Regression test for issue #4755: + # https://github.com/numpy/numpy/issues/4755 + assert_raises(TypeError, meshgrid, + [1, 2, 3], [4, 5, 6, 7], indices='ij') + + def test_return_type(self): + # Test for appropriate dtype in returned arrays. 
+ # Regression test for issue #5297 + # https://github.com/numpy/numpy/issues/5297 + x = np.arange(0, 10, dtype=np.float32) + y = np.arange(10, 20, dtype=np.float64) + + X, Y = np.meshgrid(x,y) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + # copy + X, Y = np.meshgrid(x,y, copy=True) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + # sparse + X, Y = np.meshgrid(x,y, sparse=True) + + assert_(X.dtype == x.dtype) + assert_(Y.dtype == y.dtype) + + def test_writeback(self): + # Issue 8561 + X = np.array([1.1, 2.2]) + Y = np.array([3.3, 4.4]) + x, y = np.meshgrid(X, Y, sparse=False, copy=True) + + x[0, :] = 0 + assert_equal(x[0, :], 0) + assert_equal(x[1, :], X) + + +class TestPiecewise(object): + + def test_simple(self): + # Condition is single bool list + x = piecewise([0, 0], [True, False], [1]) + assert_array_equal(x, [1, 0]) + + # List of conditions: single bool list + x = piecewise([0, 0], [[True, False]], [1]) + assert_array_equal(x, [1, 0]) + + # Conditions is single bool array + x = piecewise([0, 0], np.array([True, False]), [1]) + assert_array_equal(x, [1, 0]) + + # Condition is single int array + x = piecewise([0, 0], np.array([1, 0]), [1]) + assert_array_equal(x, [1, 0]) + + # List of conditions: int array + x = piecewise([0, 0], [np.array([1, 0])], [1]) + assert_array_equal(x, [1, 0]) + + x = piecewise([0, 0], [[False, True]], [lambda x:-1]) + assert_array_equal(x, [0, -1]) + + assert_raises_regex(ValueError, '1 or 2 functions are expected', + piecewise, [0, 0], [[False, True]], []) + assert_raises_regex(ValueError, '1 or 2 functions are expected', + piecewise, [0, 0], [[False, True]], [1, 2, 3]) + + def test_two_conditions(self): + x = piecewise([1, 2], [[True, False], [False, True]], [3, 4]) + assert_array_equal(x, [3, 4]) + + def test_scalar_domains_three_conditions(self): + x = piecewise(3, [True, False, False], [4, 2, 0]) + assert_equal(x, 4) + + def test_default(self): + # No value specified for x[1], should be 0 + x = piecewise([1, 2], [True, False], [2]) + assert_array_equal(x, [2, 0]) + + # Should set x[1] to 3 + x = piecewise([1, 2], [True, False], [2, 3]) + assert_array_equal(x, [2, 3]) + + def test_0d(self): + x = np.array(3) + y = piecewise(x, x > 3, [4, 0]) + assert_(y.ndim == 0) + assert_(y == 0) + + x = 5 + y = piecewise(x, [True, False], [1, 0]) + assert_(y.ndim == 0) + assert_(y == 1) + + # With 3 ranges (It was failing, before) + y = piecewise(x, [False, False, True], [1, 2, 3]) + assert_array_equal(y, 3) + + def test_0d_comparison(self): + x = 3 + y = piecewise(x, [x <= 3, x > 3], [4, 0]) # Should succeed. 
+ assert_equal(y, 4) + + # With 3 ranges (It was failing, before) + x = 4 + y = piecewise(x, [x <= 3, (x > 3) * (x <= 5), x > 5], [1, 2, 3]) + assert_array_equal(y, 2) + + assert_raises_regex(ValueError, '2 or 3 functions are expected', + piecewise, x, [x <= 3, x > 3], [1]) + assert_raises_regex(ValueError, '2 or 3 functions are expected', + piecewise, x, [x <= 3, x > 3], [1, 1, 1, 1]) + + def test_0d_0d_condition(self): + x = np.array(3) + c = np.array(x > 3) + y = piecewise(x, [c], [1, 2]) + assert_equal(y, 2) + + def test_multidimensional_extrafunc(self): + x = np.array([[-2.5, -1.5, -0.5], + [0.5, 1.5, 2.5]]) + y = piecewise(x, [x < 0, x >= 2], [-1, 1, 3]) + assert_array_equal(y, np.array([[-1., -1., -1.], + [3., 3., 1.]])) + + +class TestBincount(object): + + def test_simple(self): + y = np.bincount(np.arange(4)) + assert_array_equal(y, np.ones(4)) + + def test_simple2(self): + y = np.bincount(np.array([1, 5, 2, 4, 1])) + assert_array_equal(y, np.array([0, 2, 1, 0, 1, 1])) + + def test_simple_weight(self): + x = np.arange(4) + w = np.array([0.2, 0.3, 0.5, 0.1]) + y = np.bincount(x, w) + assert_array_equal(y, w) + + def test_simple_weight2(self): + x = np.array([1, 2, 4, 5, 2]) + w = np.array([0.2, 0.3, 0.5, 0.1, 0.2]) + y = np.bincount(x, w) + assert_array_equal(y, np.array([0, 0.2, 0.5, 0, 0.5, 0.1])) + + def test_with_minlength(self): + x = np.array([0, 1, 0, 1, 1]) + y = np.bincount(x, minlength=3) + assert_array_equal(y, np.array([2, 3, 0])) + x = [] + y = np.bincount(x, minlength=0) + assert_array_equal(y, np.array([])) + + def test_with_minlength_smaller_than_maxvalue(self): + x = np.array([0, 1, 1, 2, 2, 3, 3]) + y = np.bincount(x, minlength=2) + assert_array_equal(y, np.array([1, 2, 2, 2])) + y = np.bincount(x, minlength=0) + assert_array_equal(y, np.array([1, 2, 2, 2])) + + def test_with_minlength_and_weights(self): + x = np.array([1, 2, 4, 5, 2]) + w = np.array([0.2, 0.3, 0.5, 0.1, 0.2]) + y = np.bincount(x, w, 8) + assert_array_equal(y, np.array([0, 0.2, 0.5, 0, 0.5, 0.1, 0, 0])) + + def test_empty(self): + x = np.array([], dtype=int) + y = np.bincount(x) + assert_array_equal(x, y) + + def test_empty_with_minlength(self): + x = np.array([], dtype=int) + y = np.bincount(x, minlength=5) + assert_array_equal(y, np.zeros(5, dtype=int)) + + def test_with_incorrect_minlength(self): + x = np.array([], dtype=int) + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", + lambda: np.bincount(x, minlength="foobar")) + assert_raises_regex(ValueError, + "must not be negative", + lambda: np.bincount(x, minlength=-1)) + + x = np.arange(5) + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", + lambda: np.bincount(x, minlength="foobar")) + assert_raises_regex(ValueError, + "must not be negative", + lambda: np.bincount(x, minlength=-1)) + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + def test_dtype_reference_leaks(self): + # gh-6805 + intp_refcount = sys.getrefcount(np.dtype(np.intp)) + double_refcount = sys.getrefcount(np.dtype(np.double)) + + for j in range(10): + np.bincount([1, 2, 3]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + + for j in range(10): + np.bincount([1, 2, 3], [4, 5, 6]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + + +class TestInterp(object): + + def test_exceptions(self): + assert_raises(ValueError, interp, 0, [], []) + 
assert_raises(ValueError, interp, 0, [0], [1, 2]) + assert_raises(ValueError, interp, 0, [0, 1], [1, 2], period=0) + assert_raises(ValueError, interp, 0, [], [], period=360) + assert_raises(ValueError, interp, 0, [0], [1, 2], period=360) + + def test_basic(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = np.linspace(0, 1, 50) + assert_almost_equal(np.interp(x0, x, y), x0) + + def test_right_left_behavior(self): + # Needs range of sizes to test different code paths. + # size ==1 is special cased, 1 < size < 5 is linear search, and + # size >= 5 goes through local search and possibly binary search. + for size in range(1, 10): + xp = np.arange(size, dtype=np.double) + yp = np.ones(size, dtype=np.double) + incpts = np.array([-1, 0, size - 1, size], dtype=np.double) + decpts = incpts[::-1] + + incres = interp(incpts, xp, yp) + decres = interp(decpts, xp, yp) + inctgt = np.array([1, 1, 1, 1], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0) + decres = interp(decpts, xp, yp, left=0) + inctgt = np.array([0, 1, 1, 1], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, right=2) + decres = interp(decpts, xp, yp, right=2) + inctgt = np.array([1, 1, 1, 2], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0, right=2) + decres = interp(decpts, xp, yp, left=0, right=2) + inctgt = np.array([0, 1, 1, 2], dtype=float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + def test_scalar_interpolation_point(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = 0 + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = .3 + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.float32(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.float64(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + x0 = np.nan + assert_almost_equal(np.interp(x0, x, y), x0) + + def test_non_finite_behavior(self): + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.inf, np.inf, 4]) + fp = [1, 2, np.nan, 4] + assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4]) + + def test_complex_interp(self): + # test complex interpolation + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + (1 + np.linspace(0, 1, 5))*1.0j + x0 = 0.3 + y0 = x0 + (1+x0)*1.0j + assert_almost_equal(np.interp(x0, x, y), y0) + # test complex left and right + x0 = -1 + left = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, left=left), left) + x0 = 2.0 + right = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, right=right), right) + # test complex non finite + x = [1, 2, 2.5, 3, 4] + xp = [1, 2, 3, 4] + fp = [1, 2+1j, np.inf, 4] + y = [1, 2+1j, np.inf+0.5j, np.inf, 4] + assert_almost_equal(np.interp(x, xp, fp), y) + # test complex periodic + x = [-180, -170, -185, 185, -10, -5, 0, 365] + xp = [190, -190, 350, -350] + fp = [5+1.0j, 10+2j, 3+3j, 4+4j] + y = [7.5+1.5j, 5.+1.0j, 8.75+1.75j, 6.25+1.25j, 3.+3j, 3.25+3.25j, + 3.5+3.5j, 3.75+3.75j] + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + + def test_zero_dimensional_interpolation_point(self): + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + x0 = np.array(.3) + assert_almost_equal(np.interp(x0, x, y), x0) + + xp = np.array([0, 2, 4]) + fp = np.array([1, -1, 1]) + + 
actual = np.interp(np.array(1), xp, fp) + assert_equal(actual, 0) + assert_(isinstance(actual, np.float64)) + + actual = np.interp(np.array(4.5), xp, fp, period=4) + assert_equal(actual, 0.5) + assert_(isinstance(actual, np.float64)) + + def test_if_len_x_is_small(self): + xp = np.arange(0, 10, 0.0001) + fp = np.sin(xp) + assert_almost_equal(np.interp(np.pi, xp, fp), 0.0) + + def test_period(self): + x = [-180, -170, -185, 185, -10, -5, 0, 365] + xp = [190, -190, 350, -350] + fp = [5, 10, 3, 4] + y = [7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75] + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + x = np.array(x, order='F').reshape(2, -1) + y = np.array(y, order='C').reshape(2, -1) + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + + +def compare_results(res, desired): + for i in range(len(desired)): + assert_array_equal(res[i], desired[i]) + + +class TestPercentile(object): + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.percentile(x, 0), 0.) + assert_equal(np.percentile(x, 100), 3.5) + assert_equal(np.percentile(x, 50), 1.75) + x[1] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(x, 0), np.nan) + assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) + assert_(w[0].category is RuntimeWarning) + + def test_api(self): + d = np.ones(5) + np.percentile(d, 5, None, None, False) + np.percentile(d, 5, None, None, False, 'linear') + o = np.ones((1,)) + np.percentile(d, 5, None, o, False, 'linear') + + def test_2D(self): + x = np.array([[1, 1, 1], + [1, 1, 1], + [4, 4, 3], + [1, 1, 1], + [1, 1, 1]]) + assert_array_equal(np.percentile(x, 50, axis=0), [1, 1, 1]) + + def test_linear(self): + + # Test defaults + assert_equal(np.percentile(range(10), 50), 4.5) + + # explicitly specify interpolation_method 'linear' (the default) + assert_equal(np.percentile(range(10), 50, + interpolation='linear'), 4.5) + + def test_lower_higher(self): + + # interpolation_method 'lower'/'higher' + assert_equal(np.percentile(range(10), 50, + interpolation='lower'), 4) + assert_equal(np.percentile(range(10), 50, + interpolation='higher'), 5) + + def test_midpoint(self): + assert_equal(np.percentile(range(10), 51, + interpolation='midpoint'), 4.5) + assert_equal(np.percentile(range(11), 51, + interpolation='midpoint'), 5.5) + assert_equal(np.percentile(range(11), 50, + interpolation='midpoint'), 5) + + def test_nearest(self): + assert_equal(np.percentile(range(10), 51, + interpolation='nearest'), 5) + assert_equal(np.percentile(range(10), 49, + interpolation='nearest'), 4) + + def test_sequence(self): + x = np.arange(8) * 0.5 + assert_equal(np.percentile(x, [0, 100, 50]), [0, 3.5, 1.75]) + + def test_axis(self): + x = np.arange(12).reshape(3, 4) + + assert_equal(np.percentile(x, (25, 50, 100)), [2.75, 5.5, 11.0]) + + r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]] + assert_equal(np.percentile(x, (25, 50, 100), axis=0), r0) + + r1 = [[0.75, 1.5, 3], [4.75, 5.5, 7], [8.75, 9.5, 11]] + assert_equal(np.percentile(x, (25, 50, 100), axis=1), np.array(r1).T) + + # ensure qth axis is always first as with np.array(old_percentile(..)) + x = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6) + assert_equal(np.percentile(x, (25, 50)).shape, (2,)) + assert_equal(np.percentile(x, (25, 50, 75)).shape, (3,)) + assert_equal(np.percentile(x, (25, 50), axis=0).shape, (2, 4, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=1).shape, (2, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), 
axis=2).shape, (2, 3, 4, 6)) + assert_equal(np.percentile(x, (25, 50), axis=3).shape, (2, 3, 4, 5)) + assert_equal( + np.percentile(x, (25, 50, 75), axis=1).shape, (3, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), + interpolation="higher").shape, (2,)) + assert_equal(np.percentile(x, (25, 50, 75), + interpolation="higher").shape, (3,)) + assert_equal(np.percentile(x, (25, 50), axis=0, + interpolation="higher").shape, (2, 4, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=1, + interpolation="higher").shape, (2, 3, 5, 6)) + assert_equal(np.percentile(x, (25, 50), axis=2, + interpolation="higher").shape, (2, 3, 4, 6)) + assert_equal(np.percentile(x, (25, 50), axis=3, + interpolation="higher").shape, (2, 3, 4, 5)) + assert_equal(np.percentile(x, (25, 50, 75), axis=1, + interpolation="higher").shape, (3, 3, 5, 6)) + + def test_scalar_q(self): + # test for no empty dimensions for compatibility with old percentile + x = np.arange(12).reshape(3, 4) + assert_equal(np.percentile(x, 50), 5.5) + assert_(np.isscalar(np.percentile(x, 50))) + r0 = np.array([4., 5., 6., 7.]) + assert_equal(np.percentile(x, 50, axis=0), r0) + assert_equal(np.percentile(x, 50, axis=0).shape, r0.shape) + r1 = np.array([1.5, 5.5, 9.5]) + assert_almost_equal(np.percentile(x, 50, axis=1), r1) + assert_equal(np.percentile(x, 50, axis=1).shape, r1.shape) + + out = np.empty(1) + assert_equal(np.percentile(x, 50, out=out), 5.5) + assert_equal(out, 5.5) + out = np.empty(4) + assert_equal(np.percentile(x, 50, axis=0, out=out), r0) + assert_equal(out, r0) + out = np.empty(3) + assert_equal(np.percentile(x, 50, axis=1, out=out), r1) + assert_equal(out, r1) + + # test for no empty dimensions for compatibility with old percentile + x = np.arange(12).reshape(3, 4) + assert_equal(np.percentile(x, 50, interpolation='lower'), 5.) 
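# Editor's aside (minimal sketch of the interpolation modes compared in this
# test; data values are assumptions for illustration): when the requested
# percentile falls between two samples, 'interpolation' selects the result.
import numpy as np
data = np.arange(10)                               # 50th percentile sits at 4.5
np.percentile(data, 50)                            # 'linear' (default) -> 4.5
np.percentile(data, 50, interpolation='lower')     # -> 4
np.percentile(data, 50, interpolation='higher')    # -> 5
np.percentile(data, 50, interpolation='midpoint')  # -> 4.5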
+ assert_(np.isscalar(np.percentile(x, 50))) + r0 = np.array([4., 5., 6., 7.]) + c0 = np.percentile(x, 50, interpolation='lower', axis=0) + assert_equal(c0, r0) + assert_equal(c0.shape, r0.shape) + r1 = np.array([1., 5., 9.]) + c1 = np.percentile(x, 50, interpolation='lower', axis=1) + assert_almost_equal(c1, r1) + assert_equal(c1.shape, r1.shape) + + out = np.empty((), dtype=x.dtype) + c = np.percentile(x, 50, interpolation='lower', out=out) + assert_equal(c, 5) + assert_equal(out, 5) + out = np.empty(4, dtype=x.dtype) + c = np.percentile(x, 50, interpolation='lower', axis=0, out=out) + assert_equal(c, r0) + assert_equal(out, r0) + out = np.empty(3, dtype=x.dtype) + c = np.percentile(x, 50, interpolation='lower', axis=1, out=out) + assert_equal(c, r1) + assert_equal(out, r1) + + def test_exception(self): + assert_raises(ValueError, np.percentile, [1, 2], 56, + interpolation='foobar') + assert_raises(ValueError, np.percentile, [1], 101) + assert_raises(ValueError, np.percentile, [1], -1) + assert_raises(ValueError, np.percentile, [1], list(range(50)) + [101]) + assert_raises(ValueError, np.percentile, [1], list(range(50)) + [-0.1]) + + def test_percentile_list(self): + assert_equal(np.percentile([1, 2, 3], 0), 1) + + def test_percentile_out(self): + x = np.array([1, 2, 3]) + y = np.zeros((3,)) + p = (1, 2, 3) + np.percentile(x, p, out=y) + assert_equal(y, np.percentile(x, p)) + + x = np.array([[1, 2, 3], + [4, 5, 6]]) + + y = np.zeros((3, 3)) + np.percentile(x, p, axis=0, out=y) + assert_equal(y, np.percentile(x, p, axis=0)) + + y = np.zeros((3, 2)) + np.percentile(x, p, axis=1, out=y) + assert_equal(y, np.percentile(x, p, axis=1)) + + x = np.arange(12).reshape(3, 4) + # q.dim > 1, float + r0 = np.array([[2., 3., 4., 5.], [4., 5., 6., 7.]]) + out = np.empty((2, 4)) + assert_equal(np.percentile(x, (25, 50), axis=0, out=out), r0) + assert_equal(out, r0) + r1 = np.array([[0.75, 4.75, 8.75], [1.5, 5.5, 9.5]]) + out = np.empty((2, 3)) + assert_equal(np.percentile(x, (25, 50), axis=1, out=out), r1) + assert_equal(out, r1) + + # q.dim > 1, int + r0 = np.array([[0, 1, 2, 3], [4, 5, 6, 7]]) + out = np.empty((2, 4), dtype=x.dtype) + c = np.percentile(x, (25, 50), interpolation='lower', axis=0, out=out) + assert_equal(c, r0) + assert_equal(out, r0) + r1 = np.array([[0, 4, 8], [1, 5, 9]]) + out = np.empty((2, 3), dtype=x.dtype) + c = np.percentile(x, (25, 50), interpolation='lower', axis=1, out=out) + assert_equal(c, r1) + assert_equal(out, r1) + + def test_percentile_empty_dim(self): + # empty dims are preserved + d = np.arange(11 * 2).reshape(11, 1, 2, 1) + assert_array_equal(np.percentile(d, 50, axis=0).shape, (1, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=1).shape, (11, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=2).shape, (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=3).shape, (11, 1, 2)) + assert_array_equal(np.percentile(d, 50, axis=-1).shape, (11, 1, 2)) + assert_array_equal(np.percentile(d, 50, axis=-2).shape, (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=-3).shape, (11, 2, 1)) + assert_array_equal(np.percentile(d, 50, axis=-4).shape, (1, 2, 1)) + + assert_array_equal(np.percentile(d, 50, axis=2, + interpolation='midpoint').shape, + (11, 1, 1)) + assert_array_equal(np.percentile(d, 50, axis=-2, + interpolation='midpoint').shape, + (11, 1, 1)) + + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=0)).shape, + (2, 1, 2, 1)) + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=1)).shape, + (2, 11, 2, 1)) + 
assert_array_equal(np.array(np.percentile(d, [10, 50], axis=2)).shape, + (2, 11, 1, 1)) + assert_array_equal(np.array(np.percentile(d, [10, 50], axis=3)).shape, + (2, 11, 1, 2)) + + def test_percentile_no_overwrite(self): + a = np.array([2, 3, 4, 1]) + np.percentile(a, [50], overwrite_input=False) + assert_equal(a, np.array([2, 3, 4, 1])) + + a = np.array([2, 3, 4, 1]) + np.percentile(a, [50]) + assert_equal(a, np.array([2, 3, 4, 1])) + + def test_no_p_overwrite(self): + p = np.linspace(0., 100., num=5) + np.percentile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, np.linspace(0., 100., num=5)) + p = np.linspace(0., 100., num=5).tolist() + np.percentile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, np.linspace(0., 100., num=5).tolist()) + + def test_percentile_overwrite(self): + a = np.array([2, 3, 4, 1]) + b = np.percentile(a, [50], overwrite_input=True) + assert_equal(b, np.array([2.5])) + + b = np.percentile([2, 3, 4, 1], [50], overwrite_input=True) + assert_equal(b, np.array([2.5])) + + def test_extended_axis(self): + o = np.random.normal(size=(71, 23)) + x = np.dstack([o] * 10) + assert_equal(np.percentile(x, 30, axis=(0, 1)), np.percentile(o, 30)) + x = np.moveaxis(x, -1, 0) + assert_equal(np.percentile(x, 30, axis=(-2, -1)), np.percentile(o, 30)) + x = x.swapaxes(0, 1).copy() + assert_equal(np.percentile(x, 30, axis=(0, -1)), np.percentile(o, 30)) + x = x.swapaxes(0, 1).copy() + + assert_equal(np.percentile(x, [25, 60], axis=(0, 1, 2)), + np.percentile(x, [25, 60], axis=None)) + assert_equal(np.percentile(x, [25, 60], axis=(0,)), + np.percentile(x, [25, 60], axis=0)) + + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) + assert_equal(np.percentile(d, 25, axis=(0, 1, 2))[0], + np.percentile(d[:,:,:, 0].flatten(), 25)) + assert_equal(np.percentile(d, [10, 90], axis=(0, 1, 3))[:, 1], + np.percentile(d[:,:, 1,:].flatten(), [10, 90])) + assert_equal(np.percentile(d, 25, axis=(3, 1, -4))[2], + np.percentile(d[:,:, 2,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(3, 1, 2))[2], + np.percentile(d[2,:,:,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(3, 2))[2, 1], + np.percentile(d[2, 1,:,:].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(1, -2))[2, 1], + np.percentile(d[2,:,:, 1].flatten(), 25)) + assert_equal(np.percentile(d, 25, axis=(1, 3))[2, 2], + np.percentile(d[2,:, 2,:].flatten(), 25)) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.percentile, d, axis=-5, q=25) + assert_raises(np.AxisError, np.percentile, d, axis=(0, -5), q=25) + assert_raises(np.AxisError, np.percentile, d, axis=4, q=25) + assert_raises(np.AxisError, np.percentile, d, axis=(0, 4), q=25) + # each of these refers to the same axis twice + assert_raises(ValueError, np.percentile, d, axis=(1, 1), q=25) + assert_raises(ValueError, np.percentile, d, axis=(-1, -1), q=25) + assert_raises(ValueError, np.percentile, d, axis=(3, -1), q=25) + + def test_keepdims(self): + d = np.ones((3, 5, 7, 11)) + assert_equal(np.percentile(d, 7, axis=None, keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.percentile(d, 7, axis=(0, 1), keepdims=True).shape, + (1, 1, 7, 11)) + assert_equal(np.percentile(d, 7, axis=(0, 3), keepdims=True).shape, + (1, 5, 7, 1)) + assert_equal(np.percentile(d, 7, axis=(1,), keepdims=True).shape, + (3, 1, 7, 11)) + assert_equal(np.percentile(d, 7, (0, 1, 2, 3), keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.percentile(d, 7, 
axis=(0, 1, 3), keepdims=True).shape, + (1, 1, 7, 1)) + + assert_equal(np.percentile(d, [1, 7], axis=(0, 1, 3), + keepdims=True).shape, (2, 1, 1, 7, 1)) + assert_equal(np.percentile(d, [1, 7], axis=(0, 3), + keepdims=True).shape, (2, 1, 5, 7, 1)) + + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal(np.percentile(d, 0, 0, interpolation='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal(np.percentile(d, 1, 1, interpolation='nearest', out=o), o) + + o = np.zeros(()) + assert_equal(np.percentile(d, 2, out=o), o) + assert_equal(np.percentile(d, 2, interpolation='nearest', out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal( + np.percentile(d, 0, 0, interpolation='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal( + np.percentile(d, 1, 1, interpolation='nearest', out=o), o) + o = np.zeros(()) + assert_equal(np.percentile(d, 1, out=o), o) + assert_equal( + np.percentile(d, 1, interpolation='nearest', out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3, axis=0), np.nan) + assert_equal(np.percentile(a, [0.3, 0.6], axis=0), + np.array([np.nan] * 2)) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + assert_(w[2].category is RuntimeWarning) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3).ndim, 0) + assert_(w[0].category is RuntimeWarning) + + # axis0 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) + b[2, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, 0), b) + + # axis0 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], 0) + b[:, 2, 3] = np.nan + b[:, 1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], 0), b) + + # axis1 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, 1), b) + # axis1 not zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) + b[:, 1, 3] = np.nan + b[:, 1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], 1), b) + + # axis02 zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) + b[1] = np.nan + b[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', 
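# --- Editorial sketch (not part of the vendored patch) ---------------------
# The NaN tests here check that np.percentile propagates NaN (possibly with a
# RuntimeWarning) rather than skipping it; np.nanpercentile is the NaN-ignoring
# counterpart.  A quick contrast:
import warnings
import numpy as np

a = np.array([1.0, 2.0, np.nan, 4.0])
with warnings.catch_warnings():
    warnings.simplefilter('ignore', RuntimeWarning)
    assert np.isnan(np.percentile(a, 50))      # NaN poisons the result
assert np.nanpercentile(a, 50) == 2.0          # NaN dropped: median of [1, 2, 4]
# --- end editorial sketch ---------------------------------------------------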
'', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, (0, 2)), b) + # axis02 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2)) + b[:, 1] = np.nan + b[:, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) + # axis02 not zerod with nearest interpolation + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2), interpolation='nearest') + b[:, 1] = np.nan + b[:, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile( + a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) + + +class TestQuantile(object): + # most of this is already tested by TestPercentile + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) + assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + +class TestMedian(object): + + def test_basic(self): + a0 = np.array(1) + a1 = np.arange(2) + a2 = np.arange(6).reshape(2, 3) + assert_equal(np.median(a0), 1) + assert_allclose(np.median(a1), 0.5) + assert_allclose(np.median(a2), 2.5) + assert_allclose(np.median(a2, axis=0), [1.5, 2.5, 3.5]) + assert_equal(np.median(a2, axis=1), [1, 4]) + assert_allclose(np.median(a2, axis=None), 2.5) + + a = np.array([0.0444502, 0.0463301, 0.141249, 0.0606775]) + assert_almost_equal((a[1] + a[3]) / 2., np.median(a)) + a = np.array([0.0463301, 0.0444502, 0.141249]) + assert_equal(a[0], np.median(a)) + a = np.array([0.0444502, 0.141249, 0.0463301]) + assert_equal(a[-1], np.median(a)) + # check array scalar result + assert_equal(np.median(a).ndim, 0) + a[1] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a).ndim, 0) + assert_(w[0].category is RuntimeWarning) + + def test_axis_keyword(self): + a3 = np.array([[2, 3], + [0, 1], + [6, 7], + [4, 5]]) + for a in [a3, np.random.randint(0, 100, size=(2, 3, 4))]: + orig = a.copy() + np.median(a, axis=None) + for ax in range(a.ndim): + np.median(a, axis=ax) + assert_array_equal(a, orig) + + assert_allclose(np.median(a3, axis=0), [3, 4]) + assert_allclose(np.median(a3.T, axis=1), [3, 4]) + assert_allclose(np.median(a3), 3.5) + assert_allclose(np.median(a3, axis=None), 3.5) + assert_allclose(np.median(a3.T), 3.5) + + def test_overwrite_keyword(self): + a3 = np.array([[2, 3], + [0, 1], + [6, 7], + [4, 5]]) + a0 = np.array(1) + a1 = np.arange(2) + a2 = np.arange(6).reshape(2, 3) + assert_allclose(np.median(a0.copy(), overwrite_input=True), 1) + assert_allclose(np.median(a1.copy(), overwrite_input=True), 0.5) + assert_allclose(np.median(a2.copy(), overwrite_input=True), 2.5) + assert_allclose(np.median(a2.copy(), overwrite_input=True, axis=0), + [1.5, 2.5, 3.5]) + assert_allclose( + np.median(a2.copy(), overwrite_input=True, axis=1), [1, 4]) + assert_allclose( + np.median(a2.copy(), overwrite_input=True, axis=None), 2.5) + assert_allclose( + np.median(a3.copy(), overwrite_input=True, axis=0), [3, 4]) + 
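# --- Editorial sketch (not part of the vendored patch) ---------------------
# TestQuantile above can lean on TestPercentile because np.quantile(x, q) is
# np.percentile(x, 100 * q), and np.median agrees with the 50% point:
import numpy as np

x = np.arange(8) * 0.5
assert np.quantile(x, 0.5) == np.percentile(x, 50) == 1.75
assert np.median(x) == np.percentile(x, 50)
# --- end editorial sketch ---------------------------------------------------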
assert_allclose(np.median(a3.T.copy(), overwrite_input=True, axis=1), + [3, 4]) + + a4 = np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) + np.random.shuffle(a4.ravel()) + assert_allclose(np.median(a4, axis=None), + np.median(a4.copy(), axis=None, overwrite_input=True)) + assert_allclose(np.median(a4, axis=0), + np.median(a4.copy(), axis=0, overwrite_input=True)) + assert_allclose(np.median(a4, axis=1), + np.median(a4.copy(), axis=1, overwrite_input=True)) + assert_allclose(np.median(a4, axis=2), + np.median(a4.copy(), axis=2, overwrite_input=True)) + + def test_array_like(self): + x = [1, 2, 3] + assert_almost_equal(np.median(x), 2) + x2 = [x] + assert_almost_equal(np.median(x2), 2) + assert_allclose(np.median(x2, axis=0), x) + + def test_subclass(self): + # gh-3846 + class MySubClass(np.ndarray): + + def __new__(cls, input_array, info=None): + obj = np.asarray(input_array).view(cls) + obj.info = info + return obj + + def mean(self, axis=None, dtype=None, out=None): + return -7 + + a = MySubClass([1, 2, 3]) + assert_equal(np.median(a), -7) + + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a).ndim, 0) + assert_(w[0].category is RuntimeWarning) + + # axis0 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) + b[2, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, 0), b) + assert_equal(len(w), 1) + + # axis1 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, 1), b) + assert_equal(len(w), 1) + + # axis02 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2)) + b[1] = np.nan + b[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, (0, 2)), b) + assert_equal(len(w), 1) + + def test_empty(self): + # empty arrays + a = np.array([], dtype=float) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + + # multiple dimensions + a = np.array([], dtype=float, ndmin=3) + # no axis + with 
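# --- Editorial sketch (not part of the vendored patch) ---------------------
# The gh-3846 subclass test above works because np.median partitions the data
# around the middle and then averages the central element(s) via the array's
# own .mean(), so overriding mean() on a subclass changes the result.  A sketch
# of the even-length case (the `part`/`manual` names are illustrative):
import numpy as np

a = np.array([2.0, 3.0, 4.0, 1.0])
n = a.size
part = np.partition(a, [n // 2 - 1, n // 2])      # middle two values in place
manual = part[n // 2 - 1: n // 2 + 1].mean()
assert manual == np.median(a) == 2.5
# --- end editorial sketch ---------------------------------------------------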
warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + + # axis 0 and 1 + b = np.array([], dtype=float, ndmin=2) + assert_equal(np.median(a, axis=0), b) + assert_equal(np.median(a, axis=1), b) + + # axis 2 + b = np.array(np.nan, dtype=float, ndmin=2) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, axis=2), b) + assert_(w[0].category is RuntimeWarning) + + def test_object(self): + o = np.arange(7.) + assert_(type(np.median(o.astype(object))), float) + o[2] = np.nan + assert_(type(np.median(o.astype(object))), float) + + def test_extended_axis(self): + o = np.random.normal(size=(71, 23)) + x = np.dstack([o] * 10) + assert_equal(np.median(x, axis=(0, 1)), np.median(o)) + x = np.moveaxis(x, -1, 0) + assert_equal(np.median(x, axis=(-2, -1)), np.median(o)) + x = x.swapaxes(0, 1).copy() + assert_equal(np.median(x, axis=(0, -1)), np.median(o)) + + assert_equal(np.median(x, axis=(0, 1, 2)), np.median(x, axis=None)) + assert_equal(np.median(x, axis=(0, )), np.median(x, axis=0)) + assert_equal(np.median(x, axis=(-1, )), np.median(x, axis=-1)) + + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) + assert_equal(np.median(d, axis=(0, 1, 2))[0], + np.median(d[:,:,:, 0].flatten())) + assert_equal(np.median(d, axis=(0, 1, 3))[1], + np.median(d[:,:, 1,:].flatten())) + assert_equal(np.median(d, axis=(3, 1, -4))[2], + np.median(d[:,:, 2,:].flatten())) + assert_equal(np.median(d, axis=(3, 1, 2))[2], + np.median(d[2,:,:,:].flatten())) + assert_equal(np.median(d, axis=(3, 2))[2, 1], + np.median(d[2, 1,:,:].flatten())) + assert_equal(np.median(d, axis=(1, -2))[2, 1], + np.median(d[2,:,:, 1].flatten())) + assert_equal(np.median(d, axis=(1, 3))[2, 2], + np.median(d[2,:, 2,:].flatten())) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.median, d, axis=-5) + assert_raises(np.AxisError, np.median, d, axis=(0, -5)) + assert_raises(np.AxisError, np.median, d, axis=4) + assert_raises(np.AxisError, np.median, d, axis=(0, 4)) + assert_raises(ValueError, np.median, d, axis=(1, 1)) + + def test_keepdims(self): + d = np.ones((3, 5, 7, 11)) + assert_equal(np.median(d, axis=None, keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.median(d, axis=(0, 1), keepdims=True).shape, + (1, 1, 7, 11)) + assert_equal(np.median(d, axis=(0, 3), keepdims=True).shape, + (1, 5, 7, 1)) + assert_equal(np.median(d, axis=(1,), keepdims=True).shape, + (3, 1, 7, 11)) + assert_equal(np.median(d, axis=(0, 1, 2, 3), keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.median(d, axis=(0, 1, 3), keepdims=True).shape, + (1, 1, 7, 1)) + + +class TestAdd_newdoc_ufunc(object): + + def test_ufunc_arg(self): + assert_raises(TypeError, add_newdoc_ufunc, 2, "blah") + assert_raises(ValueError, add_newdoc_ufunc, np.add, "blah") + + def test_string_arg(self): + assert_raises(TypeError, add_newdoc_ufunc, np.add, 3) + + +class TestAdd_newdoc(object): + + @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + @pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") + def test_add_doc(self): + # test np.add_newdoc + tgt = "Current flat index into the array." 
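# --- Editorial sketch (not part of the vendored patch) ---------------------
# The keepdims shape checks above amount to one property: each reduced axis is
# kept with length 1, so the result still broadcasts against the input.
import numpy as np

d = np.ones((3, 5, 7, 11))
m = np.median(d, axis=(0, 3), keepdims=True)
assert m.shape == (1, 5, 7, 1)
assert (d - m).shape == d.shape                   # broadcasts cleanly
# --- end editorial sketch ---------------------------------------------------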
+ assert_equal(np.core.flatiter.index.__doc__[:len(tgt)], tgt) + assert_(len(np.core.ufunc.identity.__doc__) > 300) + assert_(len(np.lib.index_tricks.mgrid.__doc__) > 300) + +class TestSortComplex(object): + + @pytest.mark.parametrize("type_in, type_out", [ + ('l', 'D'), + ('h', 'F'), + ('H', 'F'), + ('b', 'F'), + ('B', 'F'), + ('g', 'G'), + ]) + def test_sort_real(self, type_in, type_out): + # sort_complex() type casting for real input types + a = np.array([5, 3, 6, 2, 1], dtype=type_in) + actual = np.sort_complex(a) + expected = np.sort(a).astype(type_out) + assert_equal(actual, expected) + assert_equal(actual.dtype, expected.dtype) + + def test_sort_complex(self): + # sort_complex() handling of complex input + a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j], dtype='D') + expected = np.array([1 - 3j, 1 - 2j, 2 + 1j, 2 + 3j], dtype='D') + actual = np.sort_complex(a) + assert_equal(actual, expected) + assert_equal(actual.dtype, expected.dtype) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_histograms.py b/contrib/python/numpy/py2/numpy/lib/tests/test_histograms.py new file mode 100644 index 0000000000..594c8e782c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_histograms.py @@ -0,0 +1,844 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np + +from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_allclose, + assert_array_max_ulp, assert_raises_regex, suppress_warnings, + ) +import pytest + + +class TestHistogram(object): + + def setup(self): + pass + + def teardown(self): + pass + + def test_simple(self): + n = 100 + v = np.random.rand(n) + (a, b) = histogram(v) + # check if the sum of the bins equals the number of samples + assert_equal(np.sum(a, axis=0), n) + # check that the bin counts are evenly spaced when the data is from + # a linear function + (a, b) = histogram(np.linspace(0, 10, 100)) + assert_array_equal(a, 10) + + def test_one_bin(self): + # Ticket 632 + hist, edges = histogram([1, 2, 3, 4], [1, 2]) + assert_array_equal(hist, [2, ]) + assert_array_equal(edges, [1, 2]) + assert_raises(ValueError, histogram, [1, 2], bins=0) + h, e = histogram([1, 2], bins=1) + assert_equal(h, np.array([2])) + assert_allclose(e, np.array([1., 2.])) + + def test_normed(self): + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check that the integral of the density equals 1. + n = 100 + v = np.random.rand(n) + a, b = histogram(v, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) + + sup = suppress_warnings() + with sup: + rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*') + # Check with non-constant bin widths (buggy but backwards + # compatible) + v = np.arange(10) + bins = [0, 1, 5, 9, 10] + a, b = histogram(v, bins, normed=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + assert_equal(len(rec), 1) + + def test_density(self): + # Check that the integral of the density equals 1. 
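# --- Editorial sketch (not part of the vendored patch) ---------------------
# The TestSortComplex cases above rely on NumPy's complex ordering: values are
# compared lexicographically by real part and then by imaginary part, and
# sort_complex always returns a complex-typed result.
import numpy as np

a = np.array([2 + 3j, 1 - 2j, 1 - 3j, 2 + 1j])
manual = np.array(sorted(a, key=lambda z: (z.real, z.imag)))
assert np.array_equal(np.sort_complex(a), manual)
# --- end editorial sketch ---------------------------------------------------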
+ n = 100 + v = np.random.rand(n) + a, b = histogram(v, density=True) + area = np.sum(a * np.diff(b)) + assert_almost_equal(area, 1) + + # Check with non-constant bin widths + v = np.arange(10) + bins = [0, 1, 3, 6, 10] + a, b = histogram(v, bins, density=True) + assert_array_equal(a, .1) + assert_equal(np.sum(a * np.diff(b)), 1) + + # Test that passing False works too + a, b = histogram(v, bins, density=False) + assert_array_equal(a, [1, 2, 3, 4]) + + # Variale bin widths are especially useful to deal with + # infinities. + v = np.arange(10) + bins = [0, 1, 3, 6, np.inf] + a, b = histogram(v, bins, density=True) + assert_array_equal(a, [.1, .1, .1, 0.]) + + # Taken from a bug report from N. Becker on the numpy-discussion + # mailing list Aug. 6, 2010. + counts, dmy = np.histogram( + [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) + assert_equal(counts, [.25, 0]) + + def test_outliers(self): + # Check that outliers are not tallied + a = np.arange(10) + .5 + + # Lower outliers + h, b = histogram(a, range=[0, 9]) + assert_equal(h.sum(), 9) + + # Upper outliers + h, b = histogram(a, range=[1, 10]) + assert_equal(h.sum(), 9) + + # Normalization + h, b = histogram(a, range=[1, 9], density=True) + assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15) + + # Weights + w = np.arange(10) + .5 + h, b = histogram(a, range=[1, 9], weights=w, density=True) + assert_equal((h * np.diff(b)).sum(), 1) + + h, b = histogram(a, bins=8, range=[1, 9], weights=w) + assert_equal(h, w[1:-1]) + + def test_arr_weights_mismatch(self): + a = np.arange(10) + .5 + w = np.arange(11) + .5 + with assert_raises_regex(ValueError, "same shape as"): + h, b = histogram(a, range=[1, 9], weights=w, density=True) + + + def test_type(self): + # Check the type of the returned histogram + a = np.arange(10) + .5 + h, b = histogram(a) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, density=True) + assert_(np.issubdtype(h.dtype, np.floating)) + + h, b = histogram(a, weights=np.ones(10, int)) + assert_(np.issubdtype(h.dtype, np.integer)) + + h, b = histogram(a, weights=np.ones(10, float)) + assert_(np.issubdtype(h.dtype, np.floating)) + + def test_f32_rounding(self): + # gh-4799, check that the rounding of the edges works with float32 + x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) + y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) + counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) + assert_equal(counts_hist.sum(), 3.) + + def test_bool_conversion(self): + # gh-12107 + # Reference integer histogram + a = np.array([1, 1, 0], dtype=np.uint8) + int_hist, int_edges = np.histogram(a) + + # Should raise an warning on booleans + # Ensure that the histograms are equivalent, need to suppress + # the warnings to get the actual outputs + with suppress_warnings() as sup: + rec = sup.record(RuntimeWarning, 'Converting input from .*') + hist, edges = np.histogram([True, True, False]) + # A warning should be issued + assert_equal(len(rec), 1) + assert_array_equal(hist, int_hist) + assert_array_equal(edges, int_edges) + + def test_weights(self): + v = np.random.rand(100) + w = np.ones(100) * 5 + a, b = histogram(v) + na, nb = histogram(v, density=True) + wa, wb = histogram(v, weights=w) + nwa, nwb = histogram(v, weights=w, density=True) + assert_array_almost_equal(a * 5, wa) + assert_array_almost_equal(na, nwa) + + # Check weights are properly applied. 
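# --- Editorial sketch (not part of the vendored patch) ---------------------
# density=True divides the raw counts by the number of samples times each bin
# width, so sum(hist * diff(edges)) is 1 even for unequal bins -- the invariant
# the density tests above assert.  A direct cross-check:
import numpy as np

v = np.arange(10)
bins = [0, 1, 3, 6, 10]
counts, edges = np.histogram(v, bins)
density, _ = np.histogram(v, bins, density=True)
assert np.allclose(density, counts / float(counts.sum()) / np.diff(edges))
assert np.isclose(np.sum(density * np.diff(edges)), 1.0)
# --- end editorial sketch ---------------------------------------------------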
+ v = np.linspace(0, 10, 10) + w = np.concatenate((np.zeros(5), np.ones(5))) + wa, wb = histogram(v, bins=np.arange(11), weights=w) + assert_array_almost_equal(wa, w) + + # Check with integer weights + wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) + assert_array_equal(wa, [4, 5, 0, 1]) + wa, wb = histogram( + [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True) + assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4) + + # Check weights with non-uniform bin widths + a, b = histogram( + np.arange(9), [0, 1, 3, 6, 10], + weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) + assert_almost_equal(a, [.2, .1, .1, .075]) + + def test_exotic_weights(self): + + # Test the use of weights that are not integer or floats, but e.g. + # complex numbers or object types. + + # Complex weights + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Decimal weights + from decimal import Decimal + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([Decimal(1), Decimal(2), Decimal(3)]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + def test_no_side_effects(self): + # This is a regression test that ensures that values passed to + # ``histogram`` are unchanged. + values = np.array([1.3, 2.5, 2.3]) + np.histogram(values, range=[-10, 10], bins=100) + assert_array_almost_equal(values, [1.3, 2.5, 2.3]) + + def test_empty(self): + a, b = histogram([], bins=([0, 1])) + assert_array_equal(a, np.array([0])) + assert_array_equal(b, np.array([0, 1])) + + def test_error_binnum_type (self): + # Tests if right Error is raised if bins argument is float + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, 5) + assert_raises(TypeError, histogram, vals, 2.4) + + def test_finite_range(self): + # Normal ranges should be fine + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, range=[0.25,0.75]) + assert_raises(ValueError, histogram, vals, range=[np.nan,0.75]) + assert_raises(ValueError, histogram, vals, range=[0.25,np.inf]) + + def test_invalid_range(self): + # start of range must be < end of range + vals = np.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + np.histogram(vals, range=[0.1, 0.01]) + + def test_bin_edge_cases(self): + # Ensure that floating-point computations correctly place edge cases. 
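# --- Editorial sketch (not part of the vendored patch) ---------------------
# A weighted histogram simply sums the weights of the samples that fall in
# each bin, which is why the uniform weight of 5 in test_weights above scales
# every count by 5.  A minimal check with explicit bins:
import numpy as np

values = np.array([1, 2, 2, 4])
weights = np.array([4, 3, 2, 1])
wa, _ = np.histogram(values, bins=[0, 2, 3, 5], weights=weights)
# bin [0, 2) gets the weight of value 1; [2, 3) the two 2s; [3, 5] the 4
assert np.array_equal(wa, [4, 3 + 2, 1])
# --- end editorial sketch ---------------------------------------------------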
+ arr = np.array([337, 404, 739, 806, 1007, 1811, 2012]) + hist, edges = np.histogram(arr, bins=8296, range=(2, 2280)) + mask = hist > 0 + left_edges = edges[:-1][mask] + right_edges = edges[1:][mask] + for x, left, right in zip(arr, left_edges, right_edges): + assert_(x >= left) + assert_(x < right) + + def test_last_bin_inclusive_range(self): + arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) + hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5)) + assert_equal(hist[-1], 1) + + def test_bin_array_dims(self): + # gracefully handle bins object > 1 dimension + vals = np.linspace(0.0, 1.0, num=100) + bins = np.array([[0, 0.5], [0.6, 1.0]]) + with assert_raises_regex(ValueError, "must be 1d"): + np.histogram(vals, bins=bins) + + def test_unsigned_monotonicity_check(self): + # Ensures ValueError is raised if bins not increasing monotonically + # when bins contain unsigned values (see #9222) + arr = np.array([2]) + bins = np.array([1, 3, 1], dtype='uint64') + with assert_raises(ValueError): + hist, edges = np.histogram(arr, bins=bins) + + def test_object_array_of_0d(self): + # gh-7864 + assert_raises(ValueError, + histogram, [np.array(0.4) for i in range(10)] + [-np.inf]) + assert_raises(ValueError, + histogram, [np.array(0.4) for i in range(10)] + [np.inf]) + + # these should not crash + np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001]) + np.histogram([np.array(0.5) for i in range(10)] + [.5]) + + def test_some_nan_values(self): + # gh-7503 + one_nan = np.array([0, 1, np.nan]) + all_nan = np.array([np.nan, np.nan]) + + # the internal comparisons with NaN give warnings + sup = suppress_warnings() + sup.filter(RuntimeWarning) + with sup: + # can't infer range with nan + assert_raises(ValueError, histogram, one_nan, bins='auto') + assert_raises(ValueError, histogram, all_nan, bins='auto') + + # explicit range solves the problem + h, b = histogram(one_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 0) # nan is not counted + + # as does an explicit set of bins + h, b = histogram(one_nan, bins=[0, 1]) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins=[0, 1]) + assert_equal(h.sum(), 0) # nan is not counted + + def test_datetime(self): + begin = np.datetime64('2000-01-01', 'D') + offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20]) + bins = np.array([0, 2, 7, 20]) + dates = begin + offsets + date_bins = begin + bins + + td = np.dtype('timedelta64[D]') + + # Results should be the same for integer offsets or datetime values. 
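# --- Editorial sketch (not part of the vendored patch) ---------------------
# Several edge-case tests above (test_last_bin_inclusive_range in particular)
# hinge on the binning convention: every bin is half-open [left, right) except
# the last, which is closed so the top of the range is still counted.
import numpy as np

hist, edges = np.histogram([0.0, 0.5, 1.0], bins=2, range=(0, 1))
# edges are [0, 0.5, 1]; 0.5 falls in the second bin, and 1.0 is kept by the
# closed last bin
assert np.array_equal(hist, [1, 2])
# --- end editorial sketch ---------------------------------------------------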
+ # For now, only explicit bins are supported, since linspace does not + # work on datetimes or timedeltas + d_count, d_edge = histogram(dates, bins=date_bins) + t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td)) + i_count, i_edge = histogram(offsets, bins=bins) + + assert_equal(d_count, i_count) + assert_equal(t_count, i_count) + + assert_equal((d_edge - begin).astype(int), i_edge) + assert_equal(t_edge.astype(int), i_edge) + + assert_equal(d_edge.dtype, dates.dtype) + assert_equal(t_edge.dtype, td) + + def do_signed_overflow_bounds(self, dtype): + exponent = 8 * np.dtype(dtype).itemsize - 1 + arr = np.array([-2**exponent + 4, 2**exponent - 4], dtype=dtype) + hist, e = histogram(arr, bins=2) + assert_equal(e, [-2**exponent + 4, 0, 2**exponent - 4]) + assert_equal(hist, [1, 1]) + + def test_signed_overflow_bounds(self): + self.do_signed_overflow_bounds(np.byte) + self.do_signed_overflow_bounds(np.short) + self.do_signed_overflow_bounds(np.intc) + self.do_signed_overflow_bounds(np.int_) + self.do_signed_overflow_bounds(np.longlong) + + def do_precision_lower_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([1.0 + eps, 2.0], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[0] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision_upper_bound(self, float_small, float_large): + eps = np.finfo(float_large).eps + + arr = np.array([1.0], float_small) + range = np.array([0.0, 1.0 - eps], float_large) + + # test is looking for behavior when the bounds change between dtypes + if range.astype(float_small)[-1] != 1: + return + + # previously crashed + count, x_loc = np.histogram(arr, bins=1, range=range) + assert_equal(count, [1]) + + # gh-10322 means that the type comes from arr - this may change + assert_equal(x_loc.dtype, float_small) + + def do_precision(self, float_small, float_large): + self.do_precision_lower_bound(float_small, float_large) + self.do_precision_upper_bound(float_small, float_large) + + def test_precision(self): + # not looping results in a useful stack trace upon failure + self.do_precision(np.half, np.single) + self.do_precision(np.half, np.double) + self.do_precision(np.half, np.longdouble) + self.do_precision(np.single, np.double) + self.do_precision(np.single, np.longdouble) + self.do_precision(np.double, np.longdouble) + + def test_histogram_bin_edges(self): + hist, e = histogram([1, 2, 3, 4], [1, 2]) + edges = histogram_bin_edges([1, 2, 3, 4], [1, 2]) + assert_array_equal(edges, e) + + arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) + hist, e = histogram(arr, bins=30, range=(-0.5, 5)) + edges = histogram_bin_edges(arr, bins=30, range=(-0.5, 5)) + assert_array_equal(edges, e) + + hist, e = histogram(arr, bins='auto', range=(0, 1)) + edges = histogram_bin_edges(arr, bins='auto', range=(0, 1)) + assert_array_equal(edges, e) + + +class TestHistogramOptimBinNums(object): + """ + Provide test coverage when using provided estimators for optimal number of + bins + """ + + def test_empty(self): + estimator_list = ['fd', 'scott', 'rice', 'sturges', + 'doane', 'sqrt', 'auto', 'stone'] + # check it can deal with empty data + for estimator in estimator_list: + a, b = histogram([], bins=estimator) + assert_array_equal(a, 
np.array([0])) + assert_array_equal(b, np.array([0, 1])) + + def test_simple(self): + """ + Straightforward testing with a mixture of linspace data (for + consistency). All test values have been precomputed and the values + shouldn't change + """ + # Some basic sanity checking, with some fixed data. + # Checking for the correct number of bins + basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7, + 'doane': 8, 'sqrt': 8, 'auto': 7, 'stone': 2}, + 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10, + 'doane': 12, 'sqrt': 23, 'auto': 10, 'stone': 9}, + 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14, + 'doane': 17, 'sqrt': 71, 'auto': 17, 'stone': 20}} + + for testlen, expectedResults in basic_test.items(): + # Create some sort of non uniform data to test with + # (2 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x = np.concatenate((x1, x2)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator) + assert_equal(len(a), numbins, err_msg="For the {0} estimator " + "with datasize of {1}".format(estimator, testlen)) + + def test_small(self): + """ + Smaller datasets have the potential to cause issues with the data + adaptive methods, especially the FD method. All bin numbers have been + precalculated. + """ + small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, + 'doane': 1, 'sqrt': 1, 'stone': 1}, + 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2, + 'doane': 1, 'sqrt': 2, 'stone': 1}, + 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3, + 'doane': 3, 'sqrt': 2, 'stone': 1}} + + for testlen, expectedResults in small_dat.items(): + testdat = np.arange(testlen) + for estimator, expbins in expectedResults.items(): + a, b = np.histogram(testdat, estimator) + assert_equal(len(a), expbins, err_msg="For the {0} estimator " + "with datasize of {1}".format(estimator, testlen)) + + def test_incorrect_methods(self): + """ + Check a Value Error is thrown when an unknown string is passed in + """ + check_list = ['mad', 'freeman', 'histograms', 'IQR'] + for estimator in check_list: + assert_raises(ValueError, histogram, [1, 2, 3], estimator) + + def test_novariance(self): + """ + Check that methods handle no variance in data + Primarily for Scott and FD as the SD and IQR are both 0 in this case + """ + novar_dataset = np.ones(100) + novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, + 'doane': 1, 'sqrt': 1, 'auto': 1, 'stone': 1} + + for estimator, numbins in novar_resultdict.items(): + a, b = np.histogram(novar_dataset, estimator) + assert_equal(len(a), numbins, err_msg="{0} estimator, " + "No Variance test".format(estimator)) + + def test_limited_variance(self): + """ + Check when IQR is 0, but variance exists, we return the sturges value + and not the fd value. + """ + lim_var_data = np.ones(1000) + lim_var_data[:3] = 0 + lim_var_data[-4:] = 100 + + edges_auto = histogram_bin_edges(lim_var_data, 'auto') + assert_equal(edges_auto, np.linspace(0, 100, 12)) + + edges_fd = histogram_bin_edges(lim_var_data, 'fd') + assert_equal(edges_fd, np.array([0, 100])) + + edges_sturges = histogram_bin_edges(lim_var_data, 'sturges') + assert_equal(edges_sturges, np.linspace(0, 100, 12)) + + def test_outlier(self): + """ + Check the FD, Scott and Doane with outliers. + + The FD estimates a smaller binwidth since it's less affected by + outliers. 
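# --- Editorial sketch (not part of the vendored patch) ---------------------
# The precomputed counts in test_simple/test_small above come from the
# textbook rules behind the simpler estimators: Sturges uses log2(n) + 1 bins,
# 'sqrt' uses sqrt(n) bins and Rice uses 2 * n**(1/3) bins, each rounded up.
# Checking the n = 500 column of the table:
import numpy as np

n = 500
assert int(np.ceil(np.log2(n) + 1)) == 10          # sturges
assert int(np.ceil(np.sqrt(n))) == 23              # sqrt
assert int(np.ceil(2 * n ** (1.0 / 3.0))) == 16    # rice
# --- end editorial sketch ---------------------------------------------------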
Since the range is so (artificially) large, this means more + bins, most of which will be empty, but the data of interest usually is + unaffected. The Scott estimator is more affected and returns fewer bins, + despite most of the variance being in one area of the data. The Doane + estimator lies somewhere between the other two. + """ + xcenter = np.linspace(-10, 10, 50) + outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter)) + + outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11, 'stone': 6} + + for estimator, numbins in outlier_resultdict.items(): + a, b = np.histogram(outlier_dataset, estimator) + assert_equal(len(a), numbins) + + def test_scott_vs_stone(self): + """Verify that Scott's rule and Stone's rule converges for normally distributed data""" + + def nbins_ratio(seed, size): + rng = np.random.RandomState(seed) + x = rng.normal(loc=0, scale=2, size=size) + a, b = len(np.histogram(x, 'stone')[0]), len(np.histogram(x, 'scott')[0]) + return a / (a + b) + + ll = [[nbins_ratio(seed, size) for size in np.geomspace(start=10, stop=100, num=4).round().astype(int)] + for seed in range(256)] + + # the average difference between the two methods decreases as the dataset size increases. + assert_almost_equal(abs(np.mean(ll, axis=0) - 0.5), + [0.1065248, + 0.0968844, + 0.0331818, + 0.0178057], + decimal=3) + + def test_simple_range(self): + """ + Straightforward testing with a mixture of linspace data (for + consistency). Adding in a 3rd mixture that will then be + completely ignored. All test values have been precomputed and + the shouldn't change. + """ + # some basic sanity checking, with some fixed data. + # Checking for the correct number of bins + basic_test = { + 50: {'fd': 8, 'scott': 8, 'rice': 15, + 'sturges': 14, 'auto': 14, 'stone': 8}, + 500: {'fd': 15, 'scott': 16, 'rice': 32, + 'sturges': 20, 'auto': 20, 'stone': 80}, + 5000: {'fd': 33, 'scott': 33, 'rice': 69, + 'sturges': 27, 'auto': 33, 'stone': 80} + } + + for testlen, expectedResults in basic_test.items(): + # create some sort of non uniform data to test with + # (3 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x3 = np.linspace(-100, -50, testlen) + x = np.hstack((x1, x2, x3)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator, range = (-20, 20)) + msg = "For the {0} estimator".format(estimator) + msg += " with datasize of {0}".format(testlen) + assert_equal(len(a), numbins, err_msg=msg) + + @pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott', + 'stone', 'rice', 'sturges']) + def test_signed_integer_data(self, bins): + # Regression test for gh-14379. 
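# --- Editorial sketch (not part of the vendored patch) ---------------------
# The outlier behaviour described above follows from the bin-width rules the
# estimators are built on: Freedman-Diaconis uses 2*IQR*n**(-1/3) while Scott
# uses roughly 3.5*std*n**(-1/3), and a few distant outliers inflate the
# standard deviation far more than the interquartile range.  A rough
# illustration on the same kind of data (`iqr` is an illustrative helper):
import numpy as np

def iqr(a):
    return float(np.subtract(*np.percentile(a, [75, 25])))

center = np.linspace(-10, 10, 50)
with_outliers = np.hstack((np.linspace(-110, -100, 5), center))
assert iqr(with_outliers) / iqr(center) < 1.5      # IQR barely moves
assert np.std(with_outliers) / np.std(center) > 4  # std is blown up
# --- end editorial sketch ---------------------------------------------------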
+ a = np.array([-2, 0, 127], dtype=np.int8) + hist, edges = np.histogram(a, bins=bins) + hist32, edges32 = np.histogram(a.astype(np.int32), bins=bins) + assert_array_equal(hist, hist32) + assert_array_equal(edges, edges32) + + def test_simple_weighted(self): + """ + Check that weighted data raises a TypeError + """ + estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] + for estimator in estimator_list: + assert_raises(TypeError, histogram, [1, 2, 3], + estimator, weights=[1, 2, 3]) + + +class TestHistogramdd(object): + + def test_simple(self): + x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], + [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) + H, edges = histogramdd(x, (2, 3, 3), + range=[[-1, 1], [0, 3], [0, 3]]) + answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) + assert_array_equal(H, answer) + + # Check normalization + ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] + H, edges = histogramdd(x, bins=ed, density=True) + assert_(np.all(H == answer / 12.)) + + # Check that H has the correct shape. + H, edges = histogramdd(x, (2, 3, 4), + range=[[-1, 1], [0, 3], [0, 4]], + density=True) + answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], + [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) + assert_array_almost_equal(H, answer / 6., 4) + # Check that a sequence of arrays is accepted and H has the correct + # shape. + z = [np.squeeze(y) for y in np.split(x, 3, axis=1)] + H, edges = histogramdd( + z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) + answer = np.array([[[0, 0], [0, 0], [0, 0]], + [[0, 1], [0, 0], [1, 0]], + [[0, 1], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0]]]) + assert_array_equal(H, answer) + + Z = np.zeros((5, 5, 5)) + Z[list(range(5)), list(range(5)), list(range(5))] = 1. + H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) + assert_array_equal(H, Z) + + def test_shape_3d(self): + # All possible permutations for bins of different lengths in 3D. + bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), + (4, 5, 6)) + r = np.random.rand(10, 3) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_shape_4d(self): + # All possible permutations for bins of different lengths in 4D. 
+ bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), + (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), + (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), + (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), + (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), + (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) + + r = np.random.rand(10, 4) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_weights(self): + v = np.random.rand(100, 2) + hist, edges = histogramdd(v) + n_hist, edges = histogramdd(v, density=True) + w_hist, edges = histogramdd(v, weights=np.ones(100)) + assert_array_equal(w_hist, hist) + w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, density=True) + assert_array_equal(w_hist, n_hist) + w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) + assert_array_equal(w_hist, 2 * hist) + + def test_identical_samples(self): + x = np.zeros((10, 2), int) + hist, edges = histogramdd(x, bins=2) + assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) + + def test_empty(self): + a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, np.array([[0.]])) + a, b = np.histogramdd([[], [], []], bins=2) + assert_array_max_ulp(a, np.zeros((2, 2, 2))) + + def test_bins_errors(self): + # There are two ways to specify bins. Check for the right errors + # when mixing those. + x = np.arange(8).reshape(2, 4) + assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) + assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) + assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) + + def test_inf_edges(self): + # Test using +/-inf bin edges works. See #1788. + with np.errstate(invalid='ignore'): + x = np.arange(6).reshape(3, 2) + expected = np.array([[1, 0], [0, 1], [0, 1]]) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) + assert_allclose(h, expected) + + def test_rightmost_binedge(self): + # Test event very close to rightmost binedge. See Github issue #4266 + x = [0.9999999995] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) 
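# --- Editorial sketch (not part of the vendored patch) ---------------------
# For histogramdd, density=True divides each count by the total number of
# samples and by the bin's "area" (the product of its edge widths along every
# dimension); that is why the non-uniform grid in test_density_non_uniform_2d
# further down comes out flat.  A cross-check using histogram2d, which wraps
# histogramdd for the 2-d case:
import numpy as np

x_edges = np.array([0.0, 2.0, 8.0])
y_edges = np.array([0.0, 6.0, 8.0])
x = np.array([1] + [1] * 3 + [7] * 3 + [7] * 9)
y = np.array([7] + [1] * 3 + [7] * 3 + [1] * 9)

counts, _, _ = np.histogram2d(y, x, bins=(y_edges, x_edges))
areas = np.diff(y_edges)[:, None] * np.diff(x_edges)[None, :]
density, _, _ = np.histogram2d(y, x, bins=(y_edges, x_edges), density=True)
assert np.allclose(density, counts / counts.sum() / areas)
# --- end editorial sketch ---------------------------------------------------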
+ x = [1.0000000001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + x = [1.0001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + + def test_finite_range(self): + vals = np.random.random((100, 3)) + histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + def test_equal_edges(self): + """ Test that adjacent entries in an edge array can be equal """ + x = np.array([0, 1, 2]) + y = np.array([0, 1, 2]) + x_edges = np.array([0, 2, 2]) + y_edges = 1 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + hist_expected = np.array([ + [2.], + [1.], # x == 2 falls in the final bin + ]) + assert_equal(hist, hist_expected) + + def test_edge_dtype(self): + """ Test that if an edge array is input, its type is preserved """ + x = np.array([0, 10, 20]) + y = x / 10 + x_edges = np.array([0, 5, 15, 20]) + y_edges = x_edges / 10 + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(edges[0].dtype, x_edges.dtype) + assert_equal(edges[1].dtype, y_edges.dtype) + + def test_large_integers(self): + big = 2**60 # Too large to represent with a full precision float + + x = np.array([0], np.int64) + x_edges = np.array([-1, +1], np.int64) + y = big + x + y_edges = big + x_edges + + hist, edges = histogramdd((x, y), bins=(x_edges, y_edges)) + + assert_equal(hist[0, 0], 1) + + def test_density_non_uniform_2d(self): + # Defines the following grid: + # + # 0 2 8 + # 0+-+-----+ + # + | + + # + | + + # 6+-+-----+ + # 8+-+-----+ + x_edges = np.array([0, 2, 8]) + y_edges = np.array([0, 6, 8]) + relative_areas = np.array([ + [3, 9], + [1, 3]]) + + # ensure the number of points in each region is proportional to its area + x = np.array([1] + [1]*3 + [7]*3 + [7]*9) + y = np.array([7] + [1]*3 + [7]*3 + [1]*9) + + # sanity check that the above worked as intended + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges)) + assert_equal(hist, relative_areas) + + # resulting histogram should be uniform, since counts and areas are propotional + hist, edges = histogramdd((y, x), bins=(y_edges, x_edges), density=True) + assert_equal(hist, 1 / (8*8)) + + def test_density_non_uniform_1d(self): + # compare to histogram to show the results are the same + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + hist, edges = histogram(v, bins, density=True) + hist_dd, edges_dd = histogramdd((v,), (bins,), density=True) + assert_equal(hist, hist_dd) + assert_equal(edges, edges_dd[0]) + + def test_density_via_normed(self): + # normed should simply alias to density argument + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + hist, edges = histogram(v, bins, density=True) + hist_dd, edges_dd = histogramdd((v,), (bins,), normed=True) + assert_equal(hist, hist_dd) + assert_equal(edges, edges_dd[0]) + + def test_density_normed_redundancy(self): + v = np.arange(10) + bins = np.array([0, 1, 3, 6, 10]) + with assert_raises_regex(TypeError, "Cannot specify both"): + hist_dd, edges_dd = histogramdd((v,), (bins,), + density=True, + normed=True) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_index_tricks.py b/contrib/python/numpy/py2/numpy/lib/tests/test_index_tricks.py new file mode 100644 index 0000000000..3246f68ff4 --- /dev/null +++ 
b/contrib/python/numpy/py2/numpy/lib/tests/test_index_tricks.py @@ -0,0 +1,454 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_raises_regex, + assert_warns + ) +from numpy.lib.index_tricks import ( + mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, + index_exp, ndindex, r_, s_, ix_ + ) + + +class TestRavelUnravelIndex(object): + def test_basic(self): + assert_equal(np.unravel_index(2, (2, 2)), (1, 0)) + + # test backwards compatibility with older dims + # keyword argument; see Issue #10586 + with assert_warns(DeprecationWarning): + # we should achieve the correct result + # AND raise the appropriate warning + # when using older "dims" kw argument + assert_equal(np.unravel_index(indices=2, + dims=(2, 2)), + (1, 0)) + + # test that new shape argument works properly + assert_equal(np.unravel_index(indices=2, + shape=(2, 2)), + (1, 0)) + + # test that an invalid second keyword argument + # is properly handled + with assert_raises(TypeError): + np.unravel_index(indices=2, hape=(2, 2)) + + with assert_raises(TypeError): + np.unravel_index(2, hape=(2, 2)) + + with assert_raises(TypeError): + np.unravel_index(254, ims=(17, 94)) + + assert_equal(np.ravel_multi_index((1, 0), (2, 2)), 2) + assert_equal(np.unravel_index(254, (17, 94)), (2, 66)) + assert_equal(np.ravel_multi_index((2, 66), (17, 94)), 254) + assert_raises(ValueError, np.unravel_index, -1, (2, 2)) + assert_raises(TypeError, np.unravel_index, 0.5, (2, 2)) + assert_raises(ValueError, np.unravel_index, 4, (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (-3, 1), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (2, 1), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (0, -3), (2, 2)) + assert_raises(ValueError, np.ravel_multi_index, (0, 2), (2, 2)) + assert_raises(TypeError, np.ravel_multi_index, (0.1, 0.), (2, 2)) + + assert_equal(np.unravel_index((2*3 + 1)*6 + 4, (4, 3, 6)), [2, 1, 4]) + assert_equal( + np.ravel_multi_index([2, 1, 4], (4, 3, 6)), (2*3 + 1)*6 + 4) + + arr = np.array([[3, 6, 6], [4, 5, 1]]) + assert_equal(np.ravel_multi_index(arr, (7, 6)), [22, 41, 37]) + assert_equal( + np.ravel_multi_index(arr, (7, 6), order='F'), [31, 41, 13]) + assert_equal( + np.ravel_multi_index(arr, (4, 6), mode='clip'), [22, 23, 19]) + assert_equal(np.ravel_multi_index(arr, (4, 4), mode=('clip', 'wrap')), + [12, 13, 13]) + assert_equal(np.ravel_multi_index((3, 1, 4, 1), (6, 7, 8, 9)), 1621) + + assert_equal(np.unravel_index(np.array([22, 41, 37]), (7, 6)), + [[3, 6, 6], [4, 5, 1]]) + assert_equal( + np.unravel_index(np.array([31, 41, 13]), (7, 6), order='F'), + [[3, 6, 6], [4, 5, 1]]) + assert_equal(np.unravel_index(1621, (6, 7, 8, 9)), [3, 1, 4, 1]) + + def test_big_indices(self): + # ravel_multi_index for big indices (issue #7546) + if np.intp == np.int64: + arr = ([1, 29], [3, 5], [3, 117], [19, 2], + [2379, 1284], [2, 2], [0, 1]) + assert_equal( + np.ravel_multi_index(arr, (41, 7, 120, 36, 2706, 8, 6)), + [5627771580, 117259570957]) + + # test overflow checking for too big array (issue #7546) + dummy_arr = ([0],[0]) + half_max = np.iinfo(np.intp).max // 2 + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2)), [0]) + assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2)) + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2), order='F'), [0]) + 
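# --- Editorial sketch (not part of the vendored patch) ---------------------
# The expected values in the ravel_multi_index/unravel_index tests above follow
# the usual C-order (row-major) flattening rule: the last index varies fastest.
import numpy as np

shape = (4, 3, 6)
idx = (2, 1, 4)
flat = (idx[0] * shape[1] + idx[1]) * shape[2] + idx[2]    # (2*3 + 1)*6 + 4
assert flat == np.ravel_multi_index(idx, shape)
assert np.unravel_index(flat, shape) == idx
# --- end editorial sketch ---------------------------------------------------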
assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2), order='F') + + def test_dtypes(self): + # Test with different data types + for dtype in [np.int16, np.uint16, np.int32, + np.uint32, np.int64, np.uint64]: + coords = np.array( + [[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0]], dtype=dtype) + shape = (5, 8) + uncoords = 8*coords[0]+coords[1] + assert_equal(np.ravel_multi_index(coords, shape), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape)) + uncoords = coords[0]+5*coords[1] + assert_equal( + np.ravel_multi_index(coords, shape, order='F'), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape, order='F')) + + coords = np.array( + [[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0], [1, 3, 1, 0, 9, 5]], + dtype=dtype) + shape = (5, 8, 10) + uncoords = 10*(8*coords[0]+coords[1])+coords[2] + assert_equal(np.ravel_multi_index(coords, shape), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape)) + uncoords = coords[0]+5*(coords[1]+8*coords[2]) + assert_equal( + np.ravel_multi_index(coords, shape, order='F'), uncoords) + assert_equal(coords, np.unravel_index(uncoords, shape, order='F')) + + def test_clipmodes(self): + # Test clipmodes + assert_equal( + np.ravel_multi_index([5, 1, -1, 2], (4, 3, 7, 12), mode='wrap'), + np.ravel_multi_index([1, 1, 6, 2], (4, 3, 7, 12))) + assert_equal(np.ravel_multi_index([5, 1, -1, 2], (4, 3, 7, 12), + mode=( + 'wrap', 'raise', 'clip', 'raise')), + np.ravel_multi_index([1, 1, 0, 2], (4, 3, 7, 12))) + assert_raises( + ValueError, np.ravel_multi_index, [5, 1, -1, 2], (4, 3, 7, 12)) + + def test_writeability(self): + # See gh-7269 + x, y = np.unravel_index([1, 2, 3], (4, 5)) + assert_(x.flags.writeable) + assert_(y.flags.writeable) + + def test_0d(self): + # gh-580 + x = np.unravel_index(0, ()) + assert_equal(x, ()) + + assert_raises_regex(ValueError, "0d array", np.unravel_index, [0], ()) + assert_raises_regex( + ValueError, "out of bounds", np.unravel_index, [1], ()) + + +class TestGrid(object): + def test_basic(self): + a = mgrid[-1:1:10j] + b = mgrid[-1:1:0.1] + assert_(a.shape == (10,)) + assert_(b.shape == (20,)) + assert_(a[0] == -1) + assert_almost_equal(a[-1], 1) + assert_(b[0] == -1) + assert_almost_equal(b[1]-b[0], 0.1, 11) + assert_almost_equal(b[-1], b[0]+19*0.1, 11) + assert_almost_equal(a[1]-a[0], 2.0/9.0, 11) + + def test_linspace_equivalence(self): + y, st = np.linspace(2, 10, retstep=1) + assert_almost_equal(st, 8/49.0) + assert_array_almost_equal(y, mgrid[2:10:50j], 13) + + def test_nd(self): + c = mgrid[-1:1:10j, -2:2:10j] + d = mgrid[-1:1:0.1, -2:2:0.2] + assert_(c.shape == (2, 10, 10)) + assert_(d.shape == (2, 20, 20)) + assert_array_equal(c[0][0, :], -np.ones(10, 'd')) + assert_array_equal(c[1][:, 0], -2*np.ones(10, 'd')) + assert_array_almost_equal(c[0][-1, :], np.ones(10, 'd'), 11) + assert_array_almost_equal(c[1][:, -1], 2*np.ones(10, 'd'), 11) + assert_array_almost_equal(d[0, 1, :] - d[0, 0, :], + 0.1*np.ones(20, 'd'), 11) + assert_array_almost_equal(d[1, :, 1] - d[1, :, 0], + 0.2*np.ones(20, 'd'), 11) + + def test_sparse(self): + grid_full = mgrid[-1:1:10j, -2:2:10j] + grid_sparse = ogrid[-1:1:10j, -2:2:10j] + + # sparse grids can be made dense by broadcasting + grid_broadcast = np.broadcast_arrays(*grid_sparse) + for f, b in zip(grid_full, grid_broadcast): + assert_equal(f, b) + + @pytest.mark.parametrize("start, stop, step, expected", [ + (None, 10, 10j, (200, 10)), + (-10, 20, None, (1800, 30)), + ]) + def test_mgrid_size_none_handling(self, start, stop, step, expected): + # 
regression test None value handling for + # start and step values used by mgrid; + # internally, this aims to cover previously + # unexplored code paths in nd_grid() + grid = mgrid[start:stop:step, start:stop:step] + # need a smaller grid to explore one of the + # untested code paths + grid_small = mgrid[start:stop:step] + assert_equal(grid.size, expected[0]) + assert_equal(grid_small.size, expected[1]) + + +class TestConcatenator(object): + def test_1d(self): + assert_array_equal(r_[1, 2, 3, 4, 5, 6], np.array([1, 2, 3, 4, 5, 6])) + b = np.ones(5) + c = r_[b, 0, 0, b] + assert_array_equal(c, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1]) + + def test_mixed_type(self): + g = r_[10.1, 1:10] + assert_(g.dtype == 'f8') + + def test_more_mixed_type(self): + g = r_[-10.1, np.array([1]), np.array([2, 3, 4]), 10.0] + assert_(g.dtype == 'f8') + + def test_complex_step(self): + # Regression test for #12262 + g = r_[0:36:100j] + assert_(g.shape == (100,)) + + def test_2d(self): + b = np.random.rand(5, 5) + c = np.random.rand(5, 5) + d = r_['1', b, c] # append columns + assert_(d.shape == (5, 10)) + assert_array_equal(d[:, :5], b) + assert_array_equal(d[:, 5:], c) + d = r_[b, c] + assert_(d.shape == (10, 5)) + assert_array_equal(d[:5, :], b) + assert_array_equal(d[5:, :], c) + + def test_0d(self): + assert_equal(r_[0, np.array(1), 2], [0, 1, 2]) + assert_equal(r_[[0, 1, 2], np.array(3)], [0, 1, 2, 3]) + assert_equal(r_[np.array(0), [1, 2, 3]], [0, 1, 2, 3]) + + +class TestNdenumerate(object): + def test_basic(self): + a = np.array([[1, 2], [3, 4]]) + assert_equal(list(ndenumerate(a)), + [((0, 0), 1), ((0, 1), 2), ((1, 0), 3), ((1, 1), 4)]) + + +class TestIndexExpression(object): + def test_regression_1(self): + # ticket #1196 + a = np.arange(2) + assert_equal(a[:-1], a[s_[:-1]]) + assert_equal(a[:-1], a[index_exp[:-1]]) + + def test_simple_1(self): + a = np.random.rand(4, 5, 6) + + assert_equal(a[:, :3, [1, 2]], a[index_exp[:, :3, [1, 2]]]) + assert_equal(a[:, :3, [1, 2]], a[s_[:, :3, [1, 2]]]) + + +class TestIx_(object): + def test_regression_1(self): + # Test empty inputs create outputs of indexing type, gh-5804 + # Test both lists and arrays + for func in (range, np.arange): + a, = np.ix_(func(0)) + assert_equal(a.dtype, np.intp) + + def test_shape_and_dtype(self): + sizes = (4, 5, 3, 2) + # Test both lists and arrays + for func in (range, np.arange): + arrays = np.ix_(*[func(sz) for sz in sizes]) + for k, (a, sz) in enumerate(zip(arrays, sizes)): + assert_equal(a.shape[k], sz) + assert_(all(sh == 1 for j, sh in enumerate(a.shape) if j != k)) + assert_(np.issubdtype(a.dtype, np.integer)) + + def test_bool(self): + bool_a = [True, False, True, True] + int_a, = np.nonzero(bool_a) + assert_equal(np.ix_(bool_a)[0], int_a) + + def test_1d_only(self): + idx2d = [[1, 2, 3], [4, 5, 6]] + assert_raises(ValueError, np.ix_, idx2d) + + def test_repeated_input(self): + length_of_vector = 5 + x = np.arange(length_of_vector) + out = ix_(x, x) + assert_equal(out[0].shape, (length_of_vector, 1)) + assert_equal(out[1].shape, (1, length_of_vector)) + # check that input shape is not modified + assert_equal(x.shape, (length_of_vector,)) + + +def test_c_(): + a = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])] + assert_equal(a, [[1, 2, 3, 0, 0, 4, 5, 6]]) + + +class TestFillDiagonal(object): + def test_basic(self): + a = np.zeros((3, 3), int) + fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5]]) + ) + + def test_tall_matrix(self): + a = np.zeros((10, 3), int) + 
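# --- Editorial sketch (not part of the vendored patch) ---------------------
# np.ix_ builds an "open mesh": the k-th output keeps its length along axis k
# and has size 1 elsewhere, so indexing with it selects the full cross product
# of the given rows and columns -- the shape property the TestIx_ cases above
# check.
import numpy as np

a = np.arange(20).reshape(4, 5)
rows, cols = np.ix_([0, 2], [1, 3])
assert rows.shape == (2, 1) and cols.shape == (1, 2)
assert np.array_equal(a[rows, cols], a[[0, 2]][:, [1, 3]])
# --- end editorial sketch ---------------------------------------------------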
fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + [0, 0, 0]]) + ) + + def test_tall_matrix_wrap(self): + a = np.zeros((10, 3), int) + fill_diagonal(a, 5, True) + assert_array_equal( + a, np.array([[5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [5, 0, 0], + [0, 5, 0], + [0, 0, 5], + [0, 0, 0], + [5, 0, 0], + [0, 5, 0]]) + ) + + def test_wide_matrix(self): + a = np.zeros((3, 10), int) + fill_diagonal(a, 5) + assert_array_equal( + a, np.array([[5, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 5, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 5, 0, 0, 0, 0, 0, 0, 0]]) + ) + + def test_operate_4d_array(self): + a = np.zeros((3, 3, 3, 3), int) + fill_diagonal(a, 4) + i = np.array([0, 1, 2]) + assert_equal(np.where(a != 0), (i, i, i, i)) + + def test_low_dim_handling(self): + # raise error with low dimensionality + a = np.zeros(3, int) + with assert_raises_regex(ValueError, "at least 2-d"): + fill_diagonal(a, 5) + + def test_hetero_shape_handling(self): + # raise error with high dimensionality and + # shape mismatch + a = np.zeros((3,3,7,3), int) + with assert_raises_regex(ValueError, "equal length"): + fill_diagonal(a, 2) + + +def test_diag_indices(): + di = diag_indices(4) + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + a[di] = 100 + assert_array_equal( + a, np.array([[100, 2, 3, 4], + [5, 100, 7, 8], + [9, 10, 100, 12], + [13, 14, 15, 100]]) + ) + + # Now, we create indices to manipulate a 3-d array: + d3 = diag_indices(2, 3) + + # And use it to set the diagonal of a zeros array to 1: + a = np.zeros((2, 2, 2), int) + a[d3] = 1 + assert_array_equal( + a, np.array([[[1, 0], + [0, 0]], + [[0, 0], + [0, 1]]]) + ) + + +class TestDiagIndicesFrom(object): + + def test_diag_indices_from(self): + x = np.random.random((4, 4)) + r, c = diag_indices_from(x) + assert_array_equal(r, np.arange(4)) + assert_array_equal(c, np.arange(4)) + + def test_error_small_input(self): + x = np.ones(7) + with assert_raises_regex(ValueError, "at least 2-d"): + diag_indices_from(x) + + def test_error_shape_mismatch(self): + x = np.zeros((3, 3, 2, 3), int) + with assert_raises_regex(ValueError, "equal length"): + diag_indices_from(x) + + +def test_ndindex(): + x = list(ndindex(1, 2, 3)) + expected = [ix for ix, e in ndenumerate(np.zeros((1, 2, 3)))] + assert_array_equal(x, expected) + + x = list(ndindex((1, 2, 3))) + assert_array_equal(x, expected) + + # Test use of scalars and tuples + x = list(ndindex((3,))) + assert_array_equal(x, list(ndindex(3))) + + # Make sure size argument is optional + x = list(ndindex()) + assert_equal(x, [()]) + + x = list(ndindex(())) + assert_equal(x, [()]) + + # Make sure 0-sized ndindex works correctly + x = list(ndindex(*[0])) + assert_equal(x, []) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_io.py b/contrib/python/numpy/py2/numpy/lib/tests/test_io.py new file mode 100644 index 0000000000..899e490312 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_io.py @@ -0,0 +1,2518 @@ +from __future__ import division, absolute_import, print_function + +import sys +import gzip +import os +import threading +import time +import warnings +import io +import re +import pytest +from tempfile import NamedTemporaryFile +from io import BytesIO, StringIO +from datetime import datetime +import locale + +import numpy as np +import numpy.ma as ma +from numpy.lib._iotools import ConverterError, ConversionWarning +from numpy.compat import asbytes, 
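# --- Editorial sketch (not part of the vendored patch) ---------------------
# diag_indices(n, ndim) is simply the tuple (arange(n),) repeated ndim times,
# so assigning through it touches the hyper-diagonal that the fill_diagonal
# and diag_indices tests above build by hand.
import numpy as np

n, ndim = 2, 3
idx = np.diag_indices(n, ndim)
assert len(idx) == ndim
assert all(np.array_equal(i, np.arange(n)) for i in idx)

a = np.zeros((2, 2, 2), int)
a[idx] = 1
assert a[0, 0, 0] == 1 and a[1, 1, 1] == 1 and a.sum() == 2
# --- end editorial sketch ---------------------------------------------------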
bytes, Path +from numpy.ma.testutils import assert_equal +from numpy.testing import ( + assert_warns, assert_, assert_raises_regex, assert_raises, + assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, + ) + + +class TextIO(BytesIO): + """Helper IO class. + + Writes encode strings to bytes if needed, reads return bytes. + This makes it easier to emulate files opened in binary mode + without needing to explicitly convert strings to bytes in + setting up the test data. + + """ + def __init__(self, s=""): + BytesIO.__init__(self, asbytes(s)) + + def write(self, s): + BytesIO.write(self, asbytes(s)) + + def writelines(self, lines): + BytesIO.writelines(self, [asbytes(s) for s in lines]) + + +MAJVER, MINVER = sys.version_info[:2] +IS_64BIT = sys.maxsize > 2**32 +try: + import bz2 + HAS_BZ2 = True +except ImportError: + HAS_BZ2 = False +try: + import lzma + HAS_LZMA = True +except ImportError: + HAS_LZMA = False + + +def strptime(s, fmt=None): + """ + This function is available in the datetime module only from Python >= + 2.5. + + """ + if type(s) == bytes: + s = s.decode("latin1") + return datetime(*time.strptime(s, fmt)[:3]) + + +class RoundtripTest(object): + def roundtrip(self, save_func, *args, **kwargs): + """ + save_func : callable + Function used to save arrays to file. + file_on_disk : bool + If true, store the file on disk, instead of in a + string buffer. + save_kwds : dict + Parameters passed to `save_func`. + load_kwds : dict + Parameters passed to `numpy.load`. + args : tuple of arrays + Arrays stored to file. + + """ + save_kwds = kwargs.get('save_kwds', {}) + load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) + file_on_disk = kwargs.get('file_on_disk', False) + + if file_on_disk: + target_file = NamedTemporaryFile(delete=False) + load_file = target_file.name + else: + target_file = BytesIO() + load_file = target_file + + try: + arr = args + + save_func(target_file, *arr, **save_kwds) + target_file.flush() + target_file.seek(0) + + if sys.platform == 'win32' and not isinstance(target_file, BytesIO): + target_file.close() + + arr_reloaded = np.load(load_file, **load_kwds) + + self.arr = arr + self.arr_reloaded = arr_reloaded + finally: + if not isinstance(target_file, BytesIO): + target_file.close() + # holds an open file descriptor so it can't be deleted on win + if 'arr_reloaded' in locals(): + if not isinstance(arr_reloaded, np.lib.npyio.NpzFile): + os.remove(target_file.name) + + def check_roundtrips(self, a): + self.roundtrip(a) + self.roundtrip(a, file_on_disk=True) + self.roundtrip(np.asfortranarray(a)) + self.roundtrip(np.asfortranarray(a), file_on_disk=True) + if a.shape[0] > 1: + # neither C nor Fortran contiguous for 2D arrays or more + self.roundtrip(np.asfortranarray(a)[1:]) + self.roundtrip(np.asfortranarray(a)[1:], file_on_disk=True) + + def test_array(self): + a = np.array([], float) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], float) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], int) + self.check_roundtrips(a) + + a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.csingle) + self.check_roundtrips(a) + + a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.cdouble) + self.check_roundtrips(a) + + def test_array_object(self): + a = np.array([], object) + self.check_roundtrips(a) + + a = np.array([[1, 2], [3, 4]], object) + self.check_roundtrips(a) + + def test_1D(self): + a = np.array([1, 2, 3, 4], int) + self.roundtrip(a) + + 
+    @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32")
+    def test_mmap(self):
+        a = np.array([[1, 2.5], [4, 7.3]])
+        self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
+
+        a = np.asfortranarray([[1, 2.5], [4, 7.3]])
+        self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
+
+    def test_record(self):
+        a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+        self.check_roundtrips(a)
+
+    @pytest.mark.slow
+    def test_format_2_0(self):
+        dt = [(("%d" % i) * 100, float) for i in range(500)]
+        a = np.ones(1000, dtype=dt)
+        with warnings.catch_warnings(record=True):
+            warnings.filterwarnings('always', '', UserWarning)
+            self.check_roundtrips(a)
+
+
+class TestSaveLoad(RoundtripTest):
+    def roundtrip(self, *args, **kwargs):
+        RoundtripTest.roundtrip(self, np.save, *args, **kwargs)
+        assert_equal(self.arr[0], self.arr_reloaded)
+        assert_equal(self.arr[0].dtype, self.arr_reloaded.dtype)
+        assert_equal(self.arr[0].flags.fnc, self.arr_reloaded.flags.fnc)
+
+
+class TestSavezLoad(RoundtripTest):
+    def roundtrip(self, *args, **kwargs):
+        RoundtripTest.roundtrip(self, np.savez, *args, **kwargs)
+        try:
+            for n, arr in enumerate(self.arr):
+                reloaded = self.arr_reloaded['arr_%d' % n]
+                assert_equal(arr, reloaded)
+                assert_equal(arr.dtype, reloaded.dtype)
+                assert_equal(arr.flags.fnc, reloaded.flags.fnc)
+        finally:
+            # delete tempfile, must be done here on windows
+            if self.arr_reloaded.fid:
+                self.arr_reloaded.fid.close()
+                os.remove(self.arr_reloaded.fid.name)
+
+    @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
+    @pytest.mark.slow
+    def test_big_arrays(self):
+        L = (1 << 31) + 100000
+        a = np.empty(L, dtype=np.uint8)
+        with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp:
+            np.savez(tmp, a=a)
+            del a
+            npfile = np.load(tmp)
+            a = npfile['a']  # Should succeed
+            npfile.close()
+            del a  # Avoid pyflakes unused variable warning.
+ + def test_multiple_arrays(self): + a = np.array([[1, 2], [3, 4]], float) + b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) + self.roundtrip(a, b) + + def test_named_arrays(self): + a = np.array([[1, 2], [3, 4]], float) + b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) + c = BytesIO() + np.savez(c, file_a=a, file_b=b) + c.seek(0) + l = np.load(c) + assert_equal(a, l['file_a']) + assert_equal(b, l['file_b']) + + def test_BagObj(self): + a = np.array([[1, 2], [3, 4]], float) + b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) + c = BytesIO() + np.savez(c, file_a=a, file_b=b) + c.seek(0) + l = np.load(c) + assert_equal(sorted(dir(l.f)), ['file_a','file_b']) + assert_equal(a, l.f.file_a) + assert_equal(b, l.f.file_b) + + def test_savez_filename_clashes(self): + # Test that issue #852 is fixed + # and savez functions in multithreaded environment + + def writer(error_list): + with temppath(suffix='.npz') as tmp: + arr = np.random.randn(500, 500) + try: + np.savez(tmp, arr=arr) + except OSError as err: + error_list.append(err) + + errors = [] + threads = [threading.Thread(target=writer, args=(errors,)) + for j in range(3)] + for t in threads: + t.start() + for t in threads: + t.join() + + if errors: + raise AssertionError(errors) + + def test_not_closing_opened_fid(self): + # Test that issue #2178 is fixed: + # verify could seek on 'loaded' file + with temppath(suffix='.npz') as tmp: + with open(tmp, 'wb') as fp: + np.savez(fp, data='LOVELY LOAD') + with open(tmp, 'rb', 10000) as fp: + fp.seek(0) + assert_(not fp.closed) + np.load(fp)['data'] + # fp must not get closed by .load + assert_(not fp.closed) + fp.seek(0) + assert_(not fp.closed) + + #FIXME: Is this still true? + @pytest.mark.skipif(IS_PYPY, reason="Missing context manager on PyPy") + def test_closing_fid(self): + # Test that issue #1517 (too many opened files) remains closed + # It might be a "weak" test since failed to get triggered on + # e.g. Debian sid of 2012 Jul 05 but was reported to + # trigger the failure on Ubuntu 10.04: + # http://projects.scipy.org/numpy/ticket/1517#comment:2 + with temppath(suffix='.npz') as tmp: + np.savez(tmp, data='LOVELY LOAD') + # We need to check if the garbage collector can properly close + # numpy npz file returned by np.load when their reference count + # goes to zero. Python 3 running in debug mode raises a + # ResourceWarning when file closing is left to the garbage + # collector, so we catch the warnings. Because ResourceWarning + # is unknown in Python < 3.x, we take the easy way out and + # catch all warnings. + with suppress_warnings() as sup: + sup.filter(Warning) # TODO: specify exact message + for i in range(1, 1025): + try: + np.load(tmp)["data"] + except Exception as e: + msg = "Failed to load data from a file: %s" % e + raise AssertionError(msg) + + def test_closing_zipfile_after_load(self): + # Check that zipfile owns file and can close it. This needs to + # pass a file name to load for the test. On windows failure will + # cause a second error will be raised when the attempt to remove + # the open file is made. 
+ prefix = 'numpy_test_closing_zipfile_after_load_' + with temppath(suffix='.npz', prefix=prefix) as tmp: + np.savez(tmp, lab='place holder') + data = np.load(tmp) + fp = data.zip.fp + data.close() + assert_(fp.closed) + + +class TestSaveTxt(object): + def test_array(self): + a = np.array([[1, 2], [3, 4]], float) + fmt = "%.18e" + c = BytesIO() + np.savetxt(c, a, fmt=fmt) + c.seek(0) + assert_equal(c.readlines(), + [asbytes((fmt + ' ' + fmt + '\n') % (1, 2)), + asbytes((fmt + ' ' + fmt + '\n') % (3, 4))]) + + a = np.array([[1, 2], [3, 4]], int) + c = BytesIO() + np.savetxt(c, a, fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) + + def test_1D(self): + a = np.array([1, 2, 3, 4], int) + c = BytesIO() + np.savetxt(c, a, fmt='%d') + c.seek(0) + lines = c.readlines() + assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n']) + + def test_0D_3D(self): + c = BytesIO() + assert_raises(ValueError, np.savetxt, c, np.array(1)) + assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) + + def test_structured(self): + a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + c = BytesIO() + np.savetxt(c, a, fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) + + def test_structured_padded(self): + # gh-13297 + a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[ + ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4') + ]) + c = BytesIO() + np.savetxt(c, a[['foo', 'baz']], fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1 3\n', b'4 6\n']) + + @pytest.mark.skipif(Path is None, reason="No pathlib.Path") + def test_multifield_view(self): + a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')]) + v = a[['x', 'z']] + with temppath(suffix='.npy') as path: + path = Path(path) + np.save(path, v) + data = np.load(path) + assert_array_equal(data, v) + + def test_delimiter(self): + a = np.array([[1., 2.], [3., 4.]]) + c = BytesIO() + np.savetxt(c, a, delimiter=',', fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1,2\n', b'3,4\n']) + + def test_format(self): + a = np.array([(1, 2), (3, 4)]) + c = BytesIO() + # Sequence of formats + np.savetxt(c, a, fmt=['%02d', '%3.1f']) + c.seek(0) + assert_equal(c.readlines(), [b'01 2.0\n', b'03 4.0\n']) + + # A single multiformat string + c = BytesIO() + np.savetxt(c, a, fmt='%02d : %3.1f') + c.seek(0) + lines = c.readlines() + assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n']) + + # Specify delimiter, should be overridden + c = BytesIO() + np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',') + c.seek(0) + lines = c.readlines() + assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n']) + + # Bad fmt, should raise a ValueError + c = BytesIO() + assert_raises(ValueError, np.savetxt, c, a, fmt=99) + + def test_header_footer(self): + # Test the functionality of the header and footer keyword argument. 
+ + c = BytesIO() + a = np.array([(1, 2), (3, 4)], dtype=int) + test_header_footer = 'Test header / footer' + # Test the header keyword argument + np.savetxt(c, a, fmt='%1d', header=test_header_footer) + c.seek(0) + assert_equal(c.read(), + asbytes('# ' + test_header_footer + '\n1 2\n3 4\n')) + # Test the footer keyword argument + c = BytesIO() + np.savetxt(c, a, fmt='%1d', footer=test_header_footer) + c.seek(0) + assert_equal(c.read(), + asbytes('1 2\n3 4\n# ' + test_header_footer + '\n')) + # Test the commentstr keyword argument used on the header + c = BytesIO() + commentstr = '% ' + np.savetxt(c, a, fmt='%1d', + header=test_header_footer, comments=commentstr) + c.seek(0) + assert_equal(c.read(), + asbytes(commentstr + test_header_footer + '\n' + '1 2\n3 4\n')) + # Test the commentstr keyword argument used on the footer + c = BytesIO() + commentstr = '% ' + np.savetxt(c, a, fmt='%1d', + footer=test_header_footer, comments=commentstr) + c.seek(0) + assert_equal(c.read(), + asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n')) + + def test_file_roundtrip(self): + with temppath() as name: + a = np.array([(1, 2), (3, 4)]) + np.savetxt(name, a) + b = np.loadtxt(name) + assert_array_equal(a, b) + + def test_complex_arrays(self): + ncols = 2 + nrows = 2 + a = np.zeros((ncols, nrows), dtype=np.complex128) + re = np.pi + im = np.e + a[:] = re + 1.0j * im + + # One format only + c = BytesIO() + np.savetxt(c, a, fmt=' %+.3e') + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n', + b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n']) + + # One format for each real and imaginary part + c = BytesIO() + np.savetxt(c, a, fmt=' %+.3e' * 2 * ncols) + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n', + b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n']) + + # One format for each complex number + c = BytesIO() + np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols) + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n', + b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n']) + + def test_complex_negative_exponent(self): + # Previous to 1.15, some formats generated x+-yj, gh 7895 + ncols = 2 + nrows = 2 + a = np.zeros((ncols, nrows), dtype=np.complex128) + re = np.pi + im = np.e + a[:] = re - 1.0j * im + c = BytesIO() + np.savetxt(c, a, fmt='%.3e') + c.seek(0) + lines = c.readlines() + assert_equal( + lines, + [b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n', + b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n']) + + + + + def test_custom_writer(self): + + class CustomWriter(list): + def write(self, text): + self.extend(text.split(b'\n')) + + w = CustomWriter() + a = np.array([(1, 2), (3, 4)]) + np.savetxt(w, a) + b = np.loadtxt(w) + assert_array_equal(a, b) + + def test_unicode(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + with tempdir() as tmpdir: + # set encoding as on windows it may not be unicode even on py3 + np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'], + encoding='UTF-8') + + def test_unicode_roundtrip(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + # our gz wrapper support encoding + suffixes = ['', '.gz'] + # stdlib 2 versions do not support encoding + if MAJVER > 2: + if HAS_BZ2: + suffixes.append('.bz2') + if HAS_LZMA: + suffixes.extend(['.xz', '.lzma']) + with tempdir() as tmpdir: + for suffix in 
suffixes: + np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a, + fmt=['%s'], encoding='UTF-16-LE') + b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix), + encoding='UTF-16-LE', dtype=np.unicode) + assert_array_equal(a, b) + + def test_unicode_bytestream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + s = BytesIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read().decode('UTF-8'), utf8 + '\n') + + def test_unicode_stringstream(self): + utf8 = b'\xcf\x96'.decode('UTF-8') + a = np.array([utf8], dtype=np.unicode) + s = StringIO() + np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') + s.seek(0) + assert_equal(s.read(), utf8 + '\n') + + +class LoadTxtBase(object): + def check_compressed(self, fopen, suffixes): + # Test that we can load data from a compressed file + wanted = np.arange(6).reshape((2, 3)) + linesep = ('\n', '\r\n', '\r') + for sep in linesep: + data = '0 1 2' + sep + '3 4 5' + for suffix in suffixes: + with temppath(suffix=suffix) as name: + with fopen(name, mode='wt', encoding='UTF-32-LE') as f: + f.write(data) + res = self.loadfunc(name, encoding='UTF-32-LE') + assert_array_equal(res, wanted) + with fopen(name, "rt", encoding='UTF-32-LE') as f: + res = self.loadfunc(f) + assert_array_equal(res, wanted) + + # Python2 .open does not support encoding + @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3") + def test_compressed_gzip(self): + self.check_compressed(gzip.open, ('.gz',)) + + @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2") + @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3") + def test_compressed_bz2(self): + self.check_compressed(bz2.open, ('.bz2',)) + + @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma") + @pytest.mark.skipif(MAJVER == 2, reason="Needs Python version >= 3") + def test_compressed_lzma(self): + self.check_compressed(lzma.open, ('.xz', '.lzma')) + + def test_encoding(self): + with temppath() as path: + with open(path, "wb") as f: + f.write('0.\n1.\n2.'.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16") + assert_array_equal(x, [0., 1., 2.]) + + def test_stringload(self): + # umlaute + nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8") + with temppath() as path: + with open(path, "wb") as f: + f.write(nonascii.encode("UTF-16")) + x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode) + assert_array_equal(x, nonascii) + + def test_binary_decode(self): + utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' + v = self.loadfunc(BytesIO(utf16), dtype=np.unicode, encoding='UTF-16') + assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) + + def test_converters_decode(self): + # test converters that decode strings + c = TextIO() + c.write(b'\xcf\x96') + c.seek(0) + x = self.loadfunc(c, dtype=np.unicode, + converters={0: lambda x: x.decode('UTF-8')}) + a = np.array([b'\xcf\x96'.decode('UTF-8')]) + assert_array_equal(x, a) + + def test_converters_nodecode(self): + # test native string converters enabled by setting an encoding + utf8 = b'\xcf\x96'.decode('UTF-8') + with temppath() as path: + with io.open(path, 'wt', encoding='UTF-8') as f: + f.write(utf8) + x = self.loadfunc(path, dtype=np.unicode, + converters={0: lambda x: x + 't'}, + encoding='UTF-8') + a = np.array([utf8 + 't']) + assert_array_equal(x, a) + + +class TestLoadTxt(LoadTxtBase): + loadfunc = staticmethod(np.loadtxt) + + def setup(self): + # lower chunksize for testing + self.orig_chunk = np.lib.npyio._loadtxt_chunksize + np.lib.npyio._loadtxt_chunksize = 1 
+ def teardown(self): + np.lib.npyio._loadtxt_chunksize = self.orig_chunk + + def test_record(self): + c = TextIO() + c.write('1 2\n3 4') + c.seek(0) + x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)]) + a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + assert_array_equal(x, a) + + d = TextIO() + d.write('M 64.0 75.0\nF 25.0 60.0') + d.seek(0) + mydescriptor = {'names': ('gender', 'age', 'weight'), + 'formats': ('S1', 'i4', 'f4')} + b = np.array([('M', 64.0, 75.0), + ('F', 25.0, 60.0)], dtype=mydescriptor) + y = np.loadtxt(d, dtype=mydescriptor) + assert_array_equal(y, b) + + def test_array(self): + c = TextIO() + c.write('1 2\n3 4') + + c.seek(0) + x = np.loadtxt(c, dtype=int) + a = np.array([[1, 2], [3, 4]], int) + assert_array_equal(x, a) + + c.seek(0) + x = np.loadtxt(c, dtype=float) + a = np.array([[1, 2], [3, 4]], float) + assert_array_equal(x, a) + + def test_1D(self): + c = TextIO() + c.write('1\n2\n3\n4\n') + c.seek(0) + x = np.loadtxt(c, dtype=int) + a = np.array([1, 2, 3, 4], int) + assert_array_equal(x, a) + + c = TextIO() + c.write('1,2,3,4\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',') + a = np.array([1, 2, 3, 4], int) + assert_array_equal(x, a) + + def test_missing(self): + c = TextIO() + c.write('1,2,3,,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}) + a = np.array([1, 2, 3, -999, 5], int) + assert_array_equal(x, a) + + def test_converters_with_usecols(self): + c = TextIO() + c.write('1,2,3,,5\n6,7,8,9,10\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}, + usecols=(1, 3,)) + a = np.array([[2, -999], [7, 9]], int) + assert_array_equal(x, a) + + def test_comments_unicode(self): + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=u'#') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_byte(self): + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=b'#') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_comments_multiple(self): + c = TextIO() + c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments=['#', '@', '//']) + a = np.array([[1, 2, 3], [4, 5, 6]], int) + assert_array_equal(x, a) + + def test_comments_multi_chars(self): + c = TextIO() + c.write('/* comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + comments='/*') + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + # Check that '/*' is not transformed to ['/', '*'] + c = TextIO() + c.write('*/ comment\n1,2,3,5\n') + c.seek(0) + assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',', + comments='/*') + + def test_skiprows(self): + c = TextIO() + c.write('comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + c = TextIO() + c.write('# comment\n1,2,3,5\n') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_usecols(self): + a = np.array([[1, 2], [3, 4]], float) + c = BytesIO() + np.savetxt(c, a) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(1,)) + assert_array_equal(x, a[:, 1]) + + a = np.array([[1, 2, 3], [3, 4, 5]], float) + c = BytesIO() + np.savetxt(c, 
a) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(1, 2)) + assert_array_equal(x, a[:, 1:]) + + # Testing with arrays instead of tuples. + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2])) + assert_array_equal(x, a[:, 1:]) + + # Testing with an integer instead of a sequence + for int_type in [int, np.int8, np.int16, + np.int32, np.int64, np.uint8, np.uint16, + np.uint32, np.uint64]: + to_read = int_type(1) + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=to_read) + assert_array_equal(x, a[:, 1]) + + # Testing with some crazy custom integer type + class CrazyInt(object): + def __index__(self): + return 1 + + crazy_int = CrazyInt() + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=crazy_int) + assert_array_equal(x, a[:, 1]) + + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(crazy_int,)) + assert_array_equal(x, a[:, 1]) + + # Checking with dtypes defined converters. + data = '''JOE 70.1 25.3 + BOB 60.5 27.9 + ''' + c = TextIO(data) + names = ['stid', 'temp'] + dtypes = ['S4', 'f8'] + arr = np.loadtxt(c, usecols=(0, 2), dtype=list(zip(names, dtypes))) + assert_equal(arr['stid'], [b"JOE", b"BOB"]) + assert_equal(arr['temp'], [25.3, 27.9]) + + # Testing non-ints in usecols + c.seek(0) + bogus_idx = 1.5 + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=bogus_idx + ) + + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=[0, bogus_idx, 0] + ) + + def test_fancy_dtype(self): + c = TextIO() + c.write('1,2,3.0\n4,5,6.0\n') + c.seek(0) + dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + x = np.loadtxt(c, dtype=dt, delimiter=',') + a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dt) + assert_array_equal(x, a) + + def test_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 3))]) + x = np.loadtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], + dtype=dt) + assert_array_equal(x, a) + + def test_3d_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6 7 8 9 10 11 12") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 2, 3))]) + x = np.loadtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])], + dtype=dt) + assert_array_equal(x, a) + + def test_str_dtype(self): + # see gh-8033 + c = ["str1", "str2"] + + for dt in (str, np.bytes_): + a = np.array(["str1", "str2"], dtype=dt) + x = np.loadtxt(c, dtype=dt) + assert_array_equal(x, a) + + def test_empty_file(self): + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") + c = TextIO() + x = np.loadtxt(c) + assert_equal(x.shape, (0,)) + x = np.loadtxt(c, dtype=np.int64) + assert_equal(x.shape, (0,)) + assert_(x.dtype == np.int64) + + def test_unused_converter(self): + c = TextIO() + c.writelines(['1 21\n', '3 42\n']) + c.seek(0) + data = np.loadtxt(c, usecols=(1,), + converters={0: lambda s: int(s, 16)}) + assert_array_equal(data, [21, 42]) + + c.seek(0) + data = np.loadtxt(c, usecols=(1,), + converters={1: lambda s: int(s, 16)}) + assert_array_equal(data, [33, 66]) + + def test_dtype_with_object(self): + # Test using an explicit dtype with an object + data = """ 1; 2001-01-01 + 2; 2002-01-31 """ + ndtype = [('idx', int), ('code', object)] + func = lambda s: strptime(s.strip(), "%Y-%m-%d") + converters = {1: func} + test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype, 
+ converters=converters) + control = np.array( + [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], + dtype=ndtype) + assert_equal(test, control) + + def test_uint64_type(self): + tgt = (9223372043271415339, 9223372043271415853) + c = TextIO() + c.write("%s %s" % tgt) + c.seek(0) + res = np.loadtxt(c, dtype=np.uint64) + assert_equal(res, tgt) + + def test_int64_type(self): + tgt = (-9223372036854775807, 9223372036854775807) + c = TextIO() + c.write("%s %s" % tgt) + c.seek(0) + res = np.loadtxt(c, dtype=np.int64) + assert_equal(res, tgt) + + def test_from_float_hex(self): + # IEEE doubles and floats only, otherwise the float32 + # conversion may fail. + tgt = np.logspace(-10, 10, 5).astype(np.float32) + tgt = np.hstack((tgt, -tgt)).astype(float) + inp = '\n'.join(map(float.hex, tgt)) + c = TextIO() + c.write(inp) + for dt in [float, np.float32]: + c.seek(0) + res = np.loadtxt(c, dtype=dt) + assert_equal(res, tgt, err_msg="%s" % dt) + + def test_from_complex(self): + tgt = (complex(1, 1), complex(1, -1)) + c = TextIO() + c.write("%s %s" % tgt) + c.seek(0) + res = np.loadtxt(c, dtype=complex) + assert_equal(res, tgt) + + def test_complex_misformatted(self): + # test for backward compatibility + # some complex formats used to generate x+-yj + a = np.zeros((2, 2), dtype=np.complex128) + re = np.pi + im = np.e + a[:] = re - 1.0j * im + c = BytesIO() + np.savetxt(c, a, fmt='%.16e') + c.seek(0) + txt = c.read() + c.seek(0) + # misformat the sign on the imaginary part, gh 7895 + txt_bad = txt.replace(b'e+00-', b'e00+-') + assert_(txt_bad != txt) + c.write(txt_bad) + c.seek(0) + res = np.loadtxt(c, dtype=complex) + assert_equal(res, a) + + def test_universal_newline(self): + with temppath() as name: + with open(name, 'w') as f: + f.write('1 21\r3 42\r') + data = np.loadtxt(name) + assert_array_equal(data, [[1, 21], [3, 42]]) + + def test_empty_field_after_tab(self): + c = TextIO() + c.write('1 \t2 \t3\tstart \n4\t5\t6\t \n7\t8\t9.5\t') + c.seek(0) + dt = {'names': ('x', 'y', 'z', 'comment'), + 'formats': ('<i4', '<i4', '<f4', '|S8')} + x = np.loadtxt(c, dtype=dt, delimiter='\t') + a = np.array([b'start ', b' ', b'']) + assert_array_equal(x['comment'], a) + + def test_structure_unpack(self): + txt = TextIO("M 21 72\nF 35 58") + dt = {'names': ('a', 'b', 'c'), 'formats': ('|S1', '<i4', '<f4')} + a, b, c = np.loadtxt(txt, dtype=dt, unpack=True) + assert_(a.dtype.str == '|S1') + assert_(b.dtype.str == '<i4') + assert_(c.dtype.str == '<f4') + assert_array_equal(a, np.array([b'M', b'F'])) + assert_array_equal(b, np.array([21, 35])) + assert_array_equal(c, np.array([72., 58.])) + + def test_ndmin_keyword(self): + c = TextIO() + c.write('1,2,3\n4,5,6') + c.seek(0) + assert_raises(ValueError, np.loadtxt, c, ndmin=3) + c.seek(0) + assert_raises(ValueError, np.loadtxt, c, ndmin=1.5) + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', ndmin=1) + a = np.array([[1, 2, 3], [4, 5, 6]]) + assert_array_equal(x, a) + + d = TextIO() + d.write('0,1,2') + d.seek(0) + x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=2) + assert_(x.shape == (1, 3)) + d.seek(0) + x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=1) + assert_(x.shape == (3,)) + d.seek(0) + x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=0) + assert_(x.shape == (3,)) + + e = TextIO() + e.write('0\n1\n2') + e.seek(0) + x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=2) + assert_(x.shape == (3, 1)) + e.seek(0) + x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=1) + assert_(x.shape == (3,)) + e.seek(0) + x = np.loadtxt(e, dtype=int, 
delimiter=',', ndmin=0) + assert_(x.shape == (3,)) + + # Test ndmin kw with empty file. + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") + f = TextIO() + assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,)) + assert_(np.loadtxt(f, ndmin=1).shape == (0,)) + + def test_generator_source(self): + def count(): + for i in range(10): + yield "%d" % i + + res = np.loadtxt(count()) + assert_array_equal(res, np.arange(10)) + + def test_bad_line(self): + c = TextIO() + c.write('1 2 3\n4 5 6\n2 3') + c.seek(0) + + # Check for exception and that exception contains line number + assert_raises_regex(ValueError, "3", np.loadtxt, c) + + def test_none_as_string(self): + # gh-5155, None should work as string when format demands it + c = TextIO() + c.write('100,foo,200\n300,None,400') + c.seek(0) + dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) + np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed + + @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968', + reason="Wrong preferred encoding") + def test_binary_load(self): + butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\ + b"20,2,3,\xc3\x95scar\n\r" + sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines() + with temppath() as path: + with open(path, "wb") as f: + f.write(butf8) + with open(path, "rb") as f: + x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode) + assert_array_equal(x, sutf8) + # test broken latin1 conversion people now rely on + with open(path, "rb") as f: + x = np.loadtxt(f, encoding="UTF-8", dtype="S") + x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar'] + assert_array_equal(x, np.array(x, dtype="S")) + + def test_max_rows(self): + c = TextIO() + c.write('1,2,3,5\n4,5,7,8\n2,1,4,5') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + max_rows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + def test_max_rows_with_skiprows(self): + c = TextIO() + c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1, max_rows=1) + a = np.array([1, 2, 3, 5], int) + assert_array_equal(x, a) + + c = TextIO() + c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1, max_rows=2) + a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int) + assert_array_equal(x, a) + + def test_max_rows_with_read_continuation(self): + c = TextIO() + c.write('1,2,3,5\n4,5,7,8\n2,1,4,5') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + max_rows=2) + a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int) + assert_array_equal(x, a) + # test continuation + x = np.loadtxt(c, dtype=int, delimiter=',') + a = np.array([2,1,4,5], int) + assert_array_equal(x, a) + + def test_max_rows_larger(self): + #test max_rows > num rows + c = TextIO() + c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5') + c.seek(0) + x = np.loadtxt(c, dtype=int, delimiter=',', + skiprows=1, max_rows=6) + a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int) + assert_array_equal(x, a) + +class Testfromregex(object): + def test_record(self): + c = TextIO() + c.write('1.312 foo\n1.534 bar\n4.444 qux') + c.seek(0) + + dt = [('num', np.float64), ('val', 'S3')] + x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt) + a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], + dtype=dt) + assert_array_equal(x, a) + + def test_record_2(self): + c = TextIO() + c.write('1312 foo\n1534 bar\n4444 qux') + c.seek(0) + + dt = [('num', np.int32), ('val', 'S3')] + x = np.fromregex(c, 
r"(\d+)\s+(...)", dt) + a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], + dtype=dt) + assert_array_equal(x, a) + + def test_record_3(self): + c = TextIO() + c.write('1312 foo\n1534 bar\n4444 qux') + c.seek(0) + + dt = [('num', np.float64)] + x = np.fromregex(c, r"(\d+)\s+...", dt) + a = np.array([(1312,), (1534,), (4444,)], dtype=dt) + assert_array_equal(x, a) + + def test_record_unicode(self): + utf8 = b'\xcf\x96' + with temppath() as path: + with open(path, 'wb') as f: + f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux') + + dt = [('num', np.float64), ('val', 'U4')] + x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8') + a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'), + (4.444, 'qux')], dtype=dt) + assert_array_equal(x, a) + + regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE) + x = np.fromregex(path, regexp, dt, encoding='UTF-8') + assert_array_equal(x, a) + + def test_compiled_bytes(self): + regexp = re.compile(b'(\\d)') + c = BytesIO(b'123') + dt = [('num', np.float64)] + a = np.array([1, 2, 3], dtype=dt) + x = np.fromregex(c, regexp, dt) + assert_array_equal(x, a) + +#####-------------------------------------------------------------------------- + + +class TestFromTxt(LoadTxtBase): + loadfunc = staticmethod(np.genfromtxt) + + def test_record(self): + # Test w/ explicit dtype + data = TextIO('1 2\n3 4') + test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) + control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) + assert_equal(test, control) + # + data = TextIO('M 64.0 75.0\nF 25.0 60.0') + descriptor = {'names': ('gender', 'age', 'weight'), + 'formats': ('S1', 'i4', 'f4')} + control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], + dtype=descriptor) + test = np.ndfromtxt(data, dtype=descriptor) + assert_equal(test, control) + + def test_array(self): + # Test outputting a standard ndarray + data = TextIO('1 2\n3 4') + control = np.array([[1, 2], [3, 4]], dtype=int) + test = np.ndfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data.seek(0) + control = np.array([[1, 2], [3, 4]], dtype=float) + test = np.loadtxt(data, dtype=float) + assert_array_equal(test, control) + + def test_1D(self): + # Test squeezing to 1D + control = np.array([1, 2, 3, 4], int) + # + data = TextIO('1\n2\n3\n4\n') + test = np.ndfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data = TextIO('1,2,3,4\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',') + assert_array_equal(test, control) + + def test_comments(self): + # Test the stripping of comments + control = np.array([1, 2, 3, 5], int) + # Comment on its own line + data = TextIO('# comment\n1,2,3,5\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + # Comment at the end of a line + data = TextIO('1,2,3,5# comment\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + + def test_skiprows(self): + # Test row skipping + control = np.array([1, 2, 3, 5], int) + kwargs = dict(dtype=int, delimiter=',') + # + data = TextIO('comment\n1,2,3,5\n') + test = np.ndfromtxt(data, skip_header=1, **kwargs) + assert_equal(test, control) + # + data = TextIO('# comment\n1,2,3,5\n') + test = np.loadtxt(data, skiprows=1, **kwargs) + assert_equal(test, control) + + def test_skip_footer(self): + data = ["# %i" % i for i in range(1, 6)] + data.append("A, B, C") + data.extend(["%i,%3.1f,%03s" % (i, i, i) for i in range(51)]) + data[-1] = "99,99" + 
kwargs = dict(delimiter=",", names=True, skip_header=5, skip_footer=10) + test = np.genfromtxt(TextIO("\n".join(data)), **kwargs) + ctrl = np.array([("%f" % i, "%f" % i, "%f" % i) for i in range(41)], + dtype=[(_, float) for _ in "ABC"]) + assert_equal(test, ctrl) + + def test_skip_footer_with_invalid(self): + with suppress_warnings() as sup: + sup.filter(ConversionWarning) + basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n' + # Footer too small to get rid of all invalid values + assert_raises(ValueError, np.genfromtxt, + TextIO(basestr), skip_footer=1) + # except ValueError: + # pass + a = np.genfromtxt( + TextIO(basestr), skip_footer=1, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) + # + a = np.genfromtxt(TextIO(basestr), skip_footer=3) + assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) + # + basestr = '1 1\n2 \n3 3\n4 4\n5 \n6 6\n7 7\n' + a = np.genfromtxt( + TextIO(basestr), skip_footer=1, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]])) + a = np.genfromtxt( + TextIO(basestr), skip_footer=3, invalid_raise=False) + assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]])) + + def test_header(self): + # Test retrieving a header + data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, dtype=None, names=True) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = {'gender': np.array([b'M', b'F']), + 'age': np.array([64.0, 25.0]), + 'weight': np.array([75.0, 60.0])} + assert_equal(test['gender'], control['gender']) + assert_equal(test['age'], control['age']) + assert_equal(test['weight'], control['weight']) + + def test_auto_dtype(self): + # Test the automatic definition of the output dtype + data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = [np.array([b'A', b'BCD']), + np.array([64, 25]), + np.array([75.0, 60.0]), + np.array([3 + 4j, 5 + 6j]), + np.array([True, False]), ] + assert_equal(test.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4']) + for (i, ctrl) in enumerate(control): + assert_equal(test['f%i' % i], ctrl) + + def test_auto_dtype_uniform(self): + # Tests whether the output dtype can be uniformized + data = TextIO('1 2 3 4\n5 6 7 8\n') + test = np.ndfromtxt(data, dtype=None) + control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + assert_equal(test, control) + + def test_fancy_dtype(self): + # Check that a nested dtype isn't MIA + data = TextIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',') + control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) + assert_equal(test, control) + + def test_names_overwrite(self): + # Test overwriting the names of the dtype + descriptor = {'names': ('g', 'a', 'w'), + 'formats': ('S1', 'i4', 'f4')} + data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') + names = ('gender', 'age', 'weight') + test = np.ndfromtxt(data, dtype=descriptor, names=names) + descriptor['names'] = names + control = np.array([('M', 64.0, 75.0), + ('F', 25.0, 60.0)], dtype=descriptor) + assert_equal(test, control) + + def test_commented_header(self): + # Check that names can be 
retrieved even if the line is commented out. + data = TextIO(""" +#gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + # The # is part of the first name and should be deleted automatically. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)], + dtype=[('gender', '|S1'), ('age', int), ('weight', float)]) + assert_equal(test, ctrl) + # Ditto, but we should get rid of the first element + data = TextIO(b""" +# gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(data, names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test, ctrl) + + def test_names_and_comments_none(self): + # Tests case when names is true but comments is None (gh-10780) + data = TextIO('col1 col2\n 1 2\n 3 4') + test = np.genfromtxt(data, dtype=(int, int), comments=None, names=True) + control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)]) + assert_equal(test, control) + + def test_autonames_and_usecols(self): + # Tests names and usecols + data = TextIO('A B C D\n aaaa 121 45 9.1') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), + names=True, dtype=None) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = np.array(('aaaa', 45, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + def test_converters_with_usecols(self): + # Test the combination user-defined converters and usecol + data = TextIO('1,2,3,,5\n6,7,8,9,10\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', + converters={3: lambda s: int(s or - 999)}, + usecols=(1, 3,)) + control = np.array([[2, -999], [7, 9]], int) + assert_equal(test, control) + + def test_converters_with_usecols_and_names(self): + # Tests names and usecols + data = TextIO('A B C D\n aaaa 121 45 9.1') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, + dtype=None, + converters={'C': lambda s: 2 * int(s)}) + assert_(w[0].category is np.VisibleDeprecationWarning) + control = np.array(('aaaa', 90, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + def test_converters_cornercases(self): + # Test the conversion to datetime. + converter = { + 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} + data = TextIO('2009-02-03 12:00:00Z, 72214.0') + test = np.ndfromtxt(data, delimiter=',', dtype=None, + names=['date', 'stid'], converters=converter) + control = np.array((datetime(2009, 2, 3), 72214.), + dtype=[('date', np.object_), ('stid', float)]) + assert_equal(test, control) + + def test_converters_cornercases2(self): + # Test the conversion to datetime64. 
+ converter = { + 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} + data = TextIO('2009-02-03 12:00:00Z, 72214.0') + test = np.ndfromtxt(data, delimiter=',', dtype=None, + names=['date', 'stid'], converters=converter) + control = np.array((datetime(2009, 2, 3), 72214.), + dtype=[('date', 'datetime64[us]'), ('stid', float)]) + assert_equal(test, control) + + def test_unused_converter(self): + # Test whether unused converters are forgotten + data = TextIO("1 21\n 3 42\n") + test = np.ndfromtxt(data, usecols=(1,), + converters={0: lambda s: int(s, 16)}) + assert_equal(test, [21, 42]) + # + data.seek(0) + test = np.ndfromtxt(data, usecols=(1,), + converters={1: lambda s: int(s, 16)}) + assert_equal(test, [33, 66]) + + def test_invalid_converter(self): + strip_rand = lambda x: float((b'r' in x.lower() and x.split()[-1]) or + (b'r' not in x.lower() and x.strip() or 0.0)) + strip_per = lambda x: float((b'%' in x.lower() and x.split()[0]) or + (b'%' not in x.lower() and x.strip() or 0.0)) + s = TextIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" + "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n" + "D02N03,10/10/2004,R 1,,7,145.55") + kwargs = dict( + converters={2: strip_per, 3: strip_rand}, delimiter=",", + dtype=None) + assert_raises(ConverterError, np.genfromtxt, s, **kwargs) + + def test_tricky_converter_bug1666(self): + # Test some corner cases + s = TextIO('q1,2\nq3,4') + cnv = lambda s: float(s[1:]) + test = np.genfromtxt(s, delimiter=',', converters={0: cnv}) + control = np.array([[1., 2.], [3., 4.]]) + assert_equal(test, control) + + def test_dtype_with_converters(self): + dstr = "2009; 23; 46" + test = np.ndfromtxt(TextIO(dstr,), + delimiter=";", dtype=float, converters={0: bytes}) + control = np.array([('2009', 23., 46)], + dtype=[('f0', '|S4'), ('f1', float), ('f2', float)]) + assert_equal(test, control) + test = np.ndfromtxt(TextIO(dstr,), + delimiter=";", dtype=float, converters={0: float}) + control = np.array([2009., 23., 46],) + assert_equal(test, control) + + def test_dtype_with_converters_and_usecols(self): + dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n" + dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3} + dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')] + conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]} + test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', + names=None, converters=conv) + control = np.rec.array([(1,5,-1,0), (2,8,-1,1), (3,3,-2,3)], dtype=dtyp) + assert_equal(test, control) + dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')] + test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', + usecols=(0,1,3), names=None, converters=conv) + control = np.rec.array([(1,5,0), (2,8,1), (3,3,3)], dtype=dtyp) + assert_equal(test, control) + + def test_dtype_with_object(self): + # Test using an explicit dtype with an object + data = """ 1; 2001-01-01 + 2; 2002-01-31 """ + ndtype = [('idx', int), ('code', object)] + func = lambda s: strptime(s.strip(), "%Y-%m-%d") + converters = {1: func} + test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype, + converters=converters) + control = np.array( + [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], + dtype=ndtype) + assert_equal(test, control) + + ndtype = [('nest', [('idx', int), ('code', object)])] + with assert_raises_regex(NotImplementedError, + 'Nested fields.* not supported.*'): + test = np.genfromtxt(TextIO(data), delimiter=";", + dtype=ndtype, converters=converters) + + # nested but empty fields also aren't supported + ndtype = [('idx', int), ('code', object), ('nest', [])] + 
+        with assert_raises_regex(NotImplementedError,
+                                 'Nested fields.* not supported.*'):
+            test = np.genfromtxt(TextIO(data), delimiter=";",
+                                 dtype=ndtype, converters=converters)
+
+    def test_userconverters_with_explicit_dtype(self):
+        # Test user_converters w/ explicit (standard) dtype
+        data = TextIO('skip,skip,2001-01-01,1.0,skip')
+        test = np.genfromtxt(data, delimiter=",", names=None, dtype=float,
+                             usecols=(2, 3), converters={2: bytes})
+        control = np.array([('2001-01-01', 1.)],
+                           dtype=[('', '|S10'), ('', float)])
+        assert_equal(test, control)
+
+    def test_utf8_userconverters_with_explicit_dtype(self):
+        utf8 = b'\xcf\x96'
+        with temppath() as path:
+            with open(path, 'wb') as f:
+                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
+            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
+                                 usecols=(2, 3), converters={2: np.unicode},
+                                 encoding='UTF-8')
+        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
+                           dtype=[('', '|U11'), ('', float)])
+        assert_equal(test, control)
+
+    def test_spacedelimiter(self):
+        # Test space delimiter
+        data = TextIO("1 2 3 4 5\n6 7 8 9 10")
+        test = np.ndfromtxt(data)
+        control = np.array([[1., 2., 3., 4., 5.],
+                            [6., 7., 8., 9., 10.]])
+        assert_equal(test, control)
+
+    def test_integer_delimiter(self):
+        # Test using an integer for delimiter
+        data = "  1  2  3\n  4  5 67\n890123  4"
+        test = np.genfromtxt(TextIO(data), delimiter=3)
+        control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]])
+        assert_equal(test, control)
+
+    def test_missing(self):
+        data = TextIO('1,2,3,,5\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',',
+                            converters={3: lambda s: int(s or - 999)})
+        control = np.array([1, 2, 3, -999, 5], int)
+        assert_equal(test, control)
+
+    def test_missing_with_tabs(self):
+        # Test w/ a delimiter tab
+        txt = "1\t2\t3\n\t2\t\n1\t\t3"
+        test = np.genfromtxt(TextIO(txt), delimiter="\t",
+                             usemask=True,)
+        ctrl_d = np.array([(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],)
+        ctrl_m = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool)
+        assert_equal(test.data, ctrl_d)
+        assert_equal(test.mask, ctrl_m)
+
+    def test_usecols(self):
+        # Test the selection of columns
+        # Select 1 column
+        control = np.array([[1, 2], [3, 4]], float)
+        data = TextIO()
+        np.savetxt(data, control)
+        data.seek(0)
+        test = np.ndfromtxt(data, dtype=float, usecols=(1,))
+        assert_equal(test, control[:, 1])
+        #
+        control = np.array([[1, 2, 3], [3, 4, 5]], float)
+        data = TextIO()
+        np.savetxt(data, control)
+        data.seek(0)
+        test = np.ndfromtxt(data, dtype=float, usecols=(1, 2))
+        assert_equal(test, control[:, 1:])
+        # Testing with arrays instead of tuples.
+ data.seek(0) + test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2])) + assert_equal(test, control[:, 1:]) + + def test_usecols_as_css(self): + # Test giving usecols with a comma-separated string + data = "1 2 3\n4 5 6" + test = np.genfromtxt(TextIO(data), + names="a, b, c", usecols="a, c") + ctrl = np.array([(1, 3), (4, 6)], dtype=[(_, float) for _ in "ac"]) + assert_equal(test, ctrl) + + def test_usecols_with_structured_dtype(self): + # Test usecols with an explicit structured dtype + data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") + names = ['stid', 'temp'] + dtypes = ['S4', 'f8'] + test = np.ndfromtxt( + data, usecols=(0, 2), dtype=list(zip(names, dtypes))) + assert_equal(test['stid'], [b"JOE", b"BOB"]) + assert_equal(test['temp'], [25.3, 27.9]) + + def test_usecols_with_integer(self): + # Test usecols with an integer + test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0) + assert_equal(test, np.array([1., 4.])) + + def test_usecols_with_named_columns(self): + # Test usecols with named columns + ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) + data = "1 2 3\n4 5 6" + kwargs = dict(names="a, b, c") + test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) + assert_equal(test, ctrl) + test = np.genfromtxt(TextIO(data), + usecols=('a', 'c'), **kwargs) + assert_equal(test, ctrl) + + def test_empty_file(self): + # Test that an empty file raises the proper warning. + with suppress_warnings() as sup: + sup.filter(message="genfromtxt: Empty input file:") + data = TextIO() + test = np.genfromtxt(data) + assert_equal(test, np.array([])) + + def test_fancy_dtype_alt(self): + # Check that a nested dtype isn't MIA + data = TextIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.mafromtxt(data, dtype=fancydtype, delimiter=',') + control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) + assert_equal(test, control) + + def test_shaped_dtype(self): + c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") + dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), + ('block', int, (2, 3))]) + x = np.ndfromtxt(c, dtype=dt) + a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], + dtype=dt) + assert_array_equal(x, a) + + def test_withmissing(self): + data = TextIO('A,B\n0,1\n2,N/A') + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.mafromtxt(data, dtype=None, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + # + data.seek(0) + test = np.mafromtxt(data, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', float), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_user_missing_values(self): + data = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j" + basekwargs = dict(dtype=None, delimiter=",", names=True,) + mdtype = [('A', int), ('B', float), ('C', complex)] + # + test = np.mafromtxt(TextIO(data), missing_values="N/A", + **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)], + dtype=mdtype) + assert_equal(test, control) + # + basekwargs['dtype'] = mdtype + test = np.mafromtxt(TextIO(data), + missing_values={0: -9, 1: -99, 2: -999j}, **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], 
+ mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + # + test = np.mafromtxt(TextIO(data), + missing_values={0: -9, 'B': -99, 'C': -999j}, + **basekwargs) + control = ma.array([(0, 0.0, 0j), (1, -999, 1j), + (-9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + + def test_user_filling_values(self): + # Test with missing and filling values + ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) + data = "N/A, 2, 3\n4, ,???" + kwargs = dict(delimiter=",", + dtype=int, + names="a,b,c", + missing_values={0: "N/A", 'b': " ", 2: "???"}, + filling_values={0: 0, 'b': 0, 2: -999}) + test = np.genfromtxt(TextIO(data), **kwargs) + ctrl = np.array([(0, 2, 3), (4, 0, -999)], + dtype=[(_, int) for _ in "abc"]) + assert_equal(test, ctrl) + # + test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) + ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"]) + assert_equal(test, ctrl) + + data2 = "1,2,*,4\n5,*,7,8\n" + test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, + missing_values="*", filling_values=0) + ctrl = np.array([[1, 2, 0, 4], [5, 0, 7, 8]]) + assert_equal(test, ctrl) + test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, + missing_values="*", filling_values=-1) + ctrl = np.array([[1, 2, -1, 4], [5, -1, 7, 8]]) + assert_equal(test, ctrl) + + def test_withmissing_float(self): + data = TextIO('A,B\n0,1.5\n2,-999.00') + test = np.mafromtxt(data, dtype=None, delimiter=',', + missing_values='-999.0', names=True,) + control = ma.array([(0, 1.5), (2, -1.)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_with_masked_column_uniform(self): + # Test masked column + data = TextIO('1 2 3\n4 5 6\n') + test = np.genfromtxt(data, dtype=None, + missing_values='2,5', usemask=True) + control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0], [0, 1, 0]]) + assert_equal(test, control) + + def test_with_masked_column_various(self): + # Test masked column + data = TextIO('True 2 3\nFalse 5 6\n') + test = np.genfromtxt(data, dtype=None, + missing_values='2,5', usemask=True) + control = ma.array([(1, 2, 3), (0, 5, 6)], + mask=[(0, 1, 0), (0, 1, 0)], + dtype=[('f0', bool), ('f1', bool), ('f2', int)]) + assert_equal(test, control) + + def test_invalid_raise(self): + # Test invalid raise + data = ["1, 1, 1, 1, 1"] * 50 + for i in range(5): + data[10 * i] = "2, 2, 2, 2 2" + data.insert(0, "a, b, c, d, e") + mdata = TextIO("\n".join(data)) + # + kwargs = dict(delimiter=",", dtype=None, names=True) + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? 
+ ret = {} + + def f(_ret={}): + _ret['mtest'] = np.ndfromtxt(mdata, invalid_raise=False, **kwargs) + assert_warns(ConversionWarning, f, _ret=ret) + mtest = ret['mtest'] + assert_equal(len(mtest), 45) + assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde'])) + # + mdata.seek(0) + assert_raises(ValueError, np.ndfromtxt, mdata, + delimiter=",", names=True) + + def test_invalid_raise_with_usecols(self): + # Test invalid_raise with usecols + data = ["1, 1, 1, 1, 1"] * 50 + for i in range(5): + data[10 * i] = "2, 2, 2, 2 2" + data.insert(0, "a, b, c, d, e") + mdata = TextIO("\n".join(data)) + kwargs = dict(delimiter=",", dtype=None, names=True, + invalid_raise=False) + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? + ret = {} + + def f(_ret={}): + _ret['mtest'] = np.ndfromtxt(mdata, usecols=(0, 4), **kwargs) + assert_warns(ConversionWarning, f, _ret=ret) + mtest = ret['mtest'] + assert_equal(len(mtest), 45) + assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae'])) + # + mdata.seek(0) + mtest = np.ndfromtxt(mdata, usecols=(0, 1), **kwargs) + assert_equal(len(mtest), 50) + control = np.ones(50, dtype=[(_, int) for _ in 'ab']) + control[[10 * _ for _ in range(5)]] = (2, 2) + assert_equal(mtest, control) + + def test_inconsistent_dtype(self): + # Test inconsistent dtype + data = ["1, 1, 1, 1, -1.1"] * 50 + mdata = TextIO("\n".join(data)) + + converters = {4: lambda x: "(%s)" % x} + kwargs = dict(delimiter=",", converters=converters, + dtype=[(_, int) for _ in 'abcde'],) + assert_raises(ValueError, np.genfromtxt, mdata, **kwargs) + + def test_default_field_format(self): + # Test default format + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.ndfromtxt(TextIO(data), + delimiter=",", dtype=None, defaultfmt="f%02i") + ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)], + dtype=[("f00", int), ("f01", int), ("f02", float)]) + assert_equal(mtest, ctrl) + + def test_single_dtype_wo_names(self): + # Test single dtype w/o names + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.ndfromtxt(TextIO(data), + delimiter=",", dtype=float, defaultfmt="f%02i") + ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float) + assert_equal(mtest, ctrl) + + def test_single_dtype_w_explicit_names(self): + # Test single dtype w explicit names + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.ndfromtxt(TextIO(data), + delimiter=",", dtype=float, names="a, b, c") + ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], + dtype=[(_, float) for _ in "abc"]) + assert_equal(mtest, ctrl) + + def test_single_dtype_w_implicit_names(self): + # Test single dtype w implicit names + data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" + mtest = np.ndfromtxt(TextIO(data), + delimiter=",", dtype=float, names=True) + ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], + dtype=[(_, float) for _ in "abc"]) + assert_equal(mtest, ctrl) + + def test_easy_structured_dtype(self): + # Test easy structured dtype + data = "0, 1, 2.3\n4, 5, 6.7" + mtest = np.ndfromtxt(TextIO(data), delimiter=",", + dtype=(int, float, float), defaultfmt="f_%02i") + ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)], + dtype=[("f_00", int), ("f_01", float), ("f_02", float)]) + assert_equal(mtest, ctrl) + + def test_autostrip(self): + # Test autostrip + data = "01/01/2003 , 1.3, abcde" + kwargs = dict(delimiter=",", dtype=None) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + mtest = np.ndfromtxt(TextIO(data), **kwargs) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = 
np.array([('01/01/2003 ', 1.3, ' abcde')], + dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')]) + assert_equal(mtest, ctrl) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs) + assert_(w[0].category is np.VisibleDeprecationWarning) + ctrl = np.array([('01/01/2003', 1.3, 'abcde')], + dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')]) + assert_equal(mtest, ctrl) + + def test_replace_space(self): + # Test the 'replace_space' option + txt = "A.A, B (B), C:C\n1, 2, 3.14" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=None, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + + def test_replace_space_known_dtype(self): + # Test the 'replace_space' (and related) options when dtype != None + txt = "A.A, B (B), C:C\n1, 2, 3" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + + def test_incomplete_names(self): + # Test w/ incomplete names + data = "A,,C\n0,1,2\n3,4,5" + kwargs = dict(delimiter=",", names=True) + # w/ dtype=None + ctrl = np.array([(0, 1, 2), (3, 4, 5)], + dtype=[(_, int) for _ in ('A', 'f0', 'C')]) + test = np.ndfromtxt(TextIO(data), dtype=None, **kwargs) + assert_equal(test, ctrl) + # w/ default dtype + ctrl = np.array([(0, 1, 2), (3, 4, 5)], + dtype=[(_, float) for _ in ('A', 'f0', 'C')]) + test = np.ndfromtxt(TextIO(data), **kwargs) + + def test_names_auto_completion(self): + # Make sure that names are properly completed + data = "1 2 3\n 4 5 6" + test = np.genfromtxt(TextIO(data), + dtype=(int, float, int), names="a") + ctrl = np.array([(1, 2, 3), (4, 5, 6)], + dtype=[('a', int), ('f0', float), ('f1', int)]) + assert_equal(test, ctrl) + + def test_names_with_usecols_bug1636(self): + # Make sure we pick up the right names w/ usecols + data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4" + ctrl_names = ("A", "C", "E") + test = np.genfromtxt(TextIO(data), + dtype=(int, int, int), delimiter=",", + usecols=(0, 2, 4), names=True) + assert_equal(test.dtype.names, 
ctrl_names) + # + test = np.genfromtxt(TextIO(data), + dtype=(int, int, int), delimiter=",", + usecols=("A", "C", "E"), names=True) + assert_equal(test.dtype.names, ctrl_names) + # + test = np.genfromtxt(TextIO(data), + dtype=int, delimiter=",", + usecols=("A", "C", "E"), names=True) + assert_equal(test.dtype.names, ctrl_names) + + def test_fixed_width_names(self): + # Test fix-width w/ names + data = " A B C\n 0 1 2.3\n 45 67 9." + kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) + ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], + dtype=[('A', int), ('B', int), ('C', float)]) + test = np.ndfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + # + kwargs = dict(delimiter=5, names=True, dtype=None) + ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], + dtype=[('A', int), ('B', int), ('C', float)]) + test = np.ndfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + + def test_filling_values(self): + # Test missing values + data = b"1, 2, 3\n1, , 5\n0, 6, \n" + kwargs = dict(delimiter=",", dtype=None, filling_values=-999) + ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) + test = np.ndfromtxt(TextIO(data), **kwargs) + assert_equal(test, ctrl) + + def test_comments_is_none(self): + # Github issue 329 (None was previously being converted to 'None'). + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1], b'testNonetherestofthedata') + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1], b' testNonetherestofthedata') + + def test_latin1(self): + latin1 = b'\xf6\xfc\xf6' + norm = b"norm1,norm2,norm3\n" + enc = b"test1,testNonethe" + latin1 + b",test3\n" + s = norm + enc + norm + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test[1, 0], b"test1") + assert_equal(test[1, 1], b"testNonethe" + latin1) + assert_equal(test[1, 2], b"test3") + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',', + encoding='latin1') + assert_equal(test[1, 0], u"test1") + assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1')) + assert_equal(test[1, 2], u"test3") + + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + assert_equal(test['f0'], 0) + assert_equal(test['f1'], b"testNonethe" + latin1) + + def test_binary_decode_autodtype(self): + utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' + v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16') + assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) + + def test_utf8_byte_encoding(self): + utf8 = b"\xcf\x96" + norm = b"norm1,norm2,norm3\n" + enc = b"test1,testNonethe" + utf8 + b",test3\n" + s = norm + enc + norm + with 
warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) + test = np.genfromtxt(TextIO(s), + dtype=None, comments=None, delimiter=',') + assert_(w[0].category is np.VisibleDeprecationWarning) + ctl = np.array([ + [b'norm1', b'norm2', b'norm3'], + [b'test1', b'testNonethe' + utf8, b'test3'], + [b'norm1', b'norm2', b'norm3']]) + assert_array_equal(test, ctl) + + def test_utf8_file(self): + utf8 = b"\xcf\x96" + with temppath() as path: + with open(path, "wb") as f: + f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',', encoding="UTF-8") + ctl = np.array([ + ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"], + ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]], + dtype=np.unicode) + assert_array_equal(test, ctl) + + # test a mixed dtype + with open(path, "wb") as f: + f.write(b"0,testNonethe" + utf8) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',', encoding="UTF-8") + assert_equal(test['f0'], 0) + assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) + + def test_utf8_file_nodtype_unicode(self): + # bytes encoding with non-latin1 -> unicode upcast + utf8 = u'\u03d6' + latin1 = u'\xf6\xfc\xf6' + + # skip test if cannot encode utf8 test string with preferred + # encoding. The preferred encoding is assumed to be the default + # encoding of io.open. Will need to change this for PyTest, maybe + # using pytest.mark.xfail(raises=***). + try: + encoding = locale.getpreferredencoding() + utf8.encode(encoding) + except (UnicodeError, ImportError): + pytest.skip('Skipping test_utf8_file_nodtype_unicode, ' + 'unable to encode utf8 in preferred encoding') + + with temppath() as path: + with io.open(path, "wt") as f: + f.write(u"norm1,norm2,norm3\n") + f.write(u"norm1," + latin1 + u",norm3\n") + f.write(u"test1,testNonethe" + utf8 + u",test3\n") + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', + np.VisibleDeprecationWarning) + test = np.genfromtxt(path, dtype=None, comments=None, + delimiter=',') + # Check for warning when encoding not specified. 
+ assert_(w[0].category is np.VisibleDeprecationWarning) + ctl = np.array([ + ["norm1", "norm2", "norm3"], + ["norm1", latin1, "norm3"], + ["test1", "testNonethe" + utf8, "test3"]], + dtype=np.unicode) + assert_array_equal(test, ctl) + + def test_recfromtxt(self): + # + data = TextIO('A,B\n0,1\n2,3') + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(data, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,N/A') + test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + + def test_recfromcsv(self): + # + data = TextIO('A,B\n0,1\n2,3') + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(data, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,N/A') + test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + # + data = TextIO('A,B\n0,1\n2,3') + test = np.recfromcsv(data, missing_values='N/A',) + control = np.array([(0, 1), (2, 3)], + dtype=[('a', int), ('b', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = TextIO('A,B\n0,1\n2,3') + dtype = [('a', int), ('b', float)] + test = np.recfromcsv(data, missing_values='N/A', dtype=dtype) + control = np.array([(0, 1), (2, 3)], + dtype=dtype) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + + #gh-10394 + data = TextIO('color\n"red"\n"blue"') + test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')}) + control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))]) + assert_equal(test.dtype, control.dtype) + assert_equal(test, control) + + def test_max_rows(self): + # Test the `max_rows` keyword argument. + data = '1 2\n3 4\n5 6\n7 8\n9 10\n' + txt = TextIO(data) + a1 = np.genfromtxt(txt, max_rows=3) + a2 = np.genfromtxt(txt) + assert_equal(a1, [[1, 2], [3, 4], [5, 6]]) + assert_equal(a2, [[7, 8], [9, 10]]) + + # max_rows must be at least 1. + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0) + + # An input with several invalid rows. 
+ data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n' + + test = np.genfromtxt(TextIO(data), max_rows=2) + control = np.array([[1., 1.], [2., 2.]]) + assert_equal(test, control) + + # Test keywords conflict + assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, + max_rows=4) + + # Test with invalid value + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4) + + # Test with invalid not raise + with suppress_warnings() as sup: + sup.filter(ConversionWarning) + + test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + # Structured array with field names. + data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n' + + # Test with header, names and comments + txt = TextIO(data) + test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True) + control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + # To continue reading the same "file", don't use skip_header or + # names, and use the previously determined dtype. + test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype) + control = np.array([(4.0, 4.0), (5.0, 5.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + + def test_gft_using_filename(self): + # Test that we can load data from a filename as well as a file + # object + tgt = np.arange(6).reshape((2, 3)) + linesep = ('\n', '\r\n', '\r') + + for sep in linesep: + data = '0 1 2' + sep + '3 4 5' + with temppath() as name: + with open(name, 'w') as f: + f.write(data) + res = np.genfromtxt(name) + assert_array_equal(res, tgt) + + def test_gft_from_gzip(self): + # Test that we can load data from a gzipped file + wanted = np.arange(6).reshape((2, 3)) + linesep = ('\n', '\r\n', '\r') + + for sep in linesep: + data = '0 1 2' + sep + '3 4 5' + s = BytesIO() + with gzip.GzipFile(fileobj=s, mode='w') as g: + g.write(asbytes(data)) + + with temppath(suffix='.gz2') as name: + with open(name, 'w') as f: + f.write(data) + assert_array_equal(np.genfromtxt(name), wanted) + + def test_gft_using_generator(self): + # gft doesn't work with unicode. + def count(): + for i in range(10): + yield asbytes("%d" % i) + + res = np.genfromtxt(count()) + assert_array_equal(res, np.arange(10)) + + def test_auto_dtype_largeint(self): + # Regression test for numpy/numpy#5635 whereby large integers could + # cause OverflowErrors. + + # Test the automatic definition of the output dtype + # + # 2**66 = 73786976294838206464 => should convert to float + # 2**34 = 17179869184 => should convert to int64 + # 2**10 = 1024 => should convert to int (int32 on 32-bit systems, + # int64 on 64-bit systems) + + data = TextIO('73786976294838206464 17179869184 1024') + + test = np.ndfromtxt(data, dtype=None) + + assert_equal(test.dtype.names, ['f0', 'f1', 'f2']) + + assert_(test.dtype['f0'] == float) + assert_(test.dtype['f1'] == np.int64) + assert_(test.dtype['f2'] == np.integer) + + assert_allclose(test['f0'], 73786976294838206464.) 
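A minimal standalone sketch of the dtype auto-detection these assertions pin down (illustrative only; assumes a NumPy of roughly this vintage on a 64-bit platform, not part of the vendored file):

import io
import numpy as np

# With dtype=None, genfromtxt infers a dtype per column from the data:
# 2**66 overflows int64 and falls back to float, 2**34 still fits int64,
# 2**10 becomes the platform default integer.
data = io.StringIO(u"73786976294838206464 17179869184 1024")
arr = np.genfromtxt(data, dtype=None)

print(arr.dtype.names)     # ('f0', 'f1', 'f2')
print(arr['f0'].dtype)     # float64
print(arr['f1'].dtype)     # int64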
+ assert_equal(test['f1'], 17179869184) + assert_equal(test['f2'], 1024) + + +@pytest.mark.skipif(Path is None, reason="No pathlib.Path") +class TestPathUsage(object): + # Test that pathlib.Path can be used + def test_loadtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([[1.1, 2], [3, 4]]) + np.savetxt(path, a) + x = np.loadtxt(path) + assert_array_equal(x, a) + + def test_save_load(self): + # Test that pathlib.Path instances can be used with save. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path) + assert_array_equal(data, a) + + def test_save_load_memmap(self): + # Test that pathlib.Path instances can be loaded mem-mapped. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path, mmap_mode='r') + assert_array_equal(data, a) + # close the mem-mapped file + del data + + def test_save_load_memmap_readwrite(self): + # Test that pathlib.Path instances can be written mem-mapped. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + b = np.load(path, mmap_mode='r+') + a[0][0] = 5 + b[0][0] = 5 + del b # closes the file + data = np.load(path) + assert_array_equal(data, a) + + def test_savez_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez(path, lab='place holder') + with np.load(path) as data: + assert_array_equal(data['lab'], 'place holder') + + def test_savez_compressed_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez_compressed(path, lab='place holder') + data = np.load(path) + assert_array_equal(data['lab'], 'place holder') + data.close() + + def test_genfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([(1, 2), (3, 4)]) + np.savetxt(path, a) + data = np.genfromtxt(path) + assert_array_equal(a, data) + + def test_ndfromtxt(self): + # Test outputting a standard ndarray + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1 2\n3 4') + + control = np.array([[1, 2], [3, 4]], dtype=int) + test = np.ndfromtxt(path, dtype=int) + assert_array_equal(test, control) + + def test_mafromtxt(self): + # From `test_fancy_dtype_alt` above + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1,2,3.0\n4,5,6.0\n') + + test = np.mafromtxt(path, delimiter=',') + control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) + assert_equal(test, control) + + def test_recfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(path, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + assert_equal(test, control) + + def test_recfromcsv(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(path, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', int), ('B', int)]) + assert_(isinstance(test, np.recarray)) + 
assert_equal(test, control) + + +def test_gzip_load(): + a = np.random.random((5, 5)) + + s = BytesIO() + f = gzip.GzipFile(fileobj=s, mode="w") + + np.save(f, a) + f.close() + s.seek(0) + + f = gzip.GzipFile(fileobj=s, mode="r") + assert_array_equal(np.load(f), a) + + +def test_gzip_loadtxt(): + # Thanks to another windows brokenness, we can't use + # NamedTemporaryFile: a file created from this function cannot be + # reopened by another open call. So we first put the gzipped string + # of the test reference array, write it to a securely opened file, + # which is then read from by the loadtxt function + s = BytesIO() + g = gzip.GzipFile(fileobj=s, mode='w') + g.write(b'1 2 3\n') + g.close() + + s.seek(0) + with temppath(suffix='.gz') as name: + with open(name, 'wb') as f: + f.write(s.read()) + res = np.loadtxt(name) + s.close() + + assert_array_equal(res, [1, 2, 3]) + + +def test_gzip_loadtxt_from_string(): + s = BytesIO() + f = gzip.GzipFile(fileobj=s, mode="w") + f.write(b'1 2 3\n') + f.close() + s.seek(0) + + f = gzip.GzipFile(fileobj=s, mode="r") + assert_array_equal(np.loadtxt(f), [1, 2, 3]) + + +def test_npzfile_dict(): + s = BytesIO() + x = np.zeros((3, 3)) + y = np.zeros((3, 3)) + + np.savez(s, x=x, y=y) + s.seek(0) + + z = np.load(s) + + assert_('x' in z) + assert_('y' in z) + assert_('x' in z.keys()) + assert_('y' in z.keys()) + + for f, a in z.items(): + assert_(f in ['x', 'y']) + assert_equal(a.shape, (3, 3)) + + assert_(len(z.items()) == 2) + + for f in z: + assert_(f in ['x', 'y']) + + assert_('x' in z.keys()) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_load_refcount(): + # Check that objects returned by np.load are directly freed based on + # their refcount, rather than needing the gc to collect them. + + f = BytesIO() + np.savez(f, [1, 2, 3]) + f.seek(0) + + with assert_no_gc_cycles(): + np.load(f) + + f.seek(0) + dt = [("a", 'u1', 2), ("b", 'u1', 2)] + with assert_no_gc_cycles(): + x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt) + assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt)) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_mixins.py b/contrib/python/numpy/py2/numpy/lib/tests/test_mixins.py new file mode 100644 index 0000000000..3dd5346b69 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_mixins.py @@ -0,0 +1,224 @@ +from __future__ import division, absolute_import, print_function + +import numbers +import operator +import sys + +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises + + +PY2 = sys.version_info.major < 3 + + +# NOTE: This class should be kept as an exact copy of the example from the +# docstring for NDArrayOperatorsMixin. + +class ArrayLike(np.lib.mixins.NDArrayOperatorsMixin): + def __init__(self, value): + self.value = np.asarray(value) + + # One might also consider adding the built-in list type to this + # list, to support operations like np.add(array_like, list) + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + out = kwargs.get('out', ()) + for x in inputs + out: + # Only support operations with instances of _HANDLED_TYPES. + # Use ArrayLike instead of type(self) for isinstance to + # allow subclasses that don't override __array_ufunc__ to + # handle ArrayLike objects. + if not isinstance(x, self._HANDLED_TYPES + (ArrayLike,)): + return NotImplemented + + # Defer to the implementation of the ufunc on unwrapped values. 
+ inputs = tuple(x.value if isinstance(x, ArrayLike) else x + for x in inputs) + if out: + kwargs['out'] = tuple( + x.value if isinstance(x, ArrayLike) else x + for x in out) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if type(result) is tuple: + # multiple return values + return tuple(type(self)(x) for x in result) + elif method == 'at': + # no return value + return None + else: + # one return value + return type(self)(result) + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.value) + + +def wrap_array_like(result): + if type(result) is tuple: + return tuple(ArrayLike(r) for r in result) + else: + return ArrayLike(result) + + +def _assert_equal_type_and_value(result, expected, err_msg=None): + assert_equal(type(result), type(expected), err_msg=err_msg) + if isinstance(result, tuple): + assert_equal(len(result), len(expected), err_msg=err_msg) + for result_item, expected_item in zip(result, expected): + _assert_equal_type_and_value(result_item, expected_item, err_msg) + else: + assert_equal(result.value, expected.value, err_msg=err_msg) + assert_equal(getattr(result.value, 'dtype', None), + getattr(expected.value, 'dtype', None), err_msg=err_msg) + + +_ALL_BINARY_OPERATORS = [ + operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + # TODO: test div on Python 2, only + operator.mod, + divmod, + pow, + operator.lshift, + operator.rshift, + operator.and_, + operator.xor, + operator.or_, +] + + +class TestNDArrayOperatorsMixin(object): + + def test_array_like_add(self): + + def check(result): + _assert_equal_type_and_value(result, ArrayLike(0)) + + check(ArrayLike(0) + 0) + check(0 + ArrayLike(0)) + + check(ArrayLike(0) + np.array(0)) + check(np.array(0) + ArrayLike(0)) + + check(ArrayLike(np.array(0)) + 0) + check(0 + ArrayLike(np.array(0))) + + check(ArrayLike(np.array(0)) + np.array(0)) + check(np.array(0) + ArrayLike(np.array(0))) + + def test_inplace(self): + array_like = ArrayLike(np.array([0])) + array_like += 1 + _assert_equal_type_and_value(array_like, ArrayLike(np.array([1]))) + + array = np.array([0]) + array += ArrayLike(1) + _assert_equal_type_and_value(array, ArrayLike(np.array([1]))) + + def test_opt_out(self): + + class OptOut(object): + """Object that opts out of __array_ufunc__.""" + __array_ufunc__ = None + + def __add__(self, other): + return self + + def __radd__(self, other): + return self + + array_like = ArrayLike(1) + opt_out = OptOut() + + # supported operations + assert_(array_like + opt_out is opt_out) + assert_(opt_out + array_like is opt_out) + + # not supported + with assert_raises(TypeError): + # don't use the Python default, array_like = array_like + opt_out + array_like += opt_out + with assert_raises(TypeError): + array_like - opt_out + with assert_raises(TypeError): + opt_out - array_like + + def test_subclass(self): + + class SubArrayLike(ArrayLike): + """Should take precedence over ArrayLike.""" + + x = ArrayLike(0) + y = SubArrayLike(1) + _assert_equal_type_and_value(x + y, y) + _assert_equal_type_and_value(y + x, y) + + def test_object(self): + x = ArrayLike(0) + obj = object() + with assert_raises(TypeError): + x + obj + with assert_raises(TypeError): + obj + x + with assert_raises(TypeError): + x += obj + + def test_unary_methods(self): + array = np.array([-1, 0, 1, 2]) + array_like = ArrayLike(array) + for op in [operator.neg, + operator.pos, + abs, + operator.invert]: + 
_assert_equal_type_and_value(op(array_like), ArrayLike(op(array))) + + def test_forward_binary_methods(self): + array = np.array([-1, 0, 1, 2]) + array_like = ArrayLike(array) + for op in _ALL_BINARY_OPERATORS: + expected = wrap_array_like(op(array, 1)) + actual = op(array_like, 1) + err_msg = 'failed for operator {}'.format(op) + _assert_equal_type_and_value(expected, actual, err_msg=err_msg) + + def test_reflected_binary_methods(self): + for op in _ALL_BINARY_OPERATORS: + expected = wrap_array_like(op(2, 1)) + actual = op(2, ArrayLike(1)) + err_msg = 'failed for operator {}'.format(op) + _assert_equal_type_and_value(expected, actual, err_msg=err_msg) + + def test_matmul(self): + array = np.array([1, 2], dtype=np.float64) + array_like = ArrayLike(array) + expected = ArrayLike(np.float64(5)) + _assert_equal_type_and_value(expected, np.matmul(array_like, array)) + if not PY2: + _assert_equal_type_and_value( + expected, operator.matmul(array_like, array)) + _assert_equal_type_and_value( + expected, operator.matmul(array, array_like)) + + def test_ufunc_at(self): + array = ArrayLike(np.array([1, 2, 3, 4])) + assert_(np.negative.at(array, np.array([0, 1])) is None) + _assert_equal_type_and_value(array, ArrayLike([-1, -2, 3, 4])) + + def test_ufunc_two_outputs(self): + mantissa, exponent = np.frexp(2 ** -3) + expected = (ArrayLike(mantissa), ArrayLike(exponent)) + _assert_equal_type_and_value( + np.frexp(ArrayLike(2 ** -3)), expected) + _assert_equal_type_and_value( + np.frexp(ArrayLike(np.array(2 ** -3))), expected) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_nanfunctions.py b/contrib/python/numpy/py2/numpy/lib/tests/test_nanfunctions.py new file mode 100644 index 0000000000..504372faf5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_nanfunctions.py @@ -0,0 +1,927 @@ +from __future__ import division, absolute_import, print_function + +import warnings + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_almost_equal, assert_no_warnings, + assert_raises, assert_array_equal, suppress_warnings + ) + + +# Test data +_ndat = np.array([[0.6244, np.nan, 0.2692, 0.0116, np.nan, 0.1170], + [0.5351, -0.9403, np.nan, 0.2100, 0.4759, 0.2833], + [np.nan, np.nan, np.nan, 0.1042, np.nan, -0.5954], + [0.1610, np.nan, np.nan, 0.1859, 0.3146, np.nan]]) + + +# Rows of _ndat with nans removed +_rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]), + np.array([0.5351, -0.9403, 0.2100, 0.4759, 0.2833]), + np.array([0.1042, -0.5954]), + np.array([0.1610, 0.1859, 0.3146])] + +# Rows of _ndat with nans converted to ones +_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170], + [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833], + [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954], + [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]]) + +# Rows of _ndat with nans converted to zeros +_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170], + [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833], + [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954], + [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]]) + + +class TestNanFunctions_MinMax(object): + + nanfuncs = [np.nanmin, np.nanmax] + stdfuncs = [np.min, np.max] + + def test_mutation(self): + # Check that passed array is not modified. 
+ ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for axis in [None, 0, 1]: + tgt = rf(mat, axis=axis, keepdims=True) + res = nf(mat, axis=axis, keepdims=True) + assert_(res.ndim == tgt.ndim) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.zeros(3) + tgt = rf(mat, axis=1) + res = nf(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_dtype_from_input(self): + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + mat = np.eye(3, dtype=c) + tgt = rf(mat, axis=1).dtype.type + res = nf(mat, axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, axis=None).dtype.type + res = nf(mat, axis=None).dtype.type + assert_(res is tgt) + + def test_result_values(self): + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + tgt = [rf(d) for d in _rdat] + res = nf(_ndat, axis=1) + assert_almost_equal(res, tgt) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for f in self.nanfuncs: + for axis in [None, 0, 1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(mat, axis=axis)).all()) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + # Check scalars + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(np.nan))) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_masked(self): + mat = np.ma.fix_invalid(_ndat) + msk = mat._mask.copy() + for f in [np.nanmin]: + res = f(mat, axis=1) + tgt = f(_ndat, axis=1) + assert_equal(res, tgt) + assert_equal(mat._mask, msk) + assert_(not np.isinf(mat).any()) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) 
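For context, the nanmin/nanmax contract exercised above -- NaNs skipped wherever a finite value remains, NaN plus a RuntimeWarning for an all-NaN slice -- can be reproduced with a small sketch (assumes only NumPy; illustrative, not part of the patch):

import warnings
import numpy as np

a = np.array([[1.0, np.nan, 3.0],
              [np.nan, np.nan, np.nan]])

# NaNs are skipped wherever at least one finite value remains.
print(np.nanmax(a, axis=1)[0])        # 3.0

# An all-NaN slice yields NaN and a RuntimeWarning.
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')
    res = np.nanmax(a, axis=1)
assert np.isnan(res[1])
assert any(issubclass(m.category, RuntimeWarning) for m in w)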
+ + def test_subclass(self): + class MyNDArray(np.ndarray): + pass + + # Check that it works and that type and + # shape are preserved + mine = np.eye(3).view(MyNDArray) + for f in self.nanfuncs: + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine) + assert_(res.shape == ()) + + # check that rows of nan are dealt with for subclasses (#4628) + mine[1] = np.nan + for f in self.nanfuncs: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(not np.any(np.isnan(res))) + assert_(len(w) == 0) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(np.isnan(res[1]) and not np.isnan(res[0]) + and not np.isnan(res[2])) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mine) + assert_(res.shape == ()) + assert_(res != np.nan) + assert_(len(w) == 0) + + def test_object_array(self): + arr = np.array([[1.0, 2.0], [np.nan, 4.0], [np.nan, np.nan]], dtype=object) + assert_equal(np.nanmin(arr), 1.0) + assert_equal(np.nanmin(arr, axis=0), [1.0, 2.0]) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + # assert_equal does not work on object arrays of nan + assert_equal(list(np.nanmin(arr, axis=1)), [1.0, 4.0, np.nan]) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + + +class TestNanFunctions_ArgminArgmax(object): + + nanfuncs = [np.nanargmin, np.nanargmax] + + def test_mutation(self): + # Check that passed array is not modified. + ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_result_values(self): + for f, fcmp in zip(self.nanfuncs, [np.greater, np.less]): + for row in _ndat: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in") + ind = f(row) + val = row[ind] + # comparing with NaN is tricky as the result + # is always false except for NaN != NaN + assert_(not np.isnan(val)) + assert_(not fcmp(val, row).any()) + assert_(not np.equal(val, row[:ind]).any()) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for f in self.nanfuncs: + for axis in [None, 0, 1]: + assert_raises(ValueError, f, mat, axis=axis) + assert_raises(ValueError, f, np.nan) + + def test_empty(self): + mat = np.zeros((0, 3)) + for f in self.nanfuncs: + for axis in [0, None]: + assert_raises(ValueError, f, mat, axis=axis) + for axis in [1]: + res = f(mat, axis=axis) + assert_equal(res, np.zeros(0)) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) 
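The arg-variants behave differently: with no finite value there is no index to return, so they raise rather than warn, as the test_allnans assertions above require. A brief sketch of that contract (assumes only NumPy; illustrative):

import numpy as np

row = np.array([np.nan, 2.0, np.nan, 1.0])
print(np.nanargmin(row))              # 3 -- NaNs are ignored when locating the minimum

# With nothing but NaNs there is no valid index, so ValueError is raised.
try:
    np.nanargmin(np.array([np.nan, np.nan]))
except ValueError as exc:
    print("raised:", exc)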
+ + def test_subclass(self): + class MyNDArray(np.ndarray): + pass + + # Check that it works and that type and + # shape are preserved + mine = np.eye(3).view(MyNDArray) + for f in self.nanfuncs: + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == (3,)) + res = f(mine) + assert_(res.shape == ()) + + +class TestNanFunctions_IntTypes(object): + + int_types = (np.int8, np.int16, np.int32, np.int64, np.uint8, + np.uint16, np.uint32, np.uint64) + + mat = np.array([127, 39, 93, 87, 46]) + + def integer_arrays(self): + for dtype in self.int_types: + yield self.mat.astype(dtype) + + def test_nanmin(self): + tgt = np.min(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanmin(mat), tgt) + + def test_nanmax(self): + tgt = np.max(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanmax(mat), tgt) + + def test_nanargmin(self): + tgt = np.argmin(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanargmin(mat), tgt) + + def test_nanargmax(self): + tgt = np.argmax(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanargmax(mat), tgt) + + def test_nansum(self): + tgt = np.sum(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nansum(mat), tgt) + + def test_nanprod(self): + tgt = np.prod(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanprod(mat), tgt) + + def test_nancumsum(self): + tgt = np.cumsum(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumsum(mat), tgt) + + def test_nancumprod(self): + tgt = np.cumprod(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumprod(mat), tgt) + + def test_nanmean(self): + tgt = np.mean(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanmean(mat), tgt) + + def test_nanvar(self): + tgt = np.var(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanvar(mat), tgt) + + tgt = np.var(mat, ddof=1) + for mat in self.integer_arrays(): + assert_equal(np.nanvar(mat, ddof=1), tgt) + + def test_nanstd(self): + tgt = np.std(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanstd(mat), tgt) + + tgt = np.std(self.mat, ddof=1) + for mat in self.integer_arrays(): + assert_equal(np.nanstd(mat, ddof=1), tgt) + + +class SharedNanFunctionsTestsMixin(object): + def test_mutation(self): + # Check that passed array is not modified. 
+ ndat = _ndat.copy() + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for axis in [None, 0, 1]: + tgt = rf(mat, axis=axis, keepdims=True) + res = nf(mat, axis=axis, keepdims=True) + assert_(res.ndim == tgt.ndim) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.zeros(3) + tgt = rf(mat, axis=1) + res = nf(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_dtype_from_dtype(self): + mat = np.eye(3) + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=np.dtype(c), axis=1).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=np.dtype(c), axis=None).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=None).dtype.type + assert_(res is tgt) + + def test_dtype_from_char(self): + mat = np.eye(3) + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=c, axis=1).dtype.type + res = nf(mat, dtype=c, axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=c, axis=None).dtype.type + res = nf(mat, dtype=c, axis=None).dtype.type + assert_(res is tgt) + + def test_dtype_from_input(self): + codes = 'efdgFDG' + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + mat = np.eye(3, dtype=c) + tgt = rf(mat, axis=1).dtype.type + res = nf(mat, axis=1).dtype.type + assert_(res is tgt, "res %s, tgt %s" % (res, tgt)) + # scalar case + tgt = rf(mat, axis=None).dtype.type + res = nf(mat, axis=None).dtype.type + assert_(res is tgt) + + def test_result_values(self): + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + tgt = [rf(d) for d in _rdat] + res = nf(_ndat, axis=1) + assert_almost_equal(res, tgt) + + def test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) 
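A short sketch of the two properties checked above -- nan-aware reductions agreeing with the plain reductions on integer input, and an explicit dtype propagating as it does for the non-nan functions (assumes only NumPy; illustrative):

import numpy as np

v = np.array([127, 39, 93, 87, 46], dtype=np.int16)

# Integer input cannot hold NaN, so the nan-aware reductions simply agree
# with their plain counterparts.
assert np.nansum(v) == np.sum(v)
assert np.nanmean(v) == np.mean(v)

# As with the plain functions, an explicit dtype controls the result type.
res = np.nanmean(np.eye(3, dtype=np.float32), axis=1, dtype=np.float64)
assert res.dtype == np.float64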
+ + def test_subclass(self): + class MyNDArray(np.ndarray): + pass + + # Check that it works and that type and + # shape are preserved + array = np.eye(3) + mine = array.view(MyNDArray) + for f in self.nanfuncs: + expected_shape = f(array, axis=0).shape + res = f(mine, axis=0) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + expected_shape = f(array, axis=1).shape + res = f(mine, axis=1) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + expected_shape = f(array).shape + res = f(mine) + assert_(isinstance(res, MyNDArray)) + assert_(res.shape == expected_shape) + + +class TestNanFunctions_SumProd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nansum, np.nanprod] + stdfuncs = [np.sum, np.prod] + + def test_allnans(self): + # Check for FutureWarning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = np.nansum([np.nan]*3, axis=None) + assert_(res == 0, 'result is not 0') + assert_(len(w) == 0, 'warning raised') + # Check scalar + res = np.nansum(np.nan) + assert_(res == 0, 'result is not 0') + assert_(len(w) == 0, 'warning raised') + # Check there is no warning for not all-nan + np.nansum([0]*3, axis=None) + assert_(len(w) == 0, 'unwanted warning raised') + + def test_empty(self): + for f, tgt_value in zip([np.nansum, np.nanprod], [0, 1]): + mat = np.zeros((0, 3)) + tgt = [tgt_value]*3 + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = [] + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = tgt_value + res = f(mat, axis=None) + assert_equal(res, tgt) + + +class TestNanFunctions_CumSumProd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nancumsum, np.nancumprod] + stdfuncs = [np.cumsum, np.cumprod] + + def test_allnans(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + # Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input + with assert_no_warnings(): + res = f([np.nan]*3, axis=None) + tgt = tgt_value*np.ones((3)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value)) + # Check scalar + res = f(np.nan) + tgt = tgt_value*np.ones((1)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value)) + # Check there is no warning for not all-nan + f([0]*3, axis=None) + + def test_empty(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + mat = np.zeros((0, 3)) + tgt = tgt_value*np.ones((0, 3)) + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = mat + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = np.zeros((0)) + res = f(mat, axis=None) + assert_equal(res, tgt) + + def test_keepdims(self): + for f, g in zip(self.nanfuncs, self.stdfuncs): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = f(mat, axis=axis, out=None) + res = g(mat, axis=axis, out=None) + assert_(res.ndim == tgt.ndim) + + for f in self.nanfuncs: + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + rs = np.random.RandomState(0) + d[rs.rand(*d.shape) < 0.5] = np.nan + res = f(d, axis=None) + assert_equal(res.shape, (1155,)) + for axis in np.arange(4): + res = f(d, axis=axis) + assert_equal(res.shape, (3, 5, 7, 11)) + + def test_result_values(self): + for axis in (-2, -1, 0, 1, None): + tgt = np.cumprod(_ndat_ones, axis=axis) + res = np.nancumprod(_ndat, axis=axis) + assert_almost_equal(res, tgt) + tgt = np.cumsum(_ndat_zeros,axis=axis) + res = np.nancumsum(_ndat, axis=axis) + assert_almost_equal(res, tgt) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = 
np.eye(3) + for axis in (-2, -1, 0, 1): + tgt = rf(mat, axis=axis) + res = nf(mat, axis=axis, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + +class TestNanFunctions_MeanVarStd(SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nanmean, np.nanvar, np.nanstd] + stdfuncs = [np.mean, np.var, np.std] + + def test_dtype_error(self): + for f in self.nanfuncs: + for dtype in [np.bool_, np.int_, np.object_]: + assert_raises(TypeError, f, _ndat, axis=1, dtype=dtype) + + def test_out_dtype_error(self): + for f in self.nanfuncs: + for dtype in [np.bool_, np.int_, np.object_]: + out = np.empty(_ndat.shape[0], dtype=dtype) + assert_raises(TypeError, f, _ndat, axis=1, out=out) + + def test_ddof(self): + nanfuncs = [np.nanvar, np.nanstd] + stdfuncs = [np.var, np.std] + for nf, rf in zip(nanfuncs, stdfuncs): + for ddof in [0, 1]: + tgt = [rf(d, ddof=ddof) for d in _rdat] + res = nf(_ndat, axis=1, ddof=ddof) + assert_almost_equal(res, tgt) + + def test_ddof_too_big(self): + nanfuncs = [np.nanvar, np.nanstd] + stdfuncs = [np.var, np.std] + dsize = [len(d) for d in _rdat] + for nf, rf in zip(nanfuncs, stdfuncs): + for ddof in range(5): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + sup.filter(np.ComplexWarning) + tgt = [ddof >= d for d in dsize] + res = nf(_ndat, axis=1, ddof=ddof) + assert_equal(np.isnan(res), tgt) + if any(tgt): + assert_(len(sup.log) == 1) + else: + assert_(len(sup.log) == 0) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for f in self.nanfuncs: + for axis in [None, 0, 1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + # Check scalar + assert_(np.isnan(f(np.nan))) + assert_(len(w) == 2) + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_empty(self): + mat = np.zeros((0, 3)) + for f in self.nanfuncs: + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(f(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(f(mat, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + +class TestNanFunctions_Median(object): + + def test_mutation(self): + # Check that passed array is not modified. 
+ ndat = _ndat.copy() + np.nanmedian(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = np.median(mat, axis=axis, out=None, overwrite_input=False) + res = np.nanmedian(mat, axis=axis, out=None, overwrite_input=False) + assert_(res.ndim == tgt.ndim) + + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + w = np.random.random((4, 200)) * np.array(d.shape)[:, None] + w = w.astype(np.intp) + d[tuple(w)] = np.nan + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + res = np.nanmedian(d, axis=None, keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanmedian(d, axis=(0, 1), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 11)) + res = np.nanmedian(d, axis=(0, 3), keepdims=True) + assert_equal(res.shape, (1, 5, 7, 1)) + res = np.nanmedian(d, axis=(1,), keepdims=True) + assert_equal(res.shape, (3, 1, 7, 11)) + res = np.nanmedian(d, axis=(0, 1, 2, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanmedian(d, axis=(0, 1, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 1)) + + def test_out(self): + mat = np.random.rand(3, 3) + nan_mat = np.insert(mat, [0, 2], np.nan, axis=1) + resout = np.zeros(3) + tgt = np.median(mat, axis=1) + res = np.nanmedian(nan_mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + # 0-d output: + resout = np.zeros(()) + tgt = np.median(mat, axis=None) + res = np.nanmedian(nan_mat, axis=None, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + res = np.nanmedian(nan_mat, axis=(0, 1), out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_small_large(self): + # test the small and large code paths, current cutoff 400 elements + for s in [5, 20, 51, 200, 1000]: + d = np.random.randn(4, s) + # Randomly set some elements to NaN: + w = np.random.randint(0, d.size, size=d.size // 5) + d.ravel()[w] = np.nan + d[:,0] = 1. # ensure at least one good value + # use normal median without nans to compare + tgt = [] + for x in d: + nonan = np.compress(~np.isnan(x), x) + tgt.append(np.median(nonan, overwrite_input=True)) + + assert_array_equal(np.nanmedian(d, axis=-1), tgt) + + def test_result_values(self): + tgt = [np.median(d) for d in _rdat] + res = np.nanmedian(_ndat, axis=1) + assert_almost_equal(res, tgt) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for axis in [None, 0, 1]: + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) + if axis is None: + assert_(len(sup.log) == 1) + else: + assert_(len(sup.log) == 3) + # Check scalar + assert_(np.isnan(np.nanmedian(np.nan))) + if axis is None: + assert_(len(sup.log) == 2) + else: + assert_(len(sup.log) == 4) + + def test_empty(self): + mat = np.zeros((0, 3)) + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(np.nanmedian(mat, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + def test_scalar(self): + assert_(np.nanmedian(0.) == 0.) 
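A compact sketch of the nanmedian behaviour covered above -- per-slice NaN removal, plus tuple axes and keepdims matching np.median (assumes only NumPy; illustrative):

import numpy as np

d = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, np.nan]])

# NaNs are dropped per slice before the median is taken.
print(np.nanmedian(d, axis=1))                            # [2.  4.5]

# Tuple axes and keepdims behave as for np.median.
print(np.nanmedian(d, axis=(0, 1), keepdims=True).shape)  # (1, 1)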
+ + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.nanmedian, d, axis=-5) + assert_raises(np.AxisError, np.nanmedian, d, axis=(0, -5)) + assert_raises(np.AxisError, np.nanmedian, d, axis=4) + assert_raises(np.AxisError, np.nanmedian, d, axis=(0, 4)) + assert_raises(ValueError, np.nanmedian, d, axis=(1, 1)) + + def test_float_special(self): + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + for inf in [np.inf, -np.inf]: + a = np.array([[inf, np.nan], [np.nan, np.nan]]) + assert_equal(np.nanmedian(a, axis=0), [inf, np.nan]) + assert_equal(np.nanmedian(a, axis=1), [inf, np.nan]) + assert_equal(np.nanmedian(a), inf) + + # minimum fill value check + a = np.array([[np.nan, np.nan, inf], + [np.nan, np.nan, inf]]) + assert_equal(np.nanmedian(a), inf) + assert_equal(np.nanmedian(a, axis=0), [np.nan, np.nan, inf]) + assert_equal(np.nanmedian(a, axis=1), inf) + + # no mask path + a = np.array([[inf, inf], [inf, inf]]) + assert_equal(np.nanmedian(a, axis=1), inf) + + a = np.array([[inf, 7, -inf, -9], + [-10, np.nan, np.nan, 5], + [4, np.nan, np.nan, inf]], + dtype=np.float32) + if inf > 0: + assert_equal(np.nanmedian(a, axis=0), [4., 7., -inf, 5.]) + assert_equal(np.nanmedian(a), 4.5) + else: + assert_equal(np.nanmedian(a, axis=0), [-10., 7., -inf, -9.]) + assert_equal(np.nanmedian(a), -2.5) + assert_equal(np.nanmedian(a, axis=-1), [-1., -2.5, inf]) + + for i in range(0, 10): + for j in range(1, 10): + a = np.array([([np.nan] * i) + ([inf] * j)] * 2) + assert_equal(np.nanmedian(a), inf) + assert_equal(np.nanmedian(a, axis=1), inf) + assert_equal(np.nanmedian(a, axis=0), + ([np.nan] * i) + [inf] * j) + + a = np.array([([np.nan] * i) + ([-inf] * j)] * 2) + assert_equal(np.nanmedian(a), -inf) + assert_equal(np.nanmedian(a, axis=1), -inf) + assert_equal(np.nanmedian(a, axis=0), + ([np.nan] * i) + [-inf] * j) + + +class TestNanFunctions_Percentile(object): + + def test_mutation(self): + # Check that passed array is not modified. 
+ ndat = _ndat.copy() + np.nanpercentile(ndat, 30) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = np.percentile(mat, 70, axis=axis, out=None, + overwrite_input=False) + res = np.nanpercentile(mat, 70, axis=axis, out=None, + overwrite_input=False) + assert_(res.ndim == tgt.ndim) + + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + w = np.random.random((4, 200)) * np.array(d.shape)[:, None] + w = w.astype(np.intp) + d[tuple(w)] = np.nan + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + res = np.nanpercentile(d, 90, axis=None, keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanpercentile(d, 90, axis=(0, 1), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 11)) + res = np.nanpercentile(d, 90, axis=(0, 3), keepdims=True) + assert_equal(res.shape, (1, 5, 7, 1)) + res = np.nanpercentile(d, 90, axis=(1,), keepdims=True) + assert_equal(res.shape, (3, 1, 7, 11)) + res = np.nanpercentile(d, 90, axis=(0, 1, 2, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 1, 1)) + res = np.nanpercentile(d, 90, axis=(0, 1, 3), keepdims=True) + assert_equal(res.shape, (1, 1, 7, 1)) + + def test_out(self): + mat = np.random.rand(3, 3) + nan_mat = np.insert(mat, [0, 2], np.nan, axis=1) + resout = np.zeros(3) + tgt = np.percentile(mat, 42, axis=1) + res = np.nanpercentile(nan_mat, 42, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + # 0-d output: + resout = np.zeros(()) + tgt = np.percentile(mat, 42, axis=None) + res = np.nanpercentile(nan_mat, 42, axis=None, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + res = np.nanpercentile(nan_mat, 42, axis=(0, 1), out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_result_values(self): + tgt = [np.percentile(d, 28) for d in _rdat] + res = np.nanpercentile(_ndat, 28, axis=1) + assert_almost_equal(res, tgt) + # Transpose the array to fit the output convention of numpy.percentile + tgt = np.transpose([np.percentile(d, (28, 98)) for d in _rdat]) + res = np.nanpercentile(_ndat, (28, 98), axis=1) + assert_almost_equal(res, tgt) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for axis in [None, 0, 1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanpercentile(mat, 60, axis=axis)).all()) + if axis is None: + assert_(len(w) == 1) + else: + assert_(len(w) == 3) + assert_(issubclass(w[0].category, RuntimeWarning)) + # Check scalar + assert_(np.isnan(np.nanpercentile(np.nan, 60))) + if axis is None: + assert_(len(w) == 2) + else: + assert_(len(w) == 4) + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_empty(self): + mat = np.zeros((0, 3)) + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanpercentile(mat, 40, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(np.nanpercentile(mat, 40, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + def test_scalar(self): + assert_equal(np.nanpercentile(0., 100), 0.) 
+ a = np.arange(6) + r = np.nanpercentile(a, 50, axis=0) + assert_equal(r, 2.5) + assert_(np.isscalar(r)) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=-5) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, -5)) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=4) + assert_raises(np.AxisError, np.nanpercentile, d, q=5, axis=(0, 4)) + assert_raises(ValueError, np.nanpercentile, d, q=5, axis=(1, 1)) + + def test_multiple_percentiles(self): + perc = [50, 100] + mat = np.ones((4, 3)) + nan_mat = np.nan * mat + # For checking consistency in higher dimensional case + large_mat = np.ones((3, 4, 5)) + large_mat[:, 0:2:4, :] = 0 + large_mat[:, :, 3:] *= 2 + for axis in [None, 0, 1]: + for keepdim in [False, True]: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "All-NaN slice encountered") + val = np.percentile(mat, perc, axis=axis, keepdims=keepdim) + nan_val = np.nanpercentile(nan_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val.shape, val.shape) + + val = np.percentile(large_mat, perc, axis=axis, + keepdims=keepdim) + nan_val = np.nanpercentile(large_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val, val) + + megamat = np.ones((3, 4, 5, 6)) + assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + + +class TestNanFunctions_Quantile(object): + # most of this is already tested by TestPercentile + + def test_regression(self): + ar = np.arange(24).reshape(2, 3, 4).astype(float) + ar[0][1] = np.nan + + assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50)) + assert_equal(np.nanquantile(ar, q=0.5, axis=0), + np.nanpercentile(ar, q=50, axis=0)) + assert_equal(np.nanquantile(ar, q=0.5, axis=1), + np.nanpercentile(ar, q=50, axis=1)) + assert_equal(np.nanquantile(ar, q=[0.5], axis=1), + np.nanpercentile(ar, q=[50], axis=1)) + assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1), + np.nanpercentile(ar, q=[25, 50, 75], axis=1)) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.nanquantile(x, 0), 0.) + assert_equal(np.nanquantile(x, 1), 3.5) + assert_equal(np.nanquantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_packbits.py b/contrib/python/numpy/py2/numpy/lib/tests/test_packbits.py new file mode 100644 index 0000000000..fde5c37f2e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_packbits.py @@ -0,0 +1,268 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_array_equal, assert_equal, assert_raises + + +def test_packbits(): + # Copied from the docstring. 
+ a = [[[1, 0, 1], [0, 1, 0]], + [[1, 1, 0], [0, 0, 1]]] + for dt in '?bBhHiIlLqQ': + arr = np.array(a, dtype=dt) + b = np.packbits(arr, axis=-1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.array([[[160], [64]], [[192], [32]]])) + + assert_raises(TypeError, np.packbits, np.array(a, dtype=float)) + + +def test_packbits_empty(): + shapes = [ + (0,), (10, 20, 0), (10, 0, 20), (0, 10, 20), (20, 0, 0), (0, 20, 0), + (0, 0, 20), (0, 0, 0), + ] + for dt in '?bBhHiIlLqQ': + for shape in shapes: + a = np.empty(shape, dtype=dt) + b = np.packbits(a) + assert_equal(b.dtype, np.uint8) + assert_equal(b.shape, (0,)) + + +def test_packbits_empty_with_axis(): + # Original shapes and lists of packed shapes for different axes. + shapes = [ + ((0,), [(0,)]), + ((10, 20, 0), [(2, 20, 0), (10, 3, 0), (10, 20, 0)]), + ((10, 0, 20), [(2, 0, 20), (10, 0, 20), (10, 0, 3)]), + ((0, 10, 20), [(0, 10, 20), (0, 2, 20), (0, 10, 3)]), + ((20, 0, 0), [(3, 0, 0), (20, 0, 0), (20, 0, 0)]), + ((0, 20, 0), [(0, 20, 0), (0, 3, 0), (0, 20, 0)]), + ((0, 0, 20), [(0, 0, 20), (0, 0, 20), (0, 0, 3)]), + ((0, 0, 0), [(0, 0, 0), (0, 0, 0), (0, 0, 0)]), + ] + for dt in '?bBhHiIlLqQ': + for in_shape, out_shapes in shapes: + for ax, out_shape in enumerate(out_shapes): + a = np.empty(in_shape, dtype=dt) + b = np.packbits(a, axis=ax) + assert_equal(b.dtype, np.uint8) + assert_equal(b.shape, out_shape) + + +def test_packbits_large(): + # test data large enough for 16 byte vectorization + a = np.array([1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, + 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, + 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, + 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, + 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, + 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, + 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, + 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, + 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0]) + a = a.repeat(3) + for dtype in '?bBhHiIlLqQ': + arr = np.array(a, dtype=dtype) + b = np.packbits(arr, axis=None) + assert_equal(b.dtype, np.uint8) + r = [252, 127, 192, 3, 254, 7, 252, 0, 7, 31, 240, 0, 28, 1, 255, 252, + 113, 248, 3, 255, 192, 28, 15, 192, 28, 126, 0, 224, 127, 255, + 227, 142, 7, 31, 142, 63, 28, 126, 56, 227, 240, 0, 227, 128, 63, + 224, 14, 56, 252, 112, 56, 255, 241, 248, 3, 240, 56, 224, 112, + 63, 255, 255, 199, 224, 14, 0, 31, 143, 192, 3, 255, 199, 0, 1, + 255, 224, 1, 255, 252, 126, 63, 0, 1, 192, 252, 14, 63, 0, 15, + 199, 252, 113, 255, 3, 128, 56, 252, 14, 7, 0, 113, 255, 255, 142, 56, 227, + 129, 248, 227, 129, 199, 31, 128] + assert_array_equal(b, r) + # equal for size being multiple of 8 + assert_array_equal(np.unpackbits(b)[:-4], a) + + # check last byte of different remainders (16 byte vectorization) + b = [np.packbits(arr[:-i], axis=None)[-1] for i in range(1, 16)] + assert_array_equal(b, [128, 128, 128, 31, 30, 28, 24, 16, 0, 0, 0, 199, + 198, 196, 192]) + + + arr = arr.reshape(36, 25) + b = np.packbits(arr, axis=0) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, 
[[190, 186, 178, 178, 150, 215, 87, 83, 83, 195, + 199, 206, 204, 204, 140, 140, 136, 136, 8, 40, 105, + 107, 75, 74, 88], + [72, 216, 248, 241, 227, 195, 202, 90, 90, 83, + 83, 119, 127, 109, 73, 64, 208, 244, 189, 45, + 41, 104, 122, 90, 18], + [113, 120, 248, 216, 152, 24, 60, 52, 182, 150, + 150, 150, 146, 210, 210, 246, 255, 255, 223, + 151, 21, 17, 17, 131, 163], + [214, 210, 210, 64, 68, 5, 5, 1, 72, 88, 92, + 92, 78, 110, 39, 181, 149, 220, 222, 218, 218, + 202, 234, 170, 168], + [0, 128, 128, 192, 80, 112, 48, 160, 160, 224, + 240, 208, 144, 128, 160, 224, 240, 208, 144, + 144, 176, 240, 224, 192, 128]]) + + b = np.packbits(arr, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[252, 127, 192, 0], + [ 7, 252, 15, 128], + [240, 0, 28, 0], + [255, 128, 0, 128], + [192, 31, 255, 128], + [142, 63, 0, 0], + [255, 240, 7, 0], + [ 7, 224, 14, 0], + [126, 0, 224, 0], + [255, 255, 199, 0], + [ 56, 28, 126, 0], + [113, 248, 227, 128], + [227, 142, 63, 0], + [ 0, 28, 112, 0], + [ 15, 248, 3, 128], + [ 28, 126, 56, 0], + [ 56, 255, 241, 128], + [240, 7, 224, 0], + [227, 129, 192, 128], + [255, 255, 254, 0], + [126, 0, 224, 0], + [ 3, 241, 248, 0], + [ 0, 255, 241, 128], + [128, 0, 255, 128], + [224, 1, 255, 128], + [248, 252, 126, 0], + [ 0, 7, 3, 128], + [224, 113, 248, 0], + [ 0, 252, 127, 128], + [142, 63, 224, 0], + [224, 14, 63, 0], + [ 7, 3, 128, 0], + [113, 255, 255, 128], + [ 28, 113, 199, 0], + [ 7, 227, 142, 0], + [ 14, 56, 252, 0]]) + + arr = arr.T.copy() + b = np.packbits(arr, axis=0) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[252, 7, 240, 255, 192, 142, 255, 7, 126, 255, + 56, 113, 227, 0, 15, 28, 56, 240, 227, 255, + 126, 3, 0, 128, 224, 248, 0, 224, 0, 142, 224, + 7, 113, 28, 7, 14], + [127, 252, 0, 128, 31, 63, 240, 224, 0, 255, + 28, 248, 142, 28, 248, 126, 255, 7, 129, 255, + 0, 241, 255, 0, 1, 252, 7, 113, 252, 63, 14, + 3, 255, 113, 227, 56], + [192, 15, 28, 0, 255, 0, 7, 14, 224, 199, 126, + 227, 63, 112, 3, 56, 241, 224, 192, 254, 224, + 248, 241, 255, 255, 126, 3, 248, 127, 224, 63, + 128, 255, 199, 142, 252], + [0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 128, 0, + 0, 128, 0, 128, 0, 128, 0, 0, 0, 128, 128, + 128, 0, 128, 0, 128, 0, 0, 0, 128, 0, 0, 0]]) + + b = np.packbits(arr, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, [[190, 72, 113, 214, 0], + [186, 216, 120, 210, 128], + [178, 248, 248, 210, 128], + [178, 241, 216, 64, 192], + [150, 227, 152, 68, 80], + [215, 195, 24, 5, 112], + [ 87, 202, 60, 5, 48], + [ 83, 90, 52, 1, 160], + [ 83, 90, 182, 72, 160], + [195, 83, 150, 88, 224], + [199, 83, 150, 92, 240], + [206, 119, 150, 92, 208], + [204, 127, 146, 78, 144], + [204, 109, 210, 110, 128], + [140, 73, 210, 39, 160], + [140, 64, 246, 181, 224], + [136, 208, 255, 149, 240], + [136, 244, 255, 220, 208], + [ 8, 189, 223, 222, 144], + [ 40, 45, 151, 218, 144], + [105, 41, 21, 218, 176], + [107, 104, 17, 202, 240], + [ 75, 122, 17, 234, 224], + [ 74, 90, 131, 170, 192], + [ 88, 18, 163, 168, 128]]) + + + # result is the same if input is multiplied with a nonzero value + for dtype in 'bBhHiIlLqQ': + arr = np.array(a, dtype=dtype) + rnd = np.random.randint(low=np.iinfo(dtype).min, + high=np.iinfo(dtype).max, size=arr.size, + dtype=dtype) + rnd[rnd == 0] = 1 + arr *= rnd.astype(dtype) + b = np.packbits(arr, axis=-1) + assert_array_equal(np.unpackbits(b)[:-4], a) + + assert_raises(TypeError, np.packbits, np.array(a, dtype=float)) + + +def test_packbits_very_large(): + # test some with a larger arrays gh-8637 + # code is 
covered earlier but larger array makes crash on bug more likely + for s in range(950, 1050): + for dt in '?bBhHiIlLqQ': + x = np.ones((200, s), dtype=bool) + np.packbits(x, axis=1) + + +def test_unpackbits(): + # Copied from the docstring. + a = np.array([[2], [7], [23]], dtype=np.uint8) + b = np.unpackbits(a, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.array([[0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 0, 1, 1, 1]])) + + +def test_unpackbits_empty(): + a = np.empty((0,), dtype=np.uint8) + b = np.unpackbits(a) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.empty((0,))) + + +def test_unpackbits_empty_with_axis(): + # Lists of packed shapes for different axes and unpacked shapes. + shapes = [ + ([(0,)], (0,)), + ([(2, 24, 0), (16, 3, 0), (16, 24, 0)], (16, 24, 0)), + ([(2, 0, 24), (16, 0, 24), (16, 0, 3)], (16, 0, 24)), + ([(0, 16, 24), (0, 2, 24), (0, 16, 3)], (0, 16, 24)), + ([(3, 0, 0), (24, 0, 0), (24, 0, 0)], (24, 0, 0)), + ([(0, 24, 0), (0, 3, 0), (0, 24, 0)], (0, 24, 0)), + ([(0, 0, 24), (0, 0, 24), (0, 0, 3)], (0, 0, 24)), + ([(0, 0, 0), (0, 0, 0), (0, 0, 0)], (0, 0, 0)), + ] + for in_shapes, out_shape in shapes: + for ax, in_shape in enumerate(in_shapes): + a = np.empty(in_shape, dtype=np.uint8) + b = np.unpackbits(a, axis=ax) + assert_equal(b.dtype, np.uint8) + assert_equal(b.shape, out_shape) + + +def test_unpackbits_large(): + # test all possible numbers via comparison to already tested packbits + d = np.arange(277, dtype=np.uint8) + assert_array_equal(np.packbits(np.unpackbits(d)), d) + assert_array_equal(np.packbits(np.unpackbits(d[::2])), d[::2]) + d = np.tile(d, (3, 1)) + assert_array_equal(np.packbits(np.unpackbits(d, axis=1), axis=1), d) + d = d.T.copy() + assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_polynomial.py b/contrib/python/numpy/py2/numpy/lib/tests/test_polynomial.py new file mode 100644 index 0000000000..89759bd839 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_polynomial.py @@ -0,0 +1,261 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_raises, assert_allclose + ) + + +class TestPolynomial(object): + def test_poly1d_str_and_repr(self): + p = np.poly1d([1., 2, 3]) + assert_equal(repr(p), 'poly1d([1., 2., 3.])') + assert_equal(str(p), + ' 2\n' + '1 x + 2 x + 3') + + q = np.poly1d([3., 2, 1]) + assert_equal(repr(q), 'poly1d([3., 2., 1.])') + assert_equal(str(q), + ' 2\n' + '3 x + 2 x + 1') + + r = np.poly1d([1.89999 + 2j, -3j, -5.12345678, 2 + 1j]) + assert_equal(str(r), + ' 3 2\n' + '(1.9 + 2j) x - 3j x - 5.123 x + (2 + 1j)') + + assert_equal(str(np.poly1d([-3, -2, -1])), + ' 2\n' + '-3 x - 2 x - 1') + + def test_poly1d_resolution(self): + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p(0), 3.0) + assert_equal(p(5), 38.0) + assert_equal(q(0), 1.0) + assert_equal(q(5), 86.0) + + def test_poly1d_math(self): + # here we use some simple coeffs to make calculations easier + p = np.poly1d([1., 2, 4]) + q = np.poly1d([4., 2, 1]) + assert_equal(p/q, (np.poly1d([0.25]), np.poly1d([1.5, 3.75]))) + assert_equal(p.integ(), np.poly1d([1/3, 1., 4., 0.])) + assert_equal(p.integ(1), np.poly1d([1/3, 1., 4., 0.])) + + p = np.poly1d([1., 2, 3]) + q = np.poly1d([3., 2, 1]) + assert_equal(p * q, np.poly1d([3., 8., 14., 8., 3.])) + 
assert_equal(p + q, np.poly1d([4., 4., 4.])) + assert_equal(p - q, np.poly1d([-2., 0., 2.])) + assert_equal(p ** 4, np.poly1d([1., 8., 36., 104., 214., 312., 324., 216., 81.])) + assert_equal(p(q), np.poly1d([9., 12., 16., 8., 6.])) + assert_equal(q(p), np.poly1d([3., 12., 32., 40., 34.])) + assert_equal(p.deriv(), np.poly1d([2., 2.])) + assert_equal(p.deriv(2), np.poly1d([2.])) + assert_equal(np.polydiv(np.poly1d([1, 0, -1]), np.poly1d([1, 1])), + (np.poly1d([1., -1.]), np.poly1d([0.]))) + + def test_poly1d_misc(self): + p = np.poly1d([1., 2, 3]) + assert_equal(np.asarray(p), np.array([1., 2., 3.])) + assert_equal(len(p), 2) + assert_equal((p[0], p[1], p[2], p[3]), (3.0, 2.0, 1.0, 0)) + + def test_poly1d_variable_arg(self): + q = np.poly1d([1., 2, 3], variable='y') + assert_equal(str(q), + ' 2\n' + '1 y + 2 y + 3') + q = np.poly1d([1., 2, 3], variable='lambda') + assert_equal(str(q), + ' 2\n' + '1 lambda + 2 lambda + 3') + + def test_poly(self): + assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]), + [1, -3, -2, 6]) + + # From matlab docs + A = [[1, 2, 3], [4, 5, 6], [7, 8, 0]] + assert_array_almost_equal(np.poly(A), [1, -6, -72, -27]) + + # Should produce real output for perfect conjugates + assert_(np.isrealobj(np.poly([+1.082j, +2.613j, -2.613j, -1.082j]))) + assert_(np.isrealobj(np.poly([0+1j, -0+-1j, 1+2j, + 1-2j, 1.+3.5j, 1-3.5j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j, 1+3j, 1-3.j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 2j, -2j]))) + assert_(np.isrealobj(np.poly([1j, -1j]))) + assert_(np.isrealobj(np.poly([1, -1]))) + + assert_(np.iscomplexobj(np.poly([1j, -1.0000001j]))) + + np.random.seed(42) + a = np.random.randn(100) + 1j*np.random.randn(100) + assert_(np.isrealobj(np.poly(np.concatenate((a, np.conjugate(a)))))) + + def test_roots(self): + assert_array_equal(np.roots([1, 0, 0]), [0, 0]) + + def test_str_leading_zeros(self): + p = np.poly1d([4, 3, 2, 1]) + p[3] = 0 + assert_equal(str(p), + " 2\n" + "3 x + 2 x + 1") + + p = np.poly1d([1, 2]) + p[0] = 0 + p[1] = 0 + assert_equal(str(p), " \n0") + + def test_polyfit(self): + c = np.array([3., 2., 1.]) + x = np.linspace(0, 2, 7) + y = np.polyval(c, x) + err = [1, -1, 1, -1, 1, -1, 1] + weights = np.arange(8, 1, -1)**2/7.0 + + # Check exception when too few points for variance estimate. 
Note that + # the estimate requires the number of data points to exceed + # degree + 1 + assert_raises(ValueError, np.polyfit, + [1], [1], deg=0, cov=True) + + # check 1D case + m, cov = np.polyfit(x, y+err, 2, cov=True) + est = [3.8571, 0.2857, 1.619] + assert_almost_equal(est, m, decimal=4) + val0 = [[ 1.4694, -2.9388, 0.8163], + [-2.9388, 6.3673, -2.1224], + [ 0.8163, -2.1224, 1.161 ]] + assert_almost_equal(val0, cov, decimal=4) + + m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True) + assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4) + val = [[ 4.3964, -5.0052, 0.4878], + [-5.0052, 6.8067, -0.9089], + [ 0.4878, -0.9089, 0.3337]] + assert_almost_equal(val, cov2, decimal=4) + + m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled") + assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4) + val = [[ 0.1473, -0.1677, 0.0163], + [-0.1677, 0.228 , -0.0304], + [ 0.0163, -0.0304, 0.0112]] + assert_almost_equal(val, cov3, decimal=4) + + # check 2D (n,1) case + y = y[:, np.newaxis] + c = c[:, np.newaxis] + assert_almost_equal(c, np.polyfit(x, y, 2)) + # check 2D (n,2) case + yy = np.concatenate((y, y), axis=1) + cc = np.concatenate((c, c), axis=1) + assert_almost_equal(cc, np.polyfit(x, yy, 2)) + + m, cov = np.polyfit(x, yy + np.array(err)[:, np.newaxis], 2, cov=True) + assert_almost_equal(est, m[:, 0], decimal=4) + assert_almost_equal(est, m[:, 1], decimal=4) + assert_almost_equal(val0, cov[:, :, 0], decimal=4) + assert_almost_equal(val0, cov[:, :, 1], decimal=4) + + # check order 1 (deg=0) case, were the analytic results are simple + np.random.seed(123) + y = np.random.normal(size=(4, 10000)) + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True) + # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5. + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01) + # Without scaling, since reduced chi2 is 1, the result should be the same. + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]), + deg=0, cov="unscaled") + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_almost_equal(np.sqrt(cov.mean()), 0.5) + # If we estimate our errors wrong, no change with scaling: + w = np.full(y.shape[0], 1./0.5) + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True) + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01) + # But if we do not scale, our estimate for the error in the mean will + # differ. 
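+ # (the assumed per-point error is 1/w = 0.5, so the unscaled error on the mean is 0.5/sqrt(4) = 0.25)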
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled") + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_almost_equal(np.sqrt(cov.mean()), 0.25) + + def test_objects(self): + from decimal import Decimal + p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')]) + p2 = p * Decimal('1.333333333333333') + assert_(p2[1] == Decimal("3.9999999999999990")) + p2 = p.deriv() + assert_(p2[1] == Decimal('8.0')) + p2 = p.integ() + assert_(p2[3] == Decimal("1.333333333333333333333333333")) + assert_(p2[2] == Decimal('1.5')) + assert_(np.issubdtype(p2.coeffs.dtype, np.object_)) + p = np.poly([Decimal(1), Decimal(2)]) + assert_equal(np.poly([Decimal(1), Decimal(2)]), + [1, Decimal(-3), Decimal(2)]) + + def test_complex(self): + p = np.poly1d([3j, 2j, 1j]) + p2 = p.integ() + assert_((p2.coeffs == [1j, 1j, 1j, 0]).all()) + p2 = p.deriv() + assert_((p2.coeffs == [6j, 2j]).all()) + + def test_integ_coeffs(self): + p = np.poly1d([3, 2, 1]) + p2 = p.integ(3, k=[9, 7, 6]) + assert_( + (p2.coeffs == [1/4./5., 1/3./4., 1/2./3., 9/1./2., 7, 6]).all()) + + def test_zero_dims(self): + try: + np.poly(np.zeros((0, 0))) + except ValueError: + pass + + def test_poly_int_overflow(self): + """ + Regression test for gh-5096. + """ + v = np.arange(1, 21) + assert_almost_equal(np.poly(v), np.poly(np.diag(v))) + + def test_poly_eq(self): + p = np.poly1d([1, 2, 3]) + p2 = np.poly1d([1, 2, 4]) + assert_equal(p == None, False) + assert_equal(p != None, True) + assert_equal(p == p, True) + assert_equal(p == p2, False) + assert_equal(p != p2, True) + + def test_polydiv(self): + b = np.poly1d([2, 6, 6, 1]) + a = np.poly1d([-1j, (1+2j), -(2+1j), 1]) + q, r = np.polydiv(b, a) + assert_equal(q.coeffs.dtype, np.complex128) + assert_equal(r.coeffs.dtype, np.complex128) + assert_equal(q*a + r, b) + + def test_poly_coeffs_mutable(self): + """ Coefficients should be modifiable """ + p = np.poly1d([1, 2, 3]) + + p.coeffs += 1 + assert_equal(p.coeffs, [2, 3, 4]) + + p.coeffs[2] += 10 + assert_equal(p.coeffs, [2, 3, 14]) + + # this never used to be allowed - let's not add features to deprecated + # APIs + assert_raises(AttributeError, setattr, p, 'coeffs', np.array(1)) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_recfunctions.py b/contrib/python/numpy/py2/numpy/lib/tests/test_recfunctions.py new file mode 100644 index 0000000000..0c839d486f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_recfunctions.py @@ -0,0 +1,980 @@ +from __future__ import division, absolute_import, print_function + +import pytest + +import numpy as np +import numpy.ma as ma +from numpy.ma.mrecords import MaskedRecords +from numpy.ma.testutils import assert_equal +from numpy.testing import assert_, assert_raises +from numpy.lib.recfunctions import ( + drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields, + find_duplicates, merge_arrays, append_fields, stack_arrays, join_by, + repack_fields, unstructured_to_structured, structured_to_unstructured, + apply_along_fields, require_fields, assign_fields_by_name) +get_fieldspec = np.lib.recfunctions._get_fieldspec +get_names = np.lib.recfunctions.get_names +get_names_flat = np.lib.recfunctions.get_names_flat +zip_descr = np.lib.recfunctions._zip_descr +zip_dtype = np.lib.recfunctions._zip_dtype + + +class TestRecFunctions(object): + # Misc tests + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array([('A', 1.), ('B', 2.)], + dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + 
dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_zip_descr(self): + # Test zip_descr + (w, x, y, z) = self.data + + # Std array + test = zip_descr((x, x), flatten=True) + assert_equal(test, + np.dtype([('', int), ('', int)])) + test = zip_descr((x, x), flatten=False) + assert_equal(test, + np.dtype([('', int), ('', int)])) + + # Std & flexible-dtype + test = zip_descr((x, z), flatten=True) + assert_equal(test, + np.dtype([('', int), ('A', '|S3'), ('B', float)])) + test = zip_descr((x, z), flatten=False) + assert_equal(test, + np.dtype([('', int), + ('', [('A', '|S3'), ('B', float)])])) + + # Standard & nested dtype + test = zip_descr((x, w), flatten=True) + assert_equal(test, + np.dtype([('', int), + ('a', int), + ('ba', float), ('bb', int)])) + test = zip_descr((x, w), flatten=False) + assert_equal(test, + np.dtype([('', int), + ('', [('a', int), + ('b', [('ba', float), ('bb', int)])])])) + + def test_drop_fields(self): + # Test drop_fields + a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + + # A basic field + test = drop_fields(a, 'a') + control = np.array([((2, 3.0),), ((5, 6.0),)], + dtype=[('b', [('ba', float), ('bb', int)])]) + assert_equal(test, control) + + # Another basic field (but nesting two fields) + test = drop_fields(a, 'b') + control = np.array([(1,), (4,)], dtype=[('a', int)]) + assert_equal(test, control) + + # A nested sub-field + test = drop_fields(a, ['ba', ]) + control = np.array([(1, (3.0,)), (4, (6.0,))], + dtype=[('a', int), ('b', [('bb', int)])]) + assert_equal(test, control) + + # All the nested sub-field from a field: zap that field + test = drop_fields(a, ['ba', 'bb']) + control = np.array([(1,), (4,)], dtype=[('a', int)]) + assert_equal(test, control) + + test = drop_fields(a, ['a', 'b']) + assert_(test is None) + + def test_rename_fields(self): + # Test rename fields + a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], + dtype=[('a', int), + ('b', [('ba', float), ('bb', (float, 2))])]) + test = rename_fields(a, {'a': 'A', 'bb': 'BB'}) + newdtype = [('A', int), ('b', [('ba', float), ('BB', (float, 2))])] + control = a.view(newdtype) + assert_equal(test.dtype, newdtype) + assert_equal(test, control) + + def test_get_names(self): + # Test get_names + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = get_names(ndtype) + assert_equal(test, ('A', 'B')) + + ndtype = np.dtype([('a', int), ('b', [('ba', float), ('bb', int)])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ('ba', 'bb')))) + + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ()))) + + ndtype = np.dtype([]) + test = get_names(ndtype) + assert_equal(test, ()) + + def test_get_names_flat(self): + # Test get_names_flat + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = get_names_flat(ndtype) + assert_equal(test, ('A', 'B')) + + ndtype = np.dtype([('a', int), ('b', [('ba', float), ('bb', int)])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b', 'ba', 'bb')) + + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b')) + + ndtype = np.dtype([]) + test = get_names_flat(ndtype) + assert_equal(test, ()) + + def test_get_fieldstructure(self): + # Test get_fieldstructure + + # No nested fields + ndtype = np.dtype([('A', '|S3'), ('B', float)]) + test = get_fieldstructure(ndtype) + assert_equal(test, {'A': [], 'B': []}) + + # One 1-nested field + ndtype = 
np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + test = get_fieldstructure(ndtype) + assert_equal(test, {'A': [], 'B': [], 'BA': ['B', ], 'BB': ['B']}) + + # One 2-nested fields + ndtype = np.dtype([('A', int), + ('B', [('BA', int), + ('BB', [('BBA', int), ('BBB', int)])])]) + test = get_fieldstructure(ndtype) + control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], + 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} + assert_equal(test, control) + + # 0 fields + ndtype = np.dtype([]) + test = get_fieldstructure(ndtype) + assert_equal(test, {}) + + def test_find_duplicates(self): + # Test find_duplicates + a = ma.array([(2, (2., 'B')), (1, (2., 'B')), (2, (2., 'B')), + (1, (1., 'B')), (2, (2., 'B')), (2, (2., 'C'))], + mask=[(0, (0, 0)), (0, (0, 0)), (0, (0, 0)), + (0, (0, 0)), (1, (0, 0)), (0, (1, 0))], + dtype=[('A', int), ('B', [('BA', float), ('BB', '|S1')])]) + + test = find_duplicates(a, ignoremask=False, return_index=True) + control = [0, 2] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='A', return_index=True) + control = [0, 1, 2, 3, 5] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='B', return_index=True) + control = [0, 1, 2, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='BA', return_index=True) + control = [0, 1, 2, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, key='BB', return_index=True) + control = [0, 1, 2, 3, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + def test_find_duplicates_ignoremask(self): + # Test the ignoremask option of find_duplicates + ndtype = [('a', int)] + a = ma.array([1, 1, 1, 2, 2, 3, 3], + mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) + test = find_duplicates(a, ignoremask=True, return_index=True) + control = [0, 1, 3, 4] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + test = find_duplicates(a, ignoremask=False, return_index=True) + control = [0, 1, 2, 3, 4, 6] + assert_equal(sorted(test[-1]), control) + assert_equal(test[0], a[test[-1]]) + + def test_repack_fields(self): + dt = np.dtype('u1,f4,i8', align=True) + a = np.zeros(2, dtype=dt) + + assert_equal(repack_fields(dt), np.dtype('u1,f4,i8')) + assert_equal(repack_fields(a).itemsize, 13) + assert_equal(repack_fields(repack_fields(dt), align=True), dt) + + # make sure type is preserved + dt = np.dtype((np.record, dt)) + assert_(repack_fields(dt).type is np.record) + + def test_structured_to_unstructured(self): + a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)]) + out = structured_to_unstructured(a) + assert_equal(out, np.zeros((4,5), dtype='f8')) + + b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + out = np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1) + assert_equal(out, np.array([ 3. , 5.5, 9. , 11. ])) + out = np.mean(structured_to_unstructured(b[['x']]), axis=-1) + assert_equal(out, np.array([ 1. , 4. , 7. , 10. 
])) + + c = np.arange(20).reshape((4,5)) + out = unstructured_to_structured(c, a.dtype) + want = np.array([( 0, ( 1., 2), [ 3., 4.]), + ( 5, ( 6., 7), [ 8., 9.]), + (10, (11., 12), [13., 14.]), + (15, (16., 17), [18., 19.])], + dtype=[('a', 'i4'), + ('b', [('f0', 'f4'), ('f1', 'u2')]), + ('c', 'f4', (2,))]) + assert_equal(out, want) + + d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) + assert_equal(apply_along_fields(np.mean, d), + np.array([ 8.0/3, 16.0/3, 26.0/3, 11. ])) + assert_equal(apply_along_fields(np.mean, d[['x', 'z']]), + np.array([ 3. , 5.5, 9. , 11. ])) + + # check that for uniform field dtypes we get a view, not a copy: + d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], + dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'i4')]) + dd = structured_to_unstructured(d) + ddd = unstructured_to_structured(dd, d.dtype) + assert_(dd.base is d) + assert_(ddd.base is d) + + # including uniform fields with subarrays unpacked + d = np.array([(1, [2, 3], [[ 4, 5], [ 6, 7]]), + (8, [9, 10], [[11, 12], [13, 14]])], + dtype=[('x0', 'i4'), ('x1', ('i4', 2)), + ('x2', ('i4', (2, 2)))]) + dd = structured_to_unstructured(d) + ddd = unstructured_to_structured(dd, d.dtype) + assert_(dd.base is d) + assert_(ddd.base is d) + + # test that nested fields with identical names don't break anything + point = np.dtype([('x', int), ('y', int)]) + triangle = np.dtype([('a', point), ('b', point), ('c', point)]) + arr = np.zeros(10, triangle) + res = structured_to_unstructured(arr, dtype=int) + assert_equal(res, np.zeros((10, 6), dtype=int)) + + + # test nested combinations of subarrays and structured arrays, gh-13333 + def subarray(dt, shape): + return np.dtype((dt, shape)) + + def structured(*dts): + return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)]) + + def inspect(dt, dtype=None): + arr = np.zeros((), dt) + ret = structured_to_unstructured(arr, dtype=dtype) + backarr = unstructured_to_structured(ret, dt) + return ret.shape, ret.dtype, backarr.dtype + + dt = structured(subarray(structured(np.int32, np.int32), 3)) + assert_equal(inspect(dt), ((6,), np.int32, dt)) + + dt = structured(subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((4,), np.int32, dt)) + + dt = structured(np.int32) + assert_equal(inspect(dt), ((1,), np.int32, dt)) + + dt = structured(np.int32, subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((5,), np.int32, dt)) + + dt = structured() + assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt)) + + # these currently don't work, but we may make it work in the future + assert_raises(NotImplementedError, structured_to_unstructured, + np.zeros(3, dt), dtype=np.int32) + assert_raises(NotImplementedError, unstructured_to_structured, + np.zeros((3,0), dtype=np.int32)) + + def test_field_assignment_by_name(self): + a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) + newdt = [('b', 'f4'), ('c', 'u1')] + + assert_equal(require_fields(a, newdt), np.ones(2, newdt)) + + b = np.array([(1,2), (3,4)], dtype=newdt) + assign_fields_by_name(a, b, zero_unassigned=False) + assert_equal(a, np.array([(1,1,2),(1,3,4)], dtype=a.dtype)) + assign_fields_by_name(a, b) + assert_equal(a, np.array([(0,1,2),(0,3,4)], dtype=a.dtype)) + + # test nested fields + a = np.ones(2, dtype=[('a', [('b', 'f8'), ('c', 'u1')])]) + newdt = [('a', [('c', 'u1')])] + assert_equal(require_fields(a, newdt), np.ones(2, newdt)) + b = np.array([((2,),), ((3,),)], dtype=newdt) + assign_fields_by_name(a, b, 
zero_unassigned=False) + assert_equal(a, np.array([((1,2),), ((1,3),)], dtype=a.dtype)) + assign_fields_by_name(a, b) + assert_equal(a, np.array([((0,2),), ((0,3),)], dtype=a.dtype)) + + # test unstructured code path for 0d arrays + a, b = np.array(3), np.array(0) + assign_fields_by_name(b, a) + assert_equal(b[()], 3) + + +class TestRecursiveFillFields(object): + # Test recursive_fill_fields. + def test_simple_flexible(self): + # Test recursive_fill_fields on flexible-array + a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + b = np.zeros((3,), dtype=a.dtype) + test = recursive_fill_fields(a, b) + control = np.array([(1, 10.), (2, 20.), (0, 0.)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + + def test_masked_flexible(self): + # Test recursive_fill_fields on masked flexible-array + a = ma.array([(1, 10.), (2, 20.)], mask=[(0, 1), (1, 0)], + dtype=[('A', int), ('B', float)]) + b = ma.zeros((3,), dtype=a.dtype) + test = recursive_fill_fields(a, b) + control = ma.array([(1, 10.), (2, 20.), (0, 0.)], + mask=[(0, 1), (1, 0), (0, 0)], + dtype=[('A', int), ('B', float)]) + assert_equal(test, control) + + +class TestMergeArrays(object): + # Test merge_arrays + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array( + [(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_solo(self): + # Test merge_arrays on a single array. + (_, x, _, z) = self.data + + test = merge_arrays(x) + control = np.array([(1,), (2,)], dtype=[('f0', int)]) + assert_equal(test, control) + test = merge_arrays((x,)) + assert_equal(test, control) + + test = merge_arrays(z, flatten=False) + assert_equal(test, z) + test = merge_arrays(z, flatten=True) + assert_equal(test, z) + + def test_solo_w_flatten(self): + # Test merge_arrays on a single array w & w/o flattening + w = self.data[0] + test = merge_arrays(w, flatten=False) + assert_equal(test, w) + + test = merge_arrays(w, flatten=True) + control = np.array([(1, 2, 3.0), (4, 5, 6.0)], + dtype=[('a', int), ('ba', float), ('bb', int)]) + assert_equal(test, control) + + def test_standard(self): + # Test standard & standard + # Test merge arrays + (_, x, y, _) = self.data + test = merge_arrays((x, y), usemask=False) + control = np.array([(1, 10), (2, 20), (-1, 30)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + + test = merge_arrays((x, y), usemask=True) + control = ma.array([(1, 10), (2, 20), (-1, 30)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_flatten(self): + # Test standard & flexible + (_, x, _, z) = self.data + test = merge_arrays((x, z), flatten=True) + control = np.array([(1, 'A', 1.), (2, 'B', 2.)], + dtype=[('f0', int), ('A', '|S3'), ('B', float)]) + assert_equal(test, control) + + test = merge_arrays((x, z), flatten=False) + control = np.array([(1, ('A', 1.)), (2, ('B', 2.))], + dtype=[('f0', int), + ('f1', [('A', '|S3'), ('B', float)])]) + assert_equal(test, control) + + def test_flatten_wflexible(self): + # Test flatten standard & nested + (w, x, _, _) = self.data + test = merge_arrays((x, w), flatten=True) + control = np.array([(1, 1, 2, 3.0), (2, 4, 5, 6.0)], + dtype=[('f0', int), + ('a', int), ('ba', float), ('bb', int)]) + assert_equal(test, control) + + test = merge_arrays((x, w), flatten=False) + 
controldtype = [('f0', int), + ('f1', [('a', int), + ('b', [('ba', float), ('bb', int)])])] + control = np.array([(1., (1, (2, 3.0))), (2, (4, (5, 6.0)))], + dtype=controldtype) + assert_equal(test, control) + + def test_wmasked_arrays(self): + # Test merge_arrays masked arrays + (_, x, _, _) = self.data + mx = ma.array([1, 2, 3], mask=[1, 0, 0]) + test = merge_arrays((x, mx), usemask=True) + control = ma.array([(1, 1), (2, 2), (-1, 3)], + mask=[(0, 1), (0, 0), (1, 0)], + dtype=[('f0', int), ('f1', int)]) + assert_equal(test, control) + test = merge_arrays((x, mx), usemask=True, asrecarray=True) + assert_equal(test, control) + assert_(isinstance(test, MaskedRecords)) + + def test_w_singlefield(self): + # Test single field + test = merge_arrays((np.array([1, 2]).view([('a', int)]), + np.array([10., 20., 30.])),) + control = ma.array([(1, 10.), (2, 20.), (-1, 30.)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('a', int), ('f1', float)]) + assert_equal(test, control) + + def test_w_shorter_flex(self): + # Test merge_arrays w/ a shorter flexndarray. + z = self.data[-1] + + # Fixme, this test looks incomplete and broken + #test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)]))) + #control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)], + # dtype=[('A', '|S3'), ('B', float), ('C', int)]) + #assert_equal(test, control) + + # Hack to avoid pyflakes warnings about unused variables + merge_arrays((z, np.array([10, 20, 30]).view([('C', int)]))) + np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)], + dtype=[('A', '|S3'), ('B', float), ('C', int)]) + + def test_singlerecord(self): + (_, x, y, z) = self.data + test = merge_arrays((x[0], y[0], z[0]), usemask=False) + control = np.array([(1, 10, ('A', 1))], + dtype=[('f0', int), + ('f1', int), + ('f2', [('A', '|S3'), ('B', float)])]) + assert_equal(test, control) + + +class TestAppendFields(object): + # Test append_fields + + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_append_single(self): + # Test simple case + (_, x, _, _) = self.data + test = append_fields(x, 'A', data=[10, 20, 30]) + control = ma.array([(1, 10), (2, 20), (-1, 30)], + mask=[(0, 0), (0, 0), (1, 0)], + dtype=[('f0', int), ('A', int)],) + assert_equal(test, control) + + def test_append_double(self): + # Test simple case + (_, x, _, _) = self.data + test = append_fields(x, ('A', 'B'), data=[[10, 20, 30], [100, 200]]) + control = ma.array([(1, 10, 100), (2, 20, 200), (-1, 30, -1)], + mask=[(0, 0, 0), (0, 0, 0), (1, 0, 1)], + dtype=[('f0', int), ('A', int), ('B', int)],) + assert_equal(test, control) + + def test_append_on_flex(self): + # Test append_fields on flexible type arrays + z = self.data[-1] + test = append_fields(z, 'C', data=[10, 20, 30]) + control = ma.array([('A', 1., 10), ('B', 2., 20), (-1, -1., 30)], + mask=[(0, 0, 0), (0, 0, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('C', int)],) + assert_equal(test, control) + + def test_append_on_nested(self): + # Test append_fields on nested fields + w = self.data[0] + test = append_fields(w, 'C', data=[10, 20, 30]) + control = ma.array([(1, (2, 3.0), 10), + (4, (5, 6.0), 20), + (-1, (-1, -1.), 30)], + mask=[( + 0, (0, 0), 0), (0, (0, 0), 0), (1, (1, 1), 0)], + dtype=[('a', int), + ('b', [('ba', float), ('bb', int)]), + ('C', int)],) + assert_equal(test, 
control) + + +class TestStackArrays(object): + # Test stack_arrays + def setup(self): + x = np.array([1, 2, ]) + y = np.array([10, 20, 30]) + z = np.array( + [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + def test_solo(self): + # Test stack_arrays on single arrays + (_, x, _, _) = self.data + test = stack_arrays((x,)) + assert_equal(test, x) + assert_(test is x) + + test = stack_arrays(x) + assert_equal(test, x) + assert_(test is x) + + def test_unnamed_fields(self): + # Tests combinations of arrays w/o named fields + (_, x, y, _) = self.data + + test = stack_arrays((x, x), usemask=False) + control = np.array([1, 2, 1, 2]) + assert_equal(test, control) + + test = stack_arrays((x, y), usemask=False) + control = np.array([1, 2, 10, 20, 30]) + assert_equal(test, control) + + test = stack_arrays((y, x), usemask=False) + control = np.array([10, 20, 30, 1, 2]) + assert_equal(test, control) + + def test_unnamed_and_named_fields(self): + # Test combination of arrays w/ & w/o named fields + (_, x, _, z) = self.data + + test = stack_arrays((x, z)) + control = ma.array([(1, -1, -1), (2, -1, -1), + (-1, 'A', 1), (-1, 'B', 2)], + mask=[(0, 1, 1), (0, 1, 1), + (1, 0, 0), (1, 0, 0)], + dtype=[('f0', int), ('A', '|S3'), ('B', float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, x)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + (-1, -1, 1), (-1, -1, 2), ], + mask=[(0, 0, 1), (0, 0, 1), + (1, 1, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('f2', int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, z, x)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + ('A', 1, -1), ('B', 2, -1), + (-1, -1, 1), (-1, -1, 2), ], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 1), (0, 0, 1), + (1, 1, 0), (1, 1, 0)], + dtype=[('A', '|S3'), ('B', float), ('f2', int)]) + assert_equal(test, control) + + def test_matching_named_fields(self): + # Test combination of arrays w/ matching field names + (_, x, _, z) = self.data + zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)]) + test = stack_arrays((z, zz)) + control = ma.array([('A', 1, -1), ('B', 2, -1), + ( + 'a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 0), (0, 0, 0), (0, 0, 0)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + test = stack_arrays((z, zz, x)) + ndtype = [('A', '|S3'), ('B', float), ('C', float), ('f3', int)] + control = ma.array([('A', 1, -1, -1), ('B', 2, -1, -1), + ('a', 10., 100., -1), ('b', 20., 200., -1), + ('c', 30., 300., -1), + (-1, -1, -1, 1), (-1, -1, -1, 2)], + dtype=ndtype, + mask=[(0, 0, 1, 1), (0, 0, 1, 1), + (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1), + (1, 1, 1, 0), (1, 1, 1, 0)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_defaults(self): + # Test defaults: no exception raised if keys of defaults are not fields. 
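+ # ('D' below is not a field of either input array, so its default value is simply ignored)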
+ (_, _, _, z) = self.data + zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)]) + defaults = {'A': '???', 'B': -999., 'C': -9999., 'D': -99999.} + test = stack_arrays((z, zz), defaults=defaults) + control = ma.array([('A', 1, -9999.), ('B', 2, -9999.), + ( + 'a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)], + mask=[(0, 0, 1), (0, 0, 1), + (0, 0, 0), (0, 0, 0), (0, 0, 0)]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_autoconversion(self): + # Tests autoconversion + adtype = [('A', int), ('B', bool), ('C', float)] + a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype) + bdtype = [('A', int), ('B', float), ('C', float)] + b = ma.array([(4, 5, 6)], dtype=bdtype) + control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], + dtype=bdtype) + test = stack_arrays((a, b), autoconvert=True) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + with assert_raises(TypeError): + stack_arrays((a, b), autoconvert=False) + + def test_checktitles(self): + # Test using titles in the field names + adtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)] + a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype) + bdtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)] + b = ma.array([(4, 5, 6)], dtype=bdtype) + test = stack_arrays((a, b)) + control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], + dtype=bdtype) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + def test_subdtype(self): + z = np.array([ + ('A', 1), ('B', 2) + ], dtype=[('A', '|S3'), ('B', float, (1,))]) + zz = np.array([ + ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.) 
+ ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)]) + + res = stack_arrays((z, zz)) + expected = ma.array( + data=[ + (b'A', [1.0], 0), + (b'B', [2.0], 0), + (b'a', [10.0], 100.0), + (b'b', [20.0], 200.0), + (b'c', [30.0], 300.0)], + mask=[ + (False, [False], True), + (False, [False], True), + (False, [False], False), + (False, [False], False), + (False, [False], False) + ], + dtype=zz.dtype + ) + assert_equal(res.dtype, expected.dtype) + assert_equal(res, expected) + assert_equal(res.mask, expected.mask) + + +class TestJoinBy(object): + def setup(self): + self.a = np.array(list(zip(np.arange(10), np.arange(50, 60), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('c', int)]) + self.b = np.array(list(zip(np.arange(5, 15), np.arange(65, 75), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('d', int)]) + + def test_inner_join(self): + # Basic test of join_by + a, b = self.a, self.b + + test = join_by('a', a, b, jointype='inner') + control = np.array([(5, 55, 65, 105, 100), (6, 56, 66, 106, 101), + (7, 57, 67, 107, 102), (8, 58, 68, 108, 103), + (9, 59, 69, 109, 104)], + dtype=[('a', int), ('b1', int), ('b2', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_join(self): + a, b = self.a, self.b + + # Fixme, this test is broken + #test = join_by(('a', 'b'), a, b) + #control = np.array([(5, 55, 105, 100), (6, 56, 106, 101), + # (7, 57, 107, 102), (8, 58, 108, 103), + # (9, 59, 109, 104)], + # dtype=[('a', int), ('b', int), + # ('c', int), ('d', int)]) + #assert_equal(test, control) + + # Hack to avoid pyflakes unused variable warnings + join_by(('a', 'b'), a, b) + np.array([(5, 55, 105, 100), (6, 56, 106, 101), + (7, 57, 107, 102), (8, 58, 108, 103), + (9, 59, 109, 104)], + dtype=[('a', int), ('b', int), + ('c', int), ('d', int)]) + + def test_join_subdtype(self): + # tests the bug in https://stackoverflow.com/q/44769632/102441 + from numpy.lib import recfunctions as rfn + foo = np.array([(1,)], + dtype=[('key', int)]) + bar = np.array([(1, np.array([1,2,3]))], + dtype=[('key', int), ('value', 'uint16', 3)]) + res = join_by('key', foo, bar) + assert_equal(res, bar.view(ma.MaskedArray)) + + def test_outer_join(self): + a, b = self.a, self.b + + test = join_by(('a', 'b'), a, b, 'outer') + control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1), + (2, 52, 102, -1), (3, 53, 103, -1), + (4, 54, 104, -1), (5, 55, 105, -1), + (5, 65, -1, 100), (6, 56, 106, -1), + (6, 66, -1, 101), (7, 57, 107, -1), + (7, 67, -1, 102), (8, 58, 108, -1), + (8, 68, -1, 103), (9, 59, 109, -1), + (9, 69, -1, 104), (10, 70, -1, 105), + (11, 71, -1, 106), (12, 72, -1, 107), + (13, 73, -1, 108), (14, 74, -1, 109)], + mask=[(0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 0, 1), + (0, 0, 1, 0), (0, 0, 1, 0), + (0, 0, 1, 0), (0, 0, 1, 0), + (0, 0, 1, 0), (0, 0, 1, 0)], + dtype=[('a', int), ('b', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_leftouter_join(self): + a, b = self.a, self.b + + test = join_by(('a', 'b'), a, b, 'leftouter') + control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1), + (2, 52, 102, -1), (3, 53, 103, -1), + (4, 54, 104, -1), (5, 55, 105, -1), + (6, 56, 106, -1), (7, 57, 107, -1), + (8, 58, 108, -1), (9, 59, 109, -1)], + mask=[(0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1), + (0, 0, 0, 1), (0, 0, 0, 1)], + 
dtype=[('a', int), ('b', int), ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_different_field_order(self): + # gh-8940 + a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')]) + b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')]) + # this should not give a FutureWarning: + j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False) + assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2']) + + def test_duplicate_keys(self): + a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')]) + b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')]) + assert_raises(ValueError, join_by, ['a', 'b', 'b'], a, b) + + @pytest.mark.xfail(reason="See comment at gh-9343") + def test_same_name_different_dtypes_key(self): + a_dtype = np.dtype([('key', 'S5'), ('value', '<f4')]) + b_dtype = np.dtype([('key', 'S10'), ('value', '<f4')]) + expected_dtype = np.dtype([ + ('key', 'S10'), ('value1', '<f4'), ('value2', '<f4')]) + + a = np.array([('Sarah', 8.0), ('John', 6.0)], dtype=a_dtype) + b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype) + res = join_by('key', a, b) + + assert_equal(res.dtype, expected_dtype) + + def test_same_name_different_dtypes(self): + # gh-9338 + a_dtype = np.dtype([('key', 'S10'), ('value', '<f4')]) + b_dtype = np.dtype([('key', 'S10'), ('value', '<f8')]) + expected_dtype = np.dtype([ + ('key', '|S10'), ('value1', '<f4'), ('value2', '<f8')]) + + a = np.array([('Sarah', 8.0), ('John', 6.0)], dtype=a_dtype) + b = np.array([('Sarah', 10.0), ('John', 7.0)], dtype=b_dtype) + res = join_by('key', a, b) + + assert_equal(res.dtype, expected_dtype) + + def test_subarray_key(self): + a_dtype = np.dtype([('pos', int, 3), ('f', '<f4')]) + a = np.array([([1, 1, 1], np.pi), ([1, 2, 3], 0.0)], dtype=a_dtype) + + b_dtype = np.dtype([('pos', int, 3), ('g', '<f4')]) + b = np.array([([1, 1, 1], 3), ([3, 2, 1], 0.0)], dtype=b_dtype) + + expected_dtype = np.dtype([('pos', int, 3), ('f', '<f4'), ('g', '<f4')]) + expected = np.array([([1, 1, 1], np.pi, 3)], dtype=expected_dtype) + + res = join_by('pos', a, b) + assert_equal(res.dtype, expected_dtype) + assert_equal(res, expected) + + def test_padded_dtype(self): + dt = np.dtype('i1,f4', align=True) + dt.names = ('k', 'v') + assert_(len(dt.descr), 3) # padding field is inserted + + a = np.array([(1, 3), (3, 2)], dt) + b = np.array([(1, 1), (2, 2)], dt) + res = join_by('k', a, b) + + # no padding fields remain + expected_dtype = np.dtype([ + ('k', 'i1'), ('v1', 'f4'), ('v2', 'f4') + ]) + + assert_equal(res.dtype, expected_dtype) + + +class TestJoinBy2(object): + @classmethod + def setup(cls): + cls.a = np.array(list(zip(np.arange(10), np.arange(50, 60), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('c', int)]) + cls.b = np.array(list(zip(np.arange(10), np.arange(65, 75), + np.arange(100, 110))), + dtype=[('a', int), ('b', int), ('d', int)]) + + def test_no_r1postfix(self): + # Basic test of join_by no_r1postfix + a, b = self.a, self.b + + test = join_by( + 'a', a, b, r1postfix='', r2postfix='2', jointype='inner') + control = np.array([(0, 50, 65, 100, 100), (1, 51, 66, 101, 101), + (2, 52, 67, 102, 102), (3, 53, 68, 103, 103), + (4, 54, 69, 104, 104), (5, 55, 70, 105, 105), + (6, 56, 71, 106, 106), (7, 57, 72, 107, 107), + (8, 58, 73, 108, 108), (9, 59, 74, 109, 109)], + dtype=[('a', int), ('b', int), ('b2', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_no_postfix(self): + assert_raises(ValueError, join_by, 'a', self.a, self.b, + r1postfix='', r2postfix='') + + def 
test_no_r2postfix(self): + # Basic test of join_by no_r2postfix + a, b = self.a, self.b + + test = join_by( + 'a', a, b, r1postfix='1', r2postfix='', jointype='inner') + control = np.array([(0, 50, 65, 100, 100), (1, 51, 66, 101, 101), + (2, 52, 67, 102, 102), (3, 53, 68, 103, 103), + (4, 54, 69, 104, 104), (5, 55, 70, 105, 105), + (6, 56, 71, 106, 106), (7, 57, 72, 107, 107), + (8, 58, 73, 108, 108), (9, 59, 74, 109, 109)], + dtype=[('a', int), ('b1', int), ('b', int), + ('c', int), ('d', int)]) + assert_equal(test, control) + + def test_two_keys_two_vars(self): + a = np.array(list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), + np.arange(50, 60), np.arange(10, 20))), + dtype=[('k', int), ('a', int), ('b', int), ('c', int)]) + + b = np.array(list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), + np.arange(65, 75), np.arange(0, 10))), + dtype=[('k', int), ('a', int), ('b', int), ('c', int)]) + + control = np.array([(10, 0, 50, 65, 10, 0), (11, 0, 51, 66, 11, 1), + (10, 1, 52, 67, 12, 2), (11, 1, 53, 68, 13, 3), + (10, 2, 54, 69, 14, 4), (11, 2, 55, 70, 15, 5), + (10, 3, 56, 71, 16, 6), (11, 3, 57, 72, 17, 7), + (10, 4, 58, 73, 18, 8), (11, 4, 59, 74, 19, 9)], + dtype=[('k', int), ('a', int), ('b1', int), + ('b2', int), ('c1', int), ('c2', int)]) + test = join_by( + ['a', 'k'], a, b, r1postfix='1', r2postfix='2', jointype='inner') + assert_equal(test.dtype, control.dtype) + assert_equal(test, control) + +class TestAppendFieldsObj(object): + """ + Test append_fields with arrays containing objects + """ + # https://github.com/numpy/numpy/issues/2346 + + def setup(self): + from datetime import date + self.data = dict(obj=date(2000, 1, 1)) + + def test_append_to_objects(self): + "Test append_fields when the base array contains objects" + obj = self.data['obj'] + x = np.array([(obj, 1.), (obj, 2.)], + dtype=[('A', object), ('B', float)]) + y = np.array([10, 20], dtype=int) + test = append_fields(x, 'C', data=y, usemask=False) + control = np.array([(obj, 1.0, 10), (obj, 2.0, 20)], + dtype=[('A', object), ('B', float), ('C', int)]) + assert_equal(test, control) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_regression.py b/contrib/python/numpy/py2/numpy/lib/tests/test_regression.py new file mode 100644 index 0000000000..4c46bc46b5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_regression.py @@ -0,0 +1,254 @@ +from __future__ import division, absolute_import, print_function + +import os +import sys + +import numpy as np +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_array_almost_equal, + assert_raises, _assert_valid_refcount, + ) +from numpy.compat import unicode + + +class TestRegression(object): + def test_poly1d(self): + # Ticket #28 + assert_equal(np.poly1d([1]) - np.poly1d([1, 0]), + np.poly1d([-1, 1])) + + def test_cov_parameters(self): + # Ticket #91 + x = np.random.random((3, 3)) + y = x.copy() + np.cov(x, rowvar=1) + np.cov(y, rowvar=0) + assert_array_equal(x, y) + + def test_mem_digitize(self): + # Ticket #95 + for i in range(100): + np.digitize([1, 2, 3, 4], [1, 3]) + np.digitize([0, 1, 2, 3, 4], [1, 3]) + + def test_unique_zero_sized(self): + # Ticket #205 + assert_array_equal([], np.unique(np.array([]))) + + def test_mem_vectorise(self): + # Ticket #325 + vt = np.vectorize(lambda *args: args) + vt(np.zeros((1, 2, 1)), np.zeros((2, 1, 1)), np.zeros((1, 1, 2))) + vt(np.zeros((1, 2, 1)), np.zeros((2, 1, 1)), np.zeros((1, + 1, 2)), np.zeros((2, 2))) + + def test_mgrid_single_element(self): + # Ticket #339 + 
assert_array_equal(np.mgrid[0:0:1j], [0]) + assert_array_equal(np.mgrid[0:0], []) + + def test_refcount_vectorize(self): + # Ticket #378 + def p(x, y): + return 123 + v = np.vectorize(p) + _assert_valid_refcount(v) + + def test_poly1d_nan_roots(self): + # Ticket #396 + p = np.poly1d([np.nan, np.nan, 1], r=0) + assert_raises(np.linalg.LinAlgError, getattr, p, "r") + + def test_mem_polymul(self): + # Ticket #448 + np.polymul([], [1.]) + + def test_mem_string_concat(self): + # Ticket #469 + x = np.array([]) + np.append(x, 'asdasd\tasdasd') + + def test_poly_div(self): + # Ticket #553 + u = np.poly1d([1, 2, 3]) + v = np.poly1d([1, 2, 3, 4, 5]) + q, r = np.polydiv(u, v) + assert_equal(q*v + r, u) + + def test_poly_eq(self): + # Ticket #554 + x = np.poly1d([1, 2, 3]) + y = np.poly1d([3, 4]) + assert_(x != y) + assert_(x == x) + + def test_polyfit_build(self): + # Ticket #628 + ref = [-1.06123820e-06, 5.70886914e-04, -1.13822012e-01, + 9.95368241e+00, -3.14526520e+02] + x = [90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, + 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176] + y = [9.0, 3.0, 7.0, 4.0, 4.0, 8.0, 6.0, 11.0, 9.0, 8.0, 11.0, 5.0, + 6.0, 5.0, 9.0, 8.0, 6.0, 10.0, 6.0, 10.0, 7.0, 6.0, 6.0, 6.0, + 13.0, 4.0, 9.0, 11.0, 4.0, 5.0, 8.0, 5.0, 7.0, 7.0, 6.0, 12.0, + 7.0, 7.0, 9.0, 4.0, 12.0, 6.0, 6.0, 4.0, 3.0, 9.0, 8.0, 8.0, + 6.0, 7.0, 9.0, 10.0, 6.0, 8.0, 4.0, 7.0, 7.0, 10.0, 8.0, 8.0, + 6.0, 3.0, 8.0, 4.0, 5.0, 7.0, 8.0, 6.0, 6.0, 4.0, 12.0, 9.0, + 8.0, 8.0, 8.0, 6.0, 7.0, 4.0, 4.0, 5.0, 7.0] + tested = np.polyfit(x, y, 4) + assert_array_almost_equal(ref, tested) + + def test_polydiv_type(self): + # Make polydiv work for complex types + msg = "Wrong type, should be complex" + x = np.ones(3, dtype=complex) + q, r = np.polydiv(x, x) + assert_(q.dtype == complex, msg) + msg = "Wrong type, should be float" + x = np.ones(3, dtype=int) + q, r = np.polydiv(x, x) + assert_(q.dtype == float, msg) + + def test_histogramdd_too_many_bins(self): + # Ticket 928. + assert_raises(ValueError, np.histogramdd, np.ones((1, 10)), bins=2**10) + + def test_polyint_type(self): + # Ticket #944 + msg = "Wrong type, should be complex" + x = np.ones(3, dtype=complex) + assert_(np.polyint(x).dtype == complex, msg) + msg = "Wrong type, should be float" + x = np.ones(3, dtype=int) + assert_(np.polyint(x).dtype == float, msg) + + def test_ndenumerate_crash(self): + # Ticket 1140 + # Shouldn't crash: + list(np.ndenumerate(np.array([[]]))) + + def test_asfarray_none(self): + # Test for changeset r5065 + assert_array_equal(np.array([np.nan]), np.asfarray([None])) + + def test_large_fancy_indexing(self): + # Large enough to fail on 64-bit. 
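+ # thesize**5 exceeds the intp index range, so the fancy indexing below is expected to raise ValueError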
+ nbits = np.dtype(np.intp).itemsize * 8 + thesize = int((2**nbits)**(1.0/5.0)+1) + + def dp(): + n = 3 + a = np.ones((n,)*5) + i = np.random.randint(0, n, size=thesize) + a[np.ix_(i, i, i, i, i)] = 0 + + def dp2(): + n = 3 + a = np.ones((n,)*5) + i = np.random.randint(0, n, size=thesize) + a[np.ix_(i, i, i, i, i)] + + assert_raises(ValueError, dp) + assert_raises(ValueError, dp2) + + def test_void_coercion(self): + dt = np.dtype([('a', 'f4'), ('b', 'i4')]) + x = np.zeros((1,), dt) + assert_(np.r_[x, x].dtype == dt) + + def test_who_with_0dim_array(self): + # ticket #1243 + import os + import sys + + oldstdout = sys.stdout + sys.stdout = open(os.devnull, 'w') + try: + try: + np.who({'foo': np.array(1)}) + except Exception: + raise AssertionError("ticket #1243") + finally: + sys.stdout.close() + sys.stdout = oldstdout + + def test_include_dirs(self): + # As a sanity check, just test that get_include + # includes something reasonable. Somewhat + # related to ticket #1405. + include_dirs = [np.get_include()] + for path in include_dirs: + assert_(isinstance(path, (str, unicode))) + assert_(path != '') + + def test_polyder_return_type(self): + # Ticket #1249 + assert_(isinstance(np.polyder(np.poly1d([1]), 0), np.poly1d)) + assert_(isinstance(np.polyder([1], 0), np.ndarray)) + assert_(isinstance(np.polyder(np.poly1d([1]), 1), np.poly1d)) + assert_(isinstance(np.polyder([1], 1), np.ndarray)) + + def test_append_fields_dtype_list(self): + # Ticket #1676 + from numpy.lib.recfunctions import append_fields + + base = np.array([1, 2, 3], dtype=np.int32) + names = ['a', 'b', 'c'] + data = np.eye(3).astype(np.int32) + dlist = [np.float64, np.int32, np.int32] + try: + append_fields(base, names, data, dlist) + except Exception: + raise AssertionError() + + def test_loadtxt_fields_subarrays(self): + # For ticket #1936 + if sys.version_info[0] >= 3: + from io import StringIO + else: + from StringIO import StringIO + + dt = [("a", 'u1', 2), ("b", 'u1', 2)] + x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt) + assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt)) + + dt = [("a", [("a", 'u1', (1, 3)), ("b", 'u1')])] + x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt) + assert_equal(x, np.array([(((0, 1, 2), 3),)], dtype=dt)) + + dt = [("a", 'u1', (2, 2))] + x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt) + assert_equal(x, np.array([(((0, 1), (2, 3)),)], dtype=dt)) + + dt = [("a", 'u1', (2, 3, 2))] + x = np.loadtxt(StringIO("0 1 2 3 4 5 6 7 8 9 10 11"), dtype=dt) + data = [((((0, 1), (2, 3), (4, 5)), ((6, 7), (8, 9), (10, 11))),)] + assert_equal(x, np.array(data, dtype=dt)) + + def test_nansum_with_boolean(self): + # gh-2978 + a = np.zeros(2, dtype=bool) + try: + np.nansum(a) + except Exception: + raise AssertionError() + + def test_py3_compat(self): + # gh-2561 + # Test if the oldstyle class test is bypassed in python3 + class C(): + """Old-style class in python2, normal class in python3""" + pass + + out = open(os.devnull, 'w') + try: + np.info(C(), output=out) + except AttributeError: + raise AssertionError() + finally: + out.close() diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_shape_base.py b/contrib/python/numpy/py2/numpy/lib/tests/test_shape_base.py new file mode 100644 index 0000000000..01ea028bbf --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_shape_base.py @@ -0,0 +1,708 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +import warnings +import functools +import sys +import pytest + +from numpy.lib.shape_base import ( + apply_along_axis, 
apply_over_axes, array_split, split, hsplit, dsplit, + vsplit, dstack, column_stack, kron, tile, expand_dims, take_along_axis, + put_along_axis + ) +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises, assert_warns + ) + + +IS_64BIT = sys.maxsize > 2**32 + + +def _add_keepdims(func): + """ hack in keepdims behavior into a function taking an axis """ + @functools.wraps(func) + def wrapped(a, axis, **kwargs): + res = func(a, axis=axis, **kwargs) + if axis is None: + axis = 0 # res is now a scalar, so we can insert this anywhere + return np.expand_dims(res, axis=axis) + return wrapped + + +class TestTakeAlongAxis(object): + def test_argequivalent(self): + """ Test it translates from arg<func> to <func> """ + from numpy.random import rand + a = rand(3, 4, 5) + + funcs = [ + (np.sort, np.argsort, dict()), + (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()), + (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()), + (np.partition, np.argpartition, dict(kth=2)), + ] + + for func, argfunc, kwargs in funcs: + for axis in list(range(a.ndim)) + [None]: + a_func = func(a, axis=axis, **kwargs) + ai_func = argfunc(a, axis=axis, **kwargs) + assert_equal(a_func, take_along_axis(a, ai_func, axis=axis)) + + def test_invalid(self): + """ Test it errors when indices has too few dimensions """ + a = np.ones((10, 10)) + ai = np.ones((10, 2), dtype=np.intp) + + # sanity check + take_along_axis(a, ai, axis=1) + + # not enough indices + assert_raises(ValueError, take_along_axis, a, np.array(1), axis=1) + # bool arrays not allowed + assert_raises(IndexError, take_along_axis, a, ai.astype(bool), axis=1) + # float arrays not allowed + assert_raises(IndexError, take_along_axis, a, ai.astype(float), axis=1) + # invalid axis + assert_raises(np.AxisError, take_along_axis, a, ai, axis=10) + + def test_empty(self): + """ Test everything is ok with empty results, even with inserted dims """ + a = np.ones((3, 4, 5)) + ai = np.ones((3, 0, 5), dtype=np.intp) + + actual = take_along_axis(a, ai, axis=1) + assert_equal(actual.shape, ai.shape) + + def test_broadcast(self): + """ Test that non-indexing dimensions are broadcast in both directions """ + a = np.ones((3, 4, 1)) + ai = np.ones((1, 2, 5), dtype=np.intp) + actual = take_along_axis(a, ai, axis=1) + assert_equal(actual.shape, (3, 2, 5)) + + +class TestPutAlongAxis(object): + def test_replace_max(self): + a_base = np.array([[10, 30, 20], [60, 40, 50]]) + + for axis in list(range(a_base.ndim)) + [None]: + # we mutate this in the loop + a = a_base.copy() + + # replace the max with a small value + i_max = _add_keepdims(np.argmax)(a, axis=axis) + put_along_axis(a, i_max, -99, axis=axis) + + # find the new minimum, which should max + i_min = _add_keepdims(np.argmin)(a, axis=axis) + + assert_equal(i_min, i_max) + + def test_broadcast(self): + """ Test that non-indexing dimensions are broadcast in both directions """ + a = np.ones((3, 4, 1)) + ai = np.arange(10, dtype=np.intp).reshape((1, 2, 5)) % 4 + put_along_axis(a, ai, 20, axis=1) + assert_equal(take_along_axis(a, ai, axis=1), 20) + + +class TestApplyAlongAxis(object): + def test_simple(self): + a = np.ones((20, 10), 'd') + assert_array_equal( + apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1])) + + def test_simple101(self): + a = np.ones((10, 101), 'd') + assert_array_equal( + apply_along_axis(len, 0, a), len(a)*np.ones(a.shape[1])) + + def test_3d(self): + a = np.arange(27).reshape((3, 3, 3)) + assert_array_equal(apply_along_axis(np.sum, 0, a), + [[27, 30, 33], [36, 39, 
42], [45, 48, 51]]) + + def test_preserve_subclass(self): + def double(row): + return row * 2 + + class MyNDArray(np.ndarray): + pass + + m = np.array([[0, 1], [2, 3]]).view(MyNDArray) + expected = np.array([[0, 2], [4, 6]]).view(MyNDArray) + + result = apply_along_axis(double, 0, m) + assert_(isinstance(result, MyNDArray)) + assert_array_equal(result, expected) + + result = apply_along_axis(double, 1, m) + assert_(isinstance(result, MyNDArray)) + assert_array_equal(result, expected) + + def test_subclass(self): + class MinimalSubclass(np.ndarray): + data = 1 + + def minimal_function(array): + return array.data + + a = np.zeros((6, 3)).view(MinimalSubclass) + + assert_array_equal( + apply_along_axis(minimal_function, 0, a), np.array([1, 1, 1]) + ) + + def test_scalar_array(self, cls=np.ndarray): + a = np.ones((6, 3)).view(cls) + res = apply_along_axis(np.sum, 0, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([6, 6, 6]).view(cls)) + + def test_0d_array(self, cls=np.ndarray): + def sum_to_0d(x): + """ Sum x, returning a 0d array of the same class """ + assert_equal(x.ndim, 1) + return np.squeeze(np.sum(x, keepdims=True)) + a = np.ones((6, 3)).view(cls) + res = apply_along_axis(sum_to_0d, 0, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([6, 6, 6]).view(cls)) + + res = apply_along_axis(sum_to_0d, 1, a) + assert_(isinstance(res, cls)) + assert_array_equal(res, np.array([3, 3, 3, 3, 3, 3]).view(cls)) + + def test_axis_insertion(self, cls=np.ndarray): + def f1to2(x): + """produces an asymmetric non-square matrix from x""" + assert_equal(x.ndim, 1) + return (x[::-1] * x[1:,None]).view(cls) + + a2d = np.arange(6*3).reshape((6, 3)) + + # 2d insertion along first axis + actual = apply_along_axis(f1to2, 0, a2d) + expected = np.stack([ + f1to2(a2d[:,i]) for i in range(a2d.shape[1]) + ], axis=-1).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + # 2d insertion along last axis + actual = apply_along_axis(f1to2, 1, a2d) + expected = np.stack([ + f1to2(a2d[i,:]) for i in range(a2d.shape[0]) + ], axis=0).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + # 3d insertion along middle axis + a3d = np.arange(6*5*3).reshape((6, 5, 3)) + + actual = apply_along_axis(f1to2, 1, a3d) + expected = np.stack([ + np.stack([ + f1to2(a3d[i,:,j]) for i in range(a3d.shape[0]) + ], axis=0) + for j in range(a3d.shape[2]) + ], axis=-1).view(cls) + assert_equal(type(actual), type(expected)) + assert_equal(actual, expected) + + def test_subclass_preservation(self): + class MinimalSubclass(np.ndarray): + pass + self.test_scalar_array(MinimalSubclass) + self.test_0d_array(MinimalSubclass) + self.test_axis_insertion(MinimalSubclass) + + def test_axis_insertion_ma(self): + def f1to2(x): + """produces an asymmetric non-square matrix from x""" + assert_equal(x.ndim, 1) + res = x[::-1] * x[1:,None] + return np.ma.masked_where(res%5==0, res) + a = np.arange(6*3).reshape((6, 3)) + res = apply_along_axis(f1to2, 0, a) + assert_(isinstance(res, np.ma.masked_array)) + assert_equal(res.ndim, 3) + assert_array_equal(res[:,:,0].mask, f1to2(a[:,0]).mask) + assert_array_equal(res[:,:,1].mask, f1to2(a[:,1]).mask) + assert_array_equal(res[:,:,2].mask, f1to2(a[:,2]).mask) + + def test_tuple_func1d(self): + def sample_1d(x): + return x[1], x[0] + res = np.apply_along_axis(sample_1d, 1, np.array([[1, 2], [3, 4]])) + assert_array_equal(res, np.array([[2, 1], [4, 3]])) + + def test_empty(self): + # can't apply_along_axis when 
there's no chance to call the function + def never_call(x): + assert_(False) # should never be reached + + a = np.empty((0, 0)) + assert_raises(ValueError, np.apply_along_axis, never_call, 0, a) + assert_raises(ValueError, np.apply_along_axis, never_call, 1, a) + + # but it's sometimes ok with some non-zero dimensions + def empty_to_1(x): + assert_(len(x) == 0) + return 1 + + a = np.empty((10, 0)) + actual = np.apply_along_axis(empty_to_1, 1, a) + assert_equal(actual, np.ones(10)) + assert_raises(ValueError, np.apply_along_axis, empty_to_1, 0, a) + + def test_with_iterable_object(self): + # from issue 5248 + d = np.array([ + [{1, 11}, {2, 22}, {3, 33}], + [{4, 44}, {5, 55}, {6, 66}] + ]) + actual = np.apply_along_axis(lambda a: set.union(*a), 0, d) + expected = np.array([{1, 11, 4, 44}, {2, 22, 5, 55}, {3, 33, 6, 66}]) + + assert_equal(actual, expected) + + # issue 8642 - assert_equal doesn't detect this! + for i in np.ndindex(actual.shape): + assert_equal(type(actual[i]), type(expected[i])) + + +class TestApplyOverAxes(object): + def test_simple(self): + a = np.arange(24).reshape(2, 3, 4) + aoa_a = apply_over_axes(np.sum, a, [0, 2]) + assert_array_equal(aoa_a, np.array([[[60], [92], [124]]])) + + +class TestExpandDims(object): + def test_functionality(self): + s = (2, 3, 4, 5) + a = np.empty(s) + for axis in range(-5, 4): + b = expand_dims(a, axis) + assert_(b.shape[axis] == 1) + assert_(np.squeeze(b).shape == s) + + def test_deprecations(self): + # 2017-05-17, 1.13.0 + s = (2, 3, 4, 5) + a = np.empty(s) + with warnings.catch_warnings(): + warnings.simplefilter("always") + assert_warns(DeprecationWarning, expand_dims, a, -6) + assert_warns(DeprecationWarning, expand_dims, a, 5) + + def test_subclasses(self): + a = np.arange(10).reshape((2, 5)) + a = np.ma.array(a, mask=a%3 == 0) + + expanded = np.expand_dims(a, axis=1) + assert_(isinstance(expanded, np.ma.MaskedArray)) + assert_equal(expanded.shape, (2, 1, 5)) + assert_equal(expanded.mask.shape, (2, 1, 5)) + + +class TestArraySplit(object): + def test_integer_0_split(self): + a = np.arange(10) + assert_raises(ValueError, array_split, a, 0) + + def test_integer_split(self): + a = np.arange(10) + res = array_split(a, 1) + desired = [np.arange(10)] + compare_results(res, desired) + + res = array_split(a, 2) + desired = [np.arange(5), np.arange(5, 10)] + compare_results(res, desired) + + res = array_split(a, 3) + desired = [np.arange(4), np.arange(4, 7), np.arange(7, 10)] + compare_results(res, desired) + + res = array_split(a, 4) + desired = [np.arange(3), np.arange(3, 6), np.arange(6, 8), + np.arange(8, 10)] + compare_results(res, desired) + + res = array_split(a, 5) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 8), np.arange(8, 10)] + compare_results(res, desired) + + res = array_split(a, 6) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 8), np.arange(8, 9), np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 7) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 8) + desired = [np.arange(2), np.arange(2, 4), np.arange(4, 5), + np.arange(5, 6), np.arange(6, 7), np.arange(7, 8), + np.arange(8, 9), np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 9) + desired = [np.arange(2), np.arange(2, 3), np.arange(3, 4), + np.arange(4, 5), np.arange(5, 6), np.arange(6, 7), + np.arange(7, 8), np.arange(8, 9), 
np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 10) + desired = [np.arange(1), np.arange(1, 2), np.arange(2, 3), + np.arange(3, 4), np.arange(4, 5), np.arange(5, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10)] + compare_results(res, desired) + + res = array_split(a, 11) + desired = [np.arange(1), np.arange(1, 2), np.arange(2, 3), + np.arange(3, 4), np.arange(4, 5), np.arange(5, 6), + np.arange(6, 7), np.arange(7, 8), np.arange(8, 9), + np.arange(9, 10), np.array([])] + compare_results(res, desired) + + def test_integer_split_2D_rows(self): + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3, axis=0) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + + # Same thing for manual splits: + res = array_split(a, [0, 1, 2], axis=0) + tgt = [np.zeros((0, 10)), np.array([np.arange(10)]), + np.array([np.arange(10)])] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + + def test_integer_split_2D_cols(self): + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3, axis=-1) + desired = [np.array([np.arange(4), np.arange(4)]), + np.array([np.arange(4, 7), np.arange(4, 7)]), + np.array([np.arange(7, 10), np.arange(7, 10)])] + compare_results(res, desired) + + def test_integer_split_2D_default(self): + """ This will fail if we change default axis + """ + a = np.array([np.arange(10), np.arange(10)]) + res = array_split(a, 3) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) + # perhaps should check higher dimensions + + @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform") + def test_integer_split_2D_rows_greater_max_int32(self): + a = np.broadcast_to([0], (1 << 32, 2)) + res = array_split(a, 4) + chunk = np.broadcast_to([0], (1 << 30, 2)) + tgt = [chunk] * 4 + for i in range(len(tgt)): + assert_equal(res[i].shape, tgt[i].shape) + + def test_index_split_simple(self): + a = np.arange(10) + indices = [1, 5, 7] + res = array_split(a, indices, axis=-1) + desired = [np.arange(0, 1), np.arange(1, 5), np.arange(5, 7), + np.arange(7, 10)] + compare_results(res, desired) + + def test_index_split_low_bound(self): + a = np.arange(10) + indices = [0, 5, 7] + res = array_split(a, indices, axis=-1) + desired = [np.array([]), np.arange(0, 5), np.arange(5, 7), + np.arange(7, 10)] + compare_results(res, desired) + + def test_index_split_high_bound(self): + a = np.arange(10) + indices = [0, 5, 7, 10, 12] + res = array_split(a, indices, axis=-1) + desired = [np.array([]), np.arange(0, 5), np.arange(5, 7), + np.arange(7, 10), np.array([]), np.array([])] + compare_results(res, desired) + + +class TestSplit(object): + # The split function is essentially the same as array_split, + # except that it test if splitting will result in an + # equal split. Only test for this case. 
+ + def test_equal_split(self): + a = np.arange(10) + res = split(a, 2) + desired = [np.arange(5), np.arange(5, 10)] + compare_results(res, desired) + + def test_unequal_split(self): + a = np.arange(10) + assert_raises(ValueError, split, a, 3) + + +class TestColumnStack(object): + def test_non_iterable(self): + assert_raises(TypeError, column_stack, 1) + + def test_1D_arrays(self): + # example from docstring + a = np.array((1, 2, 3)) + b = np.array((2, 3, 4)) + expected = np.array([[1, 2], + [2, 3], + [3, 4]]) + actual = np.column_stack((a, b)) + assert_equal(actual, expected) + + def test_2D_arrays(self): + # same as hstack 2D docstring example + a = np.array([[1], [2], [3]]) + b = np.array([[2], [3], [4]]) + expected = np.array([[1, 2], + [2, 3], + [3, 4]]) + actual = np.column_stack((a, b)) + assert_equal(actual, expected) + + def test_generator(self): + with assert_warns(FutureWarning): + column_stack((np.arange(3) for _ in range(2))) + + +class TestDstack(object): + def test_non_iterable(self): + assert_raises(TypeError, dstack, 1) + + def test_0D_array(self): + a = np.array(1) + b = np.array(2) + res = dstack([a, b]) + desired = np.array([[[1, 2]]]) + assert_array_equal(res, desired) + + def test_1D_array(self): + a = np.array([1]) + b = np.array([2]) + res = dstack([a, b]) + desired = np.array([[[1, 2]]]) + assert_array_equal(res, desired) + + def test_2D_array(self): + a = np.array([[1], [2]]) + b = np.array([[1], [2]]) + res = dstack([a, b]) + desired = np.array([[[1, 1]], [[2, 2, ]]]) + assert_array_equal(res, desired) + + def test_2D_array2(self): + a = np.array([1, 2]) + b = np.array([1, 2]) + res = dstack([a, b]) + desired = np.array([[[1, 1], [2, 2]]]) + assert_array_equal(res, desired) + + def test_generator(self): + with assert_warns(FutureWarning): + dstack((np.arange(3) for _ in range(2))) + + +# array_split has more comprehensive test of splitting. +# only do simple test on hsplit, vsplit, and dsplit +class TestHsplit(object): + """Only testing for integer splits. + + """ + def test_non_iterable(self): + assert_raises(ValueError, hsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + try: + hsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + res = hsplit(a, 2) + desired = [np.array([1, 2]), np.array([3, 4])] + compare_results(res, desired) + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + res = hsplit(a, 2) + desired = [np.array([[1, 2], [1, 2]]), np.array([[3, 4], [3, 4]])] + compare_results(res, desired) + + +class TestVsplit(object): + """Only testing for integer splits. + + """ + def test_non_iterable(self): + assert_raises(ValueError, vsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, vsplit, a, 2) + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + try: + vsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + res = vsplit(a, 2) + desired = [np.array([[1, 2, 3, 4]]), np.array([[1, 2, 3, 4]])] + compare_results(res, desired) + + +class TestDsplit(object): + # Only testing for integer splits. 
+ def test_non_iterable(self): + assert_raises(ValueError, dsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, dsplit, a, 2) + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + assert_raises(ValueError, dsplit, a, 2) + + def test_2D_array(self): + a = np.array([[1, 2, 3, 4], + [1, 2, 3, 4]]) + try: + dsplit(a, 2) + assert_(0) + except ValueError: + pass + + def test_3D_array(self): + a = np.array([[[1, 2, 3, 4], + [1, 2, 3, 4]], + [[1, 2, 3, 4], + [1, 2, 3, 4]]]) + res = dsplit(a, 2) + desired = [np.array([[[1, 2], [1, 2]], [[1, 2], [1, 2]]]), + np.array([[[3, 4], [3, 4]], [[3, 4], [3, 4]]])] + compare_results(res, desired) + + +class TestSqueeze(object): + def test_basic(self): + from numpy.random import rand + + a = rand(20, 10, 10, 1, 1) + b = rand(20, 1, 10, 1, 20) + c = rand(1, 1, 20, 10) + assert_array_equal(np.squeeze(a), np.reshape(a, (20, 10, 10))) + assert_array_equal(np.squeeze(b), np.reshape(b, (20, 10, 20))) + assert_array_equal(np.squeeze(c), np.reshape(c, (20, 10))) + + # Squeezing to 0-dim should still give an ndarray + a = [[[1.5]]] + res = np.squeeze(a) + assert_equal(res, 1.5) + assert_equal(res.ndim, 0) + assert_equal(type(res), np.ndarray) + + +class TestKron(object): + def test_return_type(self): + class myarray(np.ndarray): + __array_priority__ = 0.0 + + a = np.ones([2, 2]) + ma = myarray(a.shape, a.dtype, a.data) + assert_equal(type(kron(a, a)), np.ndarray) + assert_equal(type(kron(ma, ma)), myarray) + assert_equal(type(kron(a, ma)), np.ndarray) + assert_equal(type(kron(ma, a)), myarray) + + +class TestTile(object): + def test_basic(self): + a = np.array([0, 1, 2]) + b = [[1, 2], [3, 4]] + assert_equal(tile(a, 2), [0, 1, 2, 0, 1, 2]) + assert_equal(tile(a, (2, 2)), [[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 2]]) + assert_equal(tile(a, (1, 2)), [[0, 1, 2, 0, 1, 2]]) + assert_equal(tile(b, 2), [[1, 2, 1, 2], [3, 4, 3, 4]]) + assert_equal(tile(b, (2, 1)), [[1, 2], [3, 4], [1, 2], [3, 4]]) + assert_equal(tile(b, (2, 2)), [[1, 2, 1, 2], [3, 4, 3, 4], + [1, 2, 1, 2], [3, 4, 3, 4]]) + + def test_tile_one_repetition_on_array_gh4679(self): + a = np.arange(5) + b = tile(a, 1) + b += 2 + assert_equal(a, np.arange(5)) + + def test_empty(self): + a = np.array([[[]]]) + b = np.array([[], []]) + c = tile(b, 2).shape + d = tile(a, (3, 2, 5)).shape + assert_equal(c, (2, 0)) + assert_equal(d, (3, 2, 0)) + + def test_kroncompare(self): + from numpy.random import randint + + reps = [(2,), (1, 2), (2, 1), (2, 2), (2, 3, 2), (3, 2)] + shape = [(3,), (2, 3), (3, 4, 3), (3, 2, 3), (4, 3, 2, 4), (2, 2)] + for s in shape: + b = randint(0, 10, size=s) + for r in reps: + a = np.ones(r, b.dtype) + large = tile(b, r) + klarge = kron(a, b) + assert_equal(large, klarge) + + +class TestMayShareMemory(object): + def test_basic(self): + d = np.ones((50, 60)) + d2 = np.ones((30, 60, 6)) + assert_(np.may_share_memory(d, d)) + assert_(np.may_share_memory(d, d[::-1])) + assert_(np.may_share_memory(d, d[::2])) + assert_(np.may_share_memory(d, d[1:, ::-1])) + + assert_(not np.may_share_memory(d[::-1], d2)) + assert_(not np.may_share_memory(d[::2], d2)) + assert_(not np.may_share_memory(d[1:, ::-1], d2)) + assert_(np.may_share_memory(d2[1:, ::-1], d2)) + + +# Utility +def compare_results(res, desired): + for i in range(len(desired)): + assert_array_equal(res[i], desired[i]) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_stride_tricks.py b/contrib/python/numpy/py2/numpy/lib/tests/test_stride_tricks.py new file mode 100644 index 0000000000..b2bd7da3ef --- 
/dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_stride_tricks.py @@ -0,0 +1,445 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.core._rational_tests import rational +from numpy.testing import ( + assert_equal, assert_array_equal, assert_raises, assert_, + assert_raises_regex + ) +from numpy.lib.stride_tricks import ( + as_strided, broadcast_arrays, _broadcast_shape, broadcast_to + ) + +def assert_shapes_correct(input_shapes, expected_shape): + # Broadcast a list of arrays with the given input shapes and check the + # common output shape. + + inarrays = [np.zeros(s) for s in input_shapes] + outarrays = broadcast_arrays(*inarrays) + outshapes = [a.shape for a in outarrays] + expected = [expected_shape] * len(inarrays) + assert_equal(outshapes, expected) + + +def assert_incompatible_shapes_raise(input_shapes): + # Broadcast a list of arrays with the given (incompatible) input shapes + # and check that they raise a ValueError. + + inarrays = [np.zeros(s) for s in input_shapes] + assert_raises(ValueError, broadcast_arrays, *inarrays) + + +def assert_same_as_ufunc(shape0, shape1, transposed=False, flipped=False): + # Broadcast two shapes against each other and check that the data layout + # is the same as if a ufunc did the broadcasting. + + x0 = np.zeros(shape0, dtype=int) + # Note that multiply.reduce's identity element is 1.0, so when shape1==(), + # this gives the desired n==1. + n = int(np.multiply.reduce(shape1)) + x1 = np.arange(n).reshape(shape1) + if transposed: + x0 = x0.T + x1 = x1.T + if flipped: + x0 = x0[::-1] + x1 = x1[::-1] + # Use the add ufunc to do the broadcasting. Since we're adding 0s to x1, the + # result should be exactly the same as the broadcasted view of x1. + y = x0 + x1 + b0, b1 = broadcast_arrays(x0, x1) + assert_array_equal(y, b1) + + +def test_same(): + x = np.arange(10) + y = np.arange(10) + bx, by = broadcast_arrays(x, y) + assert_array_equal(x, bx) + assert_array_equal(y, by) + +def test_broadcast_kwargs(): + # ensure that a TypeError is appropriately raised when + # np.broadcast_arrays() is called with any keyword + # argument other than 'subok' + x = np.arange(10) + y = np.arange(10) + + with assert_raises_regex(TypeError, + r'broadcast_arrays\(\) got an unexpected keyword*'): + broadcast_arrays(x, y, dtype='float64') + + +def test_one_off(): + x = np.array([[1, 2, 3]]) + y = np.array([[1], [2], [3]]) + bx, by = broadcast_arrays(x, y) + bx0 = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + by0 = bx0.T + assert_array_equal(bx0, bx) + assert_array_equal(by0, by) + + +def test_same_input_shapes(): + # Check that the final shape is just the input shape. + + data = [ + (), + (1,), + (3,), + (0, 1), + (0, 3), + (1, 0), + (3, 0), + (1, 3), + (3, 1), + (3, 3), + ] + for shape in data: + input_shapes = [shape] + # Single input. + assert_shapes_correct(input_shapes, shape) + # Double input. + input_shapes2 = [shape, shape] + assert_shapes_correct(input_shapes2, shape) + # Triple input. + input_shapes3 = [shape, shape, shape] + assert_shapes_correct(input_shapes3, shape) + + +def test_two_compatible_by_ones_input_shapes(): + # Check that two different input shapes of the same length, but some have + # ones, broadcast to the correct shape. 
+ + data = [ + [[(1,), (3,)], (3,)], + [[(1, 3), (3, 3)], (3, 3)], + [[(3, 1), (3, 3)], (3, 3)], + [[(1, 3), (3, 1)], (3, 3)], + [[(1, 1), (3, 3)], (3, 3)], + [[(1, 1), (1, 3)], (1, 3)], + [[(1, 1), (3, 1)], (3, 1)], + [[(1, 0), (0, 0)], (0, 0)], + [[(0, 1), (0, 0)], (0, 0)], + [[(1, 0), (0, 1)], (0, 0)], + [[(1, 1), (0, 0)], (0, 0)], + [[(1, 1), (1, 0)], (1, 0)], + [[(1, 1), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_shapes_correct(input_shapes, expected_shape) + # Reverse the input shapes since broadcasting should be symmetric. + assert_shapes_correct(input_shapes[::-1], expected_shape) + + +def test_two_compatible_by_prepending_ones_input_shapes(): + # Check that two different input shapes (of different lengths) broadcast + # to the correct shape. + + data = [ + [[(), (3,)], (3,)], + [[(3,), (3, 3)], (3, 3)], + [[(3,), (3, 1)], (3, 3)], + [[(1,), (3, 3)], (3, 3)], + [[(), (3, 3)], (3, 3)], + [[(1, 1), (3,)], (1, 3)], + [[(1,), (3, 1)], (3, 1)], + [[(1,), (1, 3)], (1, 3)], + [[(), (1, 3)], (1, 3)], + [[(), (3, 1)], (3, 1)], + [[(), (0,)], (0,)], + [[(0,), (0, 0)], (0, 0)], + [[(0,), (0, 1)], (0, 0)], + [[(1,), (0, 0)], (0, 0)], + [[(), (0, 0)], (0, 0)], + [[(1, 1), (0,)], (1, 0)], + [[(1,), (0, 1)], (0, 1)], + [[(1,), (1, 0)], (1, 0)], + [[(), (1, 0)], (1, 0)], + [[(), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_shapes_correct(input_shapes, expected_shape) + # Reverse the input shapes since broadcasting should be symmetric. + assert_shapes_correct(input_shapes[::-1], expected_shape) + + +def test_incompatible_shapes_raise_valueerror(): + # Check that a ValueError is raised for incompatible shapes. + + data = [ + [(3,), (4,)], + [(2, 3), (2,)], + [(3,), (3,), (4,)], + [(1, 3, 4), (2, 3, 3)], + ] + for input_shapes in data: + assert_incompatible_shapes_raise(input_shapes) + # Reverse the input shapes since broadcasting should be symmetric. + assert_incompatible_shapes_raise(input_shapes[::-1]) + + +def test_same_as_ufunc(): + # Check that the data layout is the same as if a ufunc did the operation. + + data = [ + [[(1,), (3,)], (3,)], + [[(1, 3), (3, 3)], (3, 3)], + [[(3, 1), (3, 3)], (3, 3)], + [[(1, 3), (3, 1)], (3, 3)], + [[(1, 1), (3, 3)], (3, 3)], + [[(1, 1), (1, 3)], (1, 3)], + [[(1, 1), (3, 1)], (3, 1)], + [[(1, 0), (0, 0)], (0, 0)], + [[(0, 1), (0, 0)], (0, 0)], + [[(1, 0), (0, 1)], (0, 0)], + [[(1, 1), (0, 0)], (0, 0)], + [[(1, 1), (1, 0)], (1, 0)], + [[(1, 1), (0, 1)], (0, 1)], + [[(), (3,)], (3,)], + [[(3,), (3, 3)], (3, 3)], + [[(3,), (3, 1)], (3, 3)], + [[(1,), (3, 3)], (3, 3)], + [[(), (3, 3)], (3, 3)], + [[(1, 1), (3,)], (1, 3)], + [[(1,), (3, 1)], (3, 1)], + [[(1,), (1, 3)], (1, 3)], + [[(), (1, 3)], (1, 3)], + [[(), (3, 1)], (3, 1)], + [[(), (0,)], (0,)], + [[(0,), (0, 0)], (0, 0)], + [[(0,), (0, 1)], (0, 0)], + [[(1,), (0, 0)], (0, 0)], + [[(), (0, 0)], (0, 0)], + [[(1, 1), (0,)], (1, 0)], + [[(1,), (0, 1)], (0, 1)], + [[(1,), (1, 0)], (1, 0)], + [[(), (1, 0)], (1, 0)], + [[(), (0, 1)], (0, 1)], + ] + for input_shapes, expected_shape in data: + assert_same_as_ufunc(input_shapes[0], input_shapes[1], + "Shapes: %s %s" % (input_shapes[0], input_shapes[1])) + # Reverse the input shapes since broadcasting should be symmetric. + assert_same_as_ufunc(input_shapes[1], input_shapes[0]) + # Try them transposed, too. + assert_same_as_ufunc(input_shapes[0], input_shapes[1], True) + # ... and flipped for non-rank-0 inputs in order to test negative + # strides. 
+ if () not in input_shapes: + assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True) + assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True) + + +def test_broadcast_to_succeeds(): + data = [ + [np.array(0), (0,), np.array(0)], + [np.array(0), (1,), np.zeros(1)], + [np.array(0), (3,), np.zeros(3)], + [np.ones(1), (1,), np.ones(1)], + [np.ones(1), (2,), np.ones(2)], + [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))], + [np.arange(3), (3,), np.arange(3)], + [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)], + [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])], + # test if shape is not a tuple + [np.ones(0), 0, np.ones(0)], + [np.ones(1), 1, np.ones(1)], + [np.ones(1), 2, np.ones(2)], + # these cases with size 0 are strange, but they reproduce the behavior + # of broadcasting with ufuncs (see test_same_as_ufunc above) + [np.ones(1), (0,), np.ones(0)], + [np.ones((1, 2)), (0, 2), np.ones((0, 2))], + [np.ones((2, 1)), (2, 0), np.ones((2, 0))], + ] + for input_array, shape, expected in data: + actual = broadcast_to(input_array, shape) + assert_array_equal(expected, actual) + + +def test_broadcast_to_raises(): + data = [ + [(0,), ()], + [(1,), ()], + [(3,), ()], + [(3,), (1,)], + [(3,), (2,)], + [(3,), (4,)], + [(1, 2), (2, 1)], + [(1, 1), (1,)], + [(1,), -1], + [(1,), (-1,)], + [(1, 2), (-1, 2)], + ] + for orig_shape, target_shape in data: + arr = np.zeros(orig_shape) + assert_raises(ValueError, lambda: broadcast_to(arr, target_shape)) + + +def test_broadcast_shape(): + # broadcast_shape is already exercized indirectly by broadcast_arrays + assert_equal(_broadcast_shape(), ()) + assert_equal(_broadcast_shape([1, 2]), (2,)) + assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1)) + assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2)) + + # regression tests for gh-5862 + assert_equal(_broadcast_shape(*([np.ones(2)] * 32 + [1])), (2,)) + bad_args = [np.ones(2)] * 32 + [np.ones(3)] * 32 + assert_raises(ValueError, lambda: _broadcast_shape(*bad_args)) + + +def test_as_strided(): + a = np.array([None]) + a_view = as_strided(a) + expected = np.array([None]) + assert_array_equal(a_view, np.array([None])) + + a = np.array([1, 2, 3, 4]) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,)) + expected = np.array([1, 3]) + assert_array_equal(a_view, expected) + + a = np.array([1, 2, 3, 4]) + a_view = as_strided(a, shape=(3, 4), strides=(0, 1 * a.itemsize)) + expected = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) + assert_array_equal(a_view, expected) + + # Regression test for gh-5081 + dt = np.dtype([('num', 'i4'), ('obj', 'O')]) + a = np.empty((4,), dtype=dt) + a['num'] = np.arange(1, 5) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + expected_num = [[1, 2, 3, 4]] * 3 + expected_obj = [[None]*4]*3 + assert_equal(a_view.dtype, dt) + assert_array_equal(expected_num, a_view['num']) + assert_array_equal(expected_obj, a_view['obj']) + + # Make sure that void types without fields are kept unchanged + a = np.empty((4,), dtype='V4') + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + + # Make sure that the only type that could fail is properly handled + dt = np.dtype({'names': [''], 'formats': ['V4']}) + a = np.empty((4,), dtype=dt) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + + # 
Custom dtypes should not be lost (gh-9161) + r = [rational(i) for i in range(4)] + a = np.array(r, dtype=rational) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + assert_array_equal([r] * 3, a_view) + +def as_strided_writeable(): + arr = np.ones(10) + view = as_strided(arr, writeable=False) + assert_(not view.flags.writeable) + + # Check that writeable also is fine: + view = as_strided(arr, writeable=True) + assert_(view.flags.writeable) + view[...] = 3 + assert_array_equal(arr, np.full_like(arr, 3)) + + # Test that things do not break down for readonly: + arr.flags.writeable = False + view = as_strided(arr, writeable=False) + view = as_strided(arr, writeable=True) + assert_(not view.flags.writeable) + + +class VerySimpleSubClass(np.ndarray): + def __new__(cls, *args, **kwargs): + kwargs['subok'] = True + return np.array(*args, **kwargs).view(cls) + + +class SimpleSubClass(VerySimpleSubClass): + def __new__(cls, *args, **kwargs): + kwargs['subok'] = True + self = np.array(*args, **kwargs).view(cls) + self.info = 'simple' + return self + + def __array_finalize__(self, obj): + self.info = getattr(obj, 'info', '') + ' finalized' + + +def test_subclasses(): + # test that subclass is preserved only if subok=True + a = VerySimpleSubClass([1, 2, 3, 4]) + assert_(type(a) is VerySimpleSubClass) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,)) + assert_(type(a_view) is np.ndarray) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,), subok=True) + assert_(type(a_view) is VerySimpleSubClass) + # test that if a subclass has __array_finalize__, it is used + a = SimpleSubClass([1, 2, 3, 4]) + a_view = as_strided(a, shape=(2,), strides=(2 * a.itemsize,), subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + + # similar tests for broadcast_arrays + b = np.arange(len(a)).reshape(-1, 1) + a_view, b_view = broadcast_arrays(a, b) + assert_(type(a_view) is np.ndarray) + assert_(type(b_view) is np.ndarray) + assert_(a_view.shape == b_view.shape) + a_view, b_view = broadcast_arrays(a, b, subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + assert_(type(b_view) is np.ndarray) + assert_(a_view.shape == b_view.shape) + + # and for broadcast_to + shape = (2, 4) + a_view = broadcast_to(a, shape) + assert_(type(a_view) is np.ndarray) + assert_(a_view.shape == shape) + a_view = broadcast_to(a, shape, subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + assert_(a_view.shape == shape) + + +def test_writeable(): + # broadcast_to should return a readonly array + original = np.array([1, 2, 3]) + result = broadcast_to(original, (2, 3)) + assert_equal(result.flags.writeable, False) + assert_raises(ValueError, result.__setitem__, slice(None), 0) + + # but the result of broadcast_arrays needs to be writeable (for now), to + # preserve backwards compatibility + for results in [broadcast_arrays(original), + broadcast_arrays(0, original)]: + for result in results: + assert_equal(result.flags.writeable, True) + # keep readonly input readonly + original.flags.writeable = False + _, result = broadcast_arrays(0, original) + assert_equal(result.flags.writeable, False) + + # regression test for GH6491 + shape = (2,) + strides = [0] + tricky_array = as_strided(np.array(0), shape, strides) + other = np.zeros((1,)) + first, second = broadcast_arrays(tricky_array, other) + assert_(first.shape == second.shape) + + +def 
test_reference_types(): + input_array = np.array('a', dtype=object) + expected = np.array(['a'] * 3, dtype=object) + actual = broadcast_to(input_array, (3,)) + assert_array_equal(expected, actual) + + actual, _ = broadcast_arrays(input_array, np.ones(3)) + assert_array_equal(expected, actual) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_twodim_base.py b/contrib/python/numpy/py2/numpy/lib/tests/test_twodim_base.py new file mode 100644 index 0000000000..fe1348d286 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_twodim_base.py @@ -0,0 +1,534 @@ +"""Test functions for matrix module + +""" +from __future__ import division, absolute_import, print_function + +from numpy.testing import ( + assert_equal, assert_array_equal, assert_array_max_ulp, + assert_array_almost_equal, assert_raises, assert_ + ) + +from numpy import ( + arange, add, fliplr, flipud, zeros, ones, eye, array, diag, histogram2d, + tri, mask_indices, triu_indices, triu_indices_from, tril_indices, + tril_indices_from, vander, + ) + +import numpy as np + + +from numpy.core.tests.test_overrides import requires_array_function + + +def get_mat(n): + data = arange(n) + data = add.outer(data, data) + return data + + +class TestEye(object): + def test_basic(self): + assert_equal(eye(4), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]])) + + assert_equal(eye(4, dtype='f'), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1]], 'f')) + + assert_equal(eye(3) == 1, + eye(3, dtype=bool)) + + def test_diag(self): + assert_equal(eye(4, k=1), + array([[0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + [0, 0, 0, 0]])) + + assert_equal(eye(4, k=-1), + array([[0, 0, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0]])) + + def test_2d(self): + assert_equal(eye(4, 3), + array([[1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, 0, 0]])) + + assert_equal(eye(3, 4), + array([[1, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0]])) + + def test_diag2d(self): + assert_equal(eye(3, 4, k=2), + array([[0, 0, 1, 0], + [0, 0, 0, 1], + [0, 0, 0, 0]])) + + assert_equal(eye(4, 3, k=-2), + array([[0, 0, 0], + [0, 0, 0], + [1, 0, 0], + [0, 1, 0]])) + + def test_eye_bounds(self): + assert_equal(eye(2, 2, 1), [[0, 1], [0, 0]]) + assert_equal(eye(2, 2, -1), [[0, 0], [1, 0]]) + assert_equal(eye(2, 2, 2), [[0, 0], [0, 0]]) + assert_equal(eye(2, 2, -2), [[0, 0], [0, 0]]) + assert_equal(eye(3, 2, 2), [[0, 0], [0, 0], [0, 0]]) + assert_equal(eye(3, 2, 1), [[0, 1], [0, 0], [0, 0]]) + assert_equal(eye(3, 2, -1), [[0, 0], [1, 0], [0, 1]]) + assert_equal(eye(3, 2, -2), [[0, 0], [0, 0], [1, 0]]) + assert_equal(eye(3, 2, -3), [[0, 0], [0, 0], [0, 0]]) + + def test_strings(self): + assert_equal(eye(2, 2, dtype='S3'), + [[b'1', b''], [b'', b'1']]) + + def test_bool(self): + assert_equal(eye(2, 2, dtype=bool), [[True, False], [False, True]]) + + def test_order(self): + mat_c = eye(4, 3, k=-1) + mat_f = eye(4, 3, k=-1, order='F') + assert_equal(mat_c, mat_f) + assert mat_c.flags.c_contiguous + assert not mat_c.flags.f_contiguous + assert not mat_f.flags.c_contiguous + assert mat_f.flags.f_contiguous + + +class TestDiag(object): + def test_vector(self): + vals = (100 * arange(5)).astype('l') + b = zeros((5, 5)) + for k in range(5): + b[k, k] = vals[k] + assert_equal(diag(vals), b) + b = zeros((7, 7)) + c = b.copy() + for k in range(5): + b[k, k + 2] = vals[k] + c[k + 2, k] = vals[k] + assert_equal(diag(vals, k=2), b) + assert_equal(diag(vals, k=-2), c) + + def test_matrix(self, vals=None): + if vals is None: + vals = (100 * 
get_mat(5) + 1).astype('l') + b = zeros((5,)) + for k in range(5): + b[k] = vals[k, k] + assert_equal(diag(vals), b) + b = b * 0 + for k in range(3): + b[k] = vals[k, k + 2] + assert_equal(diag(vals, 2), b[:3]) + for k in range(3): + b[k] = vals[k + 2, k] + assert_equal(diag(vals, -2), b[:3]) + + def test_fortran_order(self): + vals = array((100 * get_mat(5) + 1), order='F', dtype='l') + self.test_matrix(vals) + + def test_diag_bounds(self): + A = [[1, 2], [3, 4], [5, 6]] + assert_equal(diag(A, k=2), []) + assert_equal(diag(A, k=1), [2]) + assert_equal(diag(A, k=0), [1, 4]) + assert_equal(diag(A, k=-1), [3, 6]) + assert_equal(diag(A, k=-2), [5]) + assert_equal(diag(A, k=-3), []) + + def test_failure(self): + assert_raises(ValueError, diag, [[[1]]]) + + +class TestFliplr(object): + def test_basic(self): + assert_raises(ValueError, fliplr, ones(4)) + a = get_mat(4) + b = a[:, ::-1] + assert_equal(fliplr(a), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[2, 1, 0], + [5, 4, 3]] + assert_equal(fliplr(a), b) + + +class TestFlipud(object): + def test_basic(self): + a = get_mat(4) + b = a[::-1, :] + assert_equal(flipud(a), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[3, 4, 5], + [0, 1, 2]] + assert_equal(flipud(a), b) + + +class TestHistogram2d(object): + def test_simple(self): + x = array( + [0.41702200, 0.72032449, 1.1437481e-4, 0.302332573, 0.146755891]) + y = array( + [0.09233859, 0.18626021, 0.34556073, 0.39676747, 0.53881673]) + xedges = np.linspace(0, 1, 10) + yedges = np.linspace(0, 1, 10) + H = histogram2d(x, y, (xedges, yedges))[0] + answer = array( + [[0, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0]]) + assert_array_equal(H.T, answer) + H = histogram2d(x, y, xedges)[0] + assert_array_equal(H.T, answer) + H, xedges, yedges = histogram2d(list(range(10)), list(range(10))) + assert_array_equal(H, eye(10, 10)) + assert_array_equal(xedges, np.linspace(0, 9, 11)) + assert_array_equal(yedges, np.linspace(0, 9, 11)) + + def test_asym(self): + x = array([1, 1, 2, 3, 4, 4, 4, 5]) + y = array([1, 3, 2, 0, 1, 2, 3, 4]) + H, xed, yed = histogram2d( + x, y, (6, 5), range=[[0, 6], [0, 5]], density=True) + answer = array( + [[0., 0, 0, 0, 0], + [0, 1, 0, 1, 0], + [0, 0, 1, 0, 0], + [1, 0, 0, 0, 0], + [0, 1, 1, 1, 0], + [0, 0, 0, 0, 1]]) + assert_array_almost_equal(H, answer/8., 3) + assert_array_equal(xed, np.linspace(0, 6, 7)) + assert_array_equal(yed, np.linspace(0, 5, 6)) + + def test_density(self): + x = array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + y = array([1, 1, 1, 2, 2, 2, 3, 3, 3]) + H, xed, yed = histogram2d( + x, y, [[1, 2, 3, 5], [1, 2, 3, 5]], density=True) + answer = array([[1, 1, .5], + [1, 1, .5], + [.5, .5, .25]])/9. + assert_array_almost_equal(H, answer, 3) + + def test_all_outliers(self): + r = np.random.rand(100) + 1. 
+ 1e6 # histogramdd rounds by decimal=6 + H, xed, yed = histogram2d(r, r, (4, 5), range=([0, 1], [0, 1])) + assert_array_equal(H, 0) + + def test_empty(self): + a, edge1, edge2 = histogram2d([], [], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, array([[0.]])) + + a, edge1, edge2 = histogram2d([], [], bins=4) + assert_array_max_ulp(a, np.zeros((4, 4))) + + def test_binparameter_combination(self): + x = array( + [0, 0.09207008, 0.64575234, 0.12875982, 0.47390599, + 0.59944483, 1]) + y = array( + [0, 0.14344267, 0.48988575, 0.30558665, 0.44700682, + 0.15886423, 1]) + edges = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) + H, xe, ye = histogram2d(x, y, (edges, 4)) + answer = array( + [[2., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 1., 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 0.], + [0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(ye, array([0., 0.25, 0.5, 0.75, 1])) + H, xe, ye = histogram2d(x, y, (4, edges)) + answer = array( + [[1., 1., 0., 1., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(xe, array([0., 0.25, 0.5, 0.75, 1])) + + @requires_array_function + def test_dispatch(self): + class ShouldDispatch(object): + def __array_function__(self, function, types, args, kwargs): + return types, args, kwargs + + xy = [1, 2] + s_d = ShouldDispatch() + r = histogram2d(s_d, xy) + # Cannot use assert_equal since that dispatches... + assert_(r == ((ShouldDispatch,), (s_d, xy), {})) + r = histogram2d(xy, s_d) + assert_(r == ((ShouldDispatch,), (xy, s_d), {})) + r = histogram2d(xy, xy, bins=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=s_d))) + r = histogram2d(xy, xy, bins=[s_d, 5]) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=[s_d, 5]))) + assert_raises(Exception, histogram2d, xy, xy, bins=[s_d]) + r = histogram2d(xy, xy, weights=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(weights=s_d))) + + +class TestTri(object): + def test_dtype(self): + out = array([[1, 0, 0], + [1, 1, 0], + [1, 1, 1]]) + assert_array_equal(tri(3), out) + assert_array_equal(tri(3, dtype=bool), out.astype(bool)) + + +def test_tril_triu_ndim2(): + for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']: + a = np.ones((2, 2), dtype=dtype) + b = np.tril(a) + c = np.triu(a) + assert_array_equal(b, [[1, 0], [1, 1]]) + assert_array_equal(c, b.T) + # should return the same dtype as the original array + assert_equal(b.dtype, a.dtype) + assert_equal(c.dtype, a.dtype) + + +def test_tril_triu_ndim3(): + for dtype in np.typecodes['AllFloat'] + np.typecodes['AllInteger']: + a = np.array([ + [[1, 1], [1, 1]], + [[1, 1], [1, 0]], + [[1, 1], [0, 0]], + ], dtype=dtype) + a_tril_desired = np.array([ + [[1, 0], [1, 1]], + [[1, 0], [1, 0]], + [[1, 0], [0, 0]], + ], dtype=dtype) + a_triu_desired = np.array([ + [[1, 1], [0, 1]], + [[1, 1], [0, 0]], + [[1, 1], [0, 0]], + ], dtype=dtype) + a_triu_observed = np.triu(a) + a_tril_observed = np.tril(a) + assert_array_equal(a_triu_observed, a_triu_desired) + assert_array_equal(a_tril_observed, a_tril_desired) + assert_equal(a_triu_observed.dtype, a.dtype) + assert_equal(a_tril_observed.dtype, a.dtype) + + +def test_tril_triu_with_inf(): + # Issue 4859 + arr = np.array([[1, 1, np.inf], + [1, 1, 1], + [np.inf, 1, 1]]) + out_tril = np.array([[1, 0, 0], + [1, 1, 0], + [np.inf, 1, 1]]) + out_triu = 
out_tril.T + assert_array_equal(np.triu(arr), out_triu) + assert_array_equal(np.tril(arr), out_tril) + + +def test_tril_triu_dtype(): + # Issue 4916 + # tril and triu should return the same dtype as input + for c in np.typecodes['All']: + if c == 'V': + continue + arr = np.zeros((3, 3), dtype=c) + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + # check special cases + arr = np.array([['2001-01-01T12:00', '2002-02-03T13:56'], + ['2004-01-01T12:00', '2003-01-03T13:45']], + dtype='datetime64') + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + arr = np.zeros((3,3), dtype='f4,f4') + assert_equal(np.triu(arr).dtype, arr.dtype) + assert_equal(np.tril(arr).dtype, arr.dtype) + + +def test_mask_indices(): + # simple test without offset + iu = mask_indices(3, np.triu) + a = np.arange(9).reshape(3, 3) + assert_array_equal(a[iu], array([0, 1, 2, 4, 5, 8])) + # Now with an offset + iu1 = mask_indices(3, np.triu, 1) + assert_array_equal(a[iu1], array([1, 2, 5])) + + +def test_tril_indices(): + # indices without and with offset + il1 = tril_indices(4) + il2 = tril_indices(4, k=2) + il3 = tril_indices(4, m=5) + il4 = tril_indices(4, k=2, m=5) + + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + b = np.arange(1, 21).reshape(4, 5) + + # indexing: + assert_array_equal(a[il1], + array([1, 5, 6, 9, 10, 11, 13, 14, 15, 16])) + assert_array_equal(b[il3], + array([1, 6, 7, 11, 12, 13, 16, 17, 18, 19])) + + # And for assigning values: + a[il1] = -1 + assert_array_equal(a, + array([[-1, 2, 3, 4], + [-1, -1, 7, 8], + [-1, -1, -1, 12], + [-1, -1, -1, -1]])) + b[il3] = -1 + assert_array_equal(b, + array([[-1, 2, 3, 4, 5], + [-1, -1, 8, 9, 10], + [-1, -1, -1, 14, 15], + [-1, -1, -1, -1, 20]])) + # These cover almost the whole array (two diagonals right of the main one): + a[il2] = -10 + assert_array_equal(a, + array([[-10, -10, -10, 4], + [-10, -10, -10, -10], + [-10, -10, -10, -10], + [-10, -10, -10, -10]])) + b[il4] = -10 + assert_array_equal(b, + array([[-10, -10, -10, 4, 5], + [-10, -10, -10, -10, 10], + [-10, -10, -10, -10, -10], + [-10, -10, -10, -10, -10]])) + + +class TestTriuIndices(object): + def test_triu_indices(self): + iu1 = triu_indices(4) + iu2 = triu_indices(4, k=2) + iu3 = triu_indices(4, m=5) + iu4 = triu_indices(4, k=2, m=5) + + a = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + b = np.arange(1, 21).reshape(4, 5) + + # Both for indexing: + assert_array_equal(a[iu1], + array([1, 2, 3, 4, 6, 7, 8, 11, 12, 16])) + assert_array_equal(b[iu3], + array([1, 2, 3, 4, 5, 7, 8, 9, + 10, 13, 14, 15, 19, 20])) + + # And for assigning values: + a[iu1] = -1 + assert_array_equal(a, + array([[-1, -1, -1, -1], + [5, -1, -1, -1], + [9, 10, -1, -1], + [13, 14, 15, -1]])) + b[iu3] = -1 + assert_array_equal(b, + array([[-1, -1, -1, -1, -1], + [6, -1, -1, -1, -1], + [11, 12, -1, -1, -1], + [16, 17, 18, -1, -1]])) + + # These cover almost the whole array (two diagonals right of the + # main one): + a[iu2] = -10 + assert_array_equal(a, + array([[-1, -1, -10, -10], + [5, -1, -1, -10], + [9, 10, -1, -1], + [13, 14, 15, -1]])) + b[iu4] = -10 + assert_array_equal(b, + array([[-1, -1, -10, -10, -10], + [6, -1, -1, -10, -10], + [11, 12, -1, -1, -10], + [16, 17, 18, -1, -1]])) + + +class TestTrilIndicesFrom(object): + def test_exceptions(self): + assert_raises(ValueError, tril_indices_from, np.ones((2,))) + assert_raises(ValueError, tril_indices_from, np.ones((2, 2, 2))) 
+ # assert_raises(ValueError, tril_indices_from, np.ones((2, 3))) + + +class TestTriuIndicesFrom(object): + def test_exceptions(self): + assert_raises(ValueError, triu_indices_from, np.ones((2,))) + assert_raises(ValueError, triu_indices_from, np.ones((2, 2, 2))) + # assert_raises(ValueError, triu_indices_from, np.ones((2, 3))) + + +class TestVander(object): + def test_basic(self): + c = np.array([0, 1, -2, 3]) + v = vander(c) + powers = np.array([[0, 0, 0, 0, 1], + [1, 1, 1, 1, 1], + [16, -8, 4, -2, 1], + [81, 27, 9, 3, 1]]) + # Check default value of N: + assert_array_equal(v, powers[:, 1:]) + # Check a range of N values, including 0 and 5 (greater than default) + m = powers.shape[1] + for n in range(6): + v = vander(c, N=n) + assert_array_equal(v, powers[:, m-n:m]) + + def test_dtypes(self): + c = array([11, -12, 13], dtype=np.int8) + v = vander(c) + expected = np.array([[121, 11, 1], + [144, -12, 1], + [169, 13, 1]]) + assert_array_equal(v, expected) + + c = array([1.0+1j, 1.0-1j]) + v = vander(c, N=3) + expected = np.array([[2j, 1+1j, 1], + [-2j, 1-1j, 1]]) + # The data is floating point, but the values are small integers, + # so assert_array_equal *should* be safe here (rather than, say, + # assert_array_almost_equal). + assert_array_equal(v, expected) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_type_check.py b/contrib/python/numpy/py2/numpy/lib/tests/test_type_check.py new file mode 100644 index 0000000000..2982ca31a3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_type_check.py @@ -0,0 +1,442 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.compat import long +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_raises + ) +from numpy.lib.type_check import ( + common_type, mintypecode, isreal, iscomplex, isposinf, isneginf, + nan_to_num, isrealobj, iscomplexobj, asfarray, real_if_close + ) + + +def assert_all(x): + assert_(np.all(x), x) + + +class TestCommonType(object): + def test_basic(self): + ai32 = np.array([[1, 2], [3, 4]], dtype=np.int32) + af16 = np.array([[1, 2], [3, 4]], dtype=np.float16) + af32 = np.array([[1, 2], [3, 4]], dtype=np.float32) + af64 = np.array([[1, 2], [3, 4]], dtype=np.float64) + acs = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.csingle) + acd = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.cdouble) + assert_(common_type(ai32) == np.float64) + assert_(common_type(af16) == np.float16) + assert_(common_type(af32) == np.float32) + assert_(common_type(af64) == np.float64) + assert_(common_type(acs) == np.csingle) + assert_(common_type(acd) == np.cdouble) + + +class TestMintypecode(object): + + def test_default_1(self): + for itype in '1bcsuwil': + assert_equal(mintypecode(itype), 'd') + assert_equal(mintypecode('f'), 'f') + assert_equal(mintypecode('d'), 'd') + assert_equal(mintypecode('F'), 'F') + assert_equal(mintypecode('D'), 'D') + + def test_default_2(self): + for itype in '1bcsuwil': + assert_equal(mintypecode(itype+'f'), 'f') + assert_equal(mintypecode(itype+'d'), 'd') + assert_equal(mintypecode(itype+'F'), 'F') + assert_equal(mintypecode(itype+'D'), 'D') + assert_equal(mintypecode('ff'), 'f') + assert_equal(mintypecode('fd'), 'd') + assert_equal(mintypecode('fF'), 'F') + assert_equal(mintypecode('fD'), 'D') + assert_equal(mintypecode('df'), 'd') + assert_equal(mintypecode('dd'), 'd') + #assert_equal(mintypecode('dF',savespace=1),'F') + assert_equal(mintypecode('dF'), 'D') + assert_equal(mintypecode('dD'), 'D') + 
assert_equal(mintypecode('Ff'), 'F') + #assert_equal(mintypecode('Fd',savespace=1),'F') + assert_equal(mintypecode('Fd'), 'D') + assert_equal(mintypecode('FF'), 'F') + assert_equal(mintypecode('FD'), 'D') + assert_equal(mintypecode('Df'), 'D') + assert_equal(mintypecode('Dd'), 'D') + assert_equal(mintypecode('DF'), 'D') + assert_equal(mintypecode('DD'), 'D') + + def test_default_3(self): + assert_equal(mintypecode('fdF'), 'D') + #assert_equal(mintypecode('fdF',savespace=1),'F') + assert_equal(mintypecode('fdD'), 'D') + assert_equal(mintypecode('fFD'), 'D') + assert_equal(mintypecode('dFD'), 'D') + + assert_equal(mintypecode('ifd'), 'd') + assert_equal(mintypecode('ifF'), 'F') + assert_equal(mintypecode('ifD'), 'D') + assert_equal(mintypecode('idF'), 'D') + #assert_equal(mintypecode('idF',savespace=1),'F') + assert_equal(mintypecode('idD'), 'D') + + +class TestIsscalar(object): + + def test_basic(self): + assert_(np.isscalar(3)) + assert_(not np.isscalar([3])) + assert_(not np.isscalar((3,))) + assert_(np.isscalar(3j)) + assert_(np.isscalar(long(10))) + assert_(np.isscalar(4.0)) + + +class TestReal(object): + + def test_real(self): + y = np.random.rand(10,) + assert_array_equal(y, np.real(y)) + + y = np.array(1) + out = np.real(y) + assert_array_equal(y, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + out = np.real(y) + assert_equal(y, out) + assert_(not isinstance(out, np.ndarray)) + + def test_cmplx(self): + y = np.random.rand(10,)+1j*np.random.rand(10,) + assert_array_equal(y.real, np.real(y)) + + y = np.array(1 + 1j) + out = np.real(y) + assert_array_equal(y.real, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + 1j + out = np.real(y) + assert_equal(1.0, out) + assert_(not isinstance(out, np.ndarray)) + + +class TestImag(object): + + def test_real(self): + y = np.random.rand(10,) + assert_array_equal(0, np.imag(y)) + + y = np.array(1) + out = np.imag(y) + assert_array_equal(0, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + out = np.imag(y) + assert_equal(0, out) + assert_(not isinstance(out, np.ndarray)) + + def test_cmplx(self): + y = np.random.rand(10,)+1j*np.random.rand(10,) + assert_array_equal(y.imag, np.imag(y)) + + y = np.array(1 + 1j) + out = np.imag(y) + assert_array_equal(y.imag, out) + assert_(isinstance(out, np.ndarray)) + + y = 1 + 1j + out = np.imag(y) + assert_equal(1.0, out) + assert_(not isinstance(out, np.ndarray)) + + +class TestIscomplex(object): + + def test_fail(self): + z = np.array([-1, 0, 1]) + res = iscomplex(z) + assert_(not np.sometrue(res, axis=0)) + + def test_pass(self): + z = np.array([-1j, 1, 0]) + res = iscomplex(z) + assert_array_equal(res, [1, 0, 0]) + + +class TestIsreal(object): + + def test_pass(self): + z = np.array([-1, 0, 1j]) + res = isreal(z) + assert_array_equal(res, [1, 1, 0]) + + def test_fail(self): + z = np.array([-1j, 1, 0]) + res = isreal(z) + assert_array_equal(res, [0, 1, 1]) + + +class TestIscomplexobj(object): + + def test_basic(self): + z = np.array([-1, 0, 1]) + assert_(not iscomplexobj(z)) + z = np.array([-1j, 0, -1]) + assert_(iscomplexobj(z)) + + def test_scalar(self): + assert_(not iscomplexobj(1.0)) + assert_(iscomplexobj(1+0j)) + + def test_list(self): + assert_(iscomplexobj([3, 1+0j, True])) + assert_(not iscomplexobj([3, 1, True])) + + def test_duck(self): + class DummyComplexArray: + @property + def dtype(self): + return np.dtype(complex) + dummy = DummyComplexArray() + assert_(iscomplexobj(dummy)) + + def test_pandas_duck(self): + # This tests a custom np.dtype duck-typed class, such as used by 
pandas + # (pandas.core.dtypes) + class PdComplex(np.complex128): + pass + class PdDtype(object): + name = 'category' + names = None + type = PdComplex + kind = 'c' + str = '<c16' + base = np.dtype('complex128') + class DummyPd: + @property + def dtype(self): + return PdDtype + dummy = DummyPd() + assert_(iscomplexobj(dummy)) + + def test_custom_dtype_duck(self): + class MyArray(list): + @property + def dtype(self): + return complex + + a = MyArray([1+0j, 2+0j, 3+0j]) + assert_(iscomplexobj(a)) + + +class TestIsrealobj(object): + def test_basic(self): + z = np.array([-1, 0, 1]) + assert_(isrealobj(z)) + z = np.array([-1j, 0, -1]) + assert_(not isrealobj(z)) + + +class TestIsnan(object): + + def test_goodvalues(self): + z = np.array((-1., 0., 1.)) + res = np.isnan(z) == 0 + assert_all(np.all(res, axis=0)) + + def test_posinf(self): + with np.errstate(divide='ignore'): + assert_all(np.isnan(np.array((1.,))/0.) == 0) + + def test_neginf(self): + with np.errstate(divide='ignore'): + assert_all(np.isnan(np.array((-1.,))/0.) == 0) + + def test_ind(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isnan(np.array((0.,))/0.) == 1) + + def test_integer(self): + assert_all(np.isnan(1) == 0) + + def test_complex(self): + assert_all(np.isnan(1+1j) == 0) + + def test_complex1(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isnan(np.array(0+0j)/0.) == 1) + + +class TestIsfinite(object): + # Fixme, wrong place, isfinite now ufunc + + def test_goodvalues(self): + z = np.array((-1., 0., 1.)) + res = np.isfinite(z) == 1 + assert_all(np.all(res, axis=0)) + + def test_posinf(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isfinite(np.array((1.,))/0.) == 0) + + def test_neginf(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isfinite(np.array((-1.,))/0.) == 0) + + def test_ind(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isfinite(np.array((0.,))/0.) == 0) + + def test_integer(self): + assert_all(np.isfinite(1) == 1) + + def test_complex(self): + assert_all(np.isfinite(1+1j) == 1) + + def test_complex1(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isfinite(np.array(1+1j)/0.) == 0) + + +class TestIsinf(object): + # Fixme, wrong place, isinf now ufunc + + def test_goodvalues(self): + z = np.array((-1., 0., 1.)) + res = np.isinf(z) == 0 + assert_all(np.all(res, axis=0)) + + def test_posinf(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isinf(np.array((1.,))/0.) == 1) + + def test_posinf_scalar(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isinf(np.array(1.,)/0.) == 1) + + def test_neginf(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isinf(np.array((-1.,))/0.) == 1) + + def test_neginf_scalar(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isinf(np.array(-1.)/0.) == 1) + + def test_ind(self): + with np.errstate(divide='ignore', invalid='ignore'): + assert_all(np.isinf(np.array((0.,))/0.) == 0) + + +class TestIsposinf(object): + + def test_generic(self): + with np.errstate(divide='ignore', invalid='ignore'): + vals = isposinf(np.array((-1., 0, 1))/0.) + assert_(vals[0] == 0) + assert_(vals[1] == 0) + assert_(vals[2] == 1) + + +class TestIsneginf(object): + + def test_generic(self): + with np.errstate(divide='ignore', invalid='ignore'): + vals = isneginf(np.array((-1., 0, 1))/0.) 
+ assert_(vals[0] == 1) + assert_(vals[1] == 0) + assert_(vals[2] == 0) + + +class TestNanToNum(object): + + def test_generic(self): + with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0.) + assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0])) + assert_(vals[1] == 0) + assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) + assert_equal(type(vals), np.ndarray) + + # perform the same test but in-place + with np.errstate(divide='ignore', invalid='ignore'): + vals = np.array((-1., 0, 1))/0. + result = nan_to_num(vals, copy=False) + + assert_(result is vals) + assert_all(vals[0] < -1e10) and assert_all(np.isfinite(vals[0])) + assert_(vals[1] == 0) + assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) + assert_equal(type(vals), np.ndarray) + + def test_array(self): + vals = nan_to_num([1]) + assert_array_equal(vals, np.array([1], int)) + assert_equal(type(vals), np.ndarray) + + def test_integer(self): + vals = nan_to_num(1) + assert_all(vals == 1) + assert_equal(type(vals), np.int_) + + def test_float(self): + vals = nan_to_num(1.0) + assert_all(vals == 1.0) + assert_equal(type(vals), np.float_) + + def test_complex_good(self): + vals = nan_to_num(1+1j) + assert_all(vals == 1+1j) + assert_equal(type(vals), np.complex_) + + def test_complex_bad(self): + with np.errstate(divide='ignore', invalid='ignore'): + v = 1 + 1j + v += np.array(0+1.j)/0. + vals = nan_to_num(v) + # !! This is actually (unexpectedly) zero + assert_all(np.isfinite(vals)) + assert_equal(type(vals), np.complex_) + + def test_complex_bad2(self): + with np.errstate(divide='ignore', invalid='ignore'): + v = 1 + 1j + v += np.array(-1+1.j)/0. + vals = nan_to_num(v) + assert_all(np.isfinite(vals)) + assert_equal(type(vals), np.complex_) + # Fixme + #assert_all(vals.imag > 1e10) and assert_all(np.isfinite(vals)) + # !! This is actually (unexpectedly) positive + # !! inf. Comment out for now, and see if it + # !! 
changes + #assert_all(vals.real < -1e10) and assert_all(np.isfinite(vals)) + + +class TestRealIfClose(object): + + def test_basic(self): + a = np.random.rand(10) + b = real_if_close(a+1e-15j) + assert_all(isrealobj(b)) + assert_array_equal(a, b) + b = real_if_close(a+1e-7j) + assert_all(iscomplexobj(b)) + b = real_if_close(a+1e-7j, tol=1e-6) + assert_all(isrealobj(b)) + + +class TestArrayConversion(object): + + def test_asfarray(self): + a = asfarray(np.array([1, 2, 3])) + assert_equal(a.__class__, np.ndarray) + assert_(np.issubdtype(a.dtype, np.floating)) + + # previously this would infer dtypes from arrays, unlike every single + # other numpy function + assert_raises(TypeError, + asfarray, np.array([1, 2, 3]), dtype=np.array(1.0)) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_ufunclike.py b/contrib/python/numpy/py2/numpy/lib/tests/test_ufunclike.py new file mode 100644 index 0000000000..0f06876a1b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_ufunclike.py @@ -0,0 +1,106 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +import numpy.core as nx +import numpy.lib.ufunclike as ufl +from numpy.testing import ( + assert_, assert_equal, assert_array_equal, assert_warns, assert_raises +) + + +class TestUfunclike(object): + + def test_isposinf(self): + a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0]) + out = nx.zeros(a.shape, bool) + tgt = nx.array([True, False, False, False, False, False]) + + res = ufl.isposinf(a) + assert_equal(res, tgt) + res = ufl.isposinf(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + + a = a.astype(np.complex) + with assert_raises(TypeError): + ufl.isposinf(a) + + def test_isneginf(self): + a = nx.array([nx.inf, -nx.inf, nx.nan, 0.0, 3.0, -3.0]) + out = nx.zeros(a.shape, bool) + tgt = nx.array([False, True, False, False, False, False]) + + res = ufl.isneginf(a) + assert_equal(res, tgt) + res = ufl.isneginf(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + + a = a.astype(np.complex) + with assert_raises(TypeError): + ufl.isneginf(a) + + def test_fix(self): + a = nx.array([[1.0, 1.1, 1.5, 1.8], [-1.0, -1.1, -1.5, -1.8]]) + out = nx.zeros(a.shape, float) + tgt = nx.array([[1., 1., 1., 1.], [-1., -1., -1., -1.]]) + + res = ufl.fix(a) + assert_equal(res, tgt) + res = ufl.fix(a, out) + assert_equal(res, tgt) + assert_equal(out, tgt) + assert_equal(ufl.fix(3.14), 3) + + def test_fix_with_subclass(self): + class MyArray(nx.ndarray): + def __new__(cls, data, metadata=None): + res = nx.array(data, copy=True).view(cls) + res.metadata = metadata + return res + + def __array_wrap__(self, obj, context=None): + if isinstance(obj, MyArray): + obj.metadata = self.metadata + return obj + + def __array_finalize__(self, obj): + self.metadata = getattr(obj, 'metadata', None) + return self + + a = nx.array([1.1, -1.1]) + m = MyArray(a, metadata='foo') + f = ufl.fix(m) + assert_array_equal(f, nx.array([1, -1])) + assert_(isinstance(f, MyArray)) + assert_equal(f.metadata, 'foo') + + # check 0d arrays don't decay to scalars + m0d = m[0,...] 
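For reference, the fix semantics tested above (round toward zero) can be expressed with floor and ceil; a small sketch with plain numpy calls, illustrative only:

    import numpy as np

    x = np.array([1.0, 1.1, 1.5, 1.8, -1.0, -1.1, -1.5, -1.8])
    # fix() truncates toward zero: floor for non-negative values, ceil for negative ones
    toward_zero = np.where(x >= 0, np.floor(x), np.ceil(x))
    assert np.array_equal(np.fix(x), toward_zero)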
+ m0d.metadata = 'bar' + f0d = ufl.fix(m0d) + assert_(isinstance(f0d, MyArray)) + assert_equal(f0d.metadata, 'bar') + + def test_deprecated(self): + # NumPy 1.13.0, 2017-04-26 + assert_warns(DeprecationWarning, ufl.fix, [1, 2], y=nx.empty(2)) + assert_warns(DeprecationWarning, ufl.isposinf, [1, 2], y=nx.empty(2)) + assert_warns(DeprecationWarning, ufl.isneginf, [1, 2], y=nx.empty(2)) + + def test_scalar(self): + x = np.inf + actual = np.isposinf(x) + expected = np.True_ + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + x = -3.4 + actual = np.fix(x) + expected = np.float64(-3.0) + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + out = np.array(0.0) + actual = np.fix(x, out=out) + assert_(actual is out) diff --git a/contrib/python/numpy/py2/numpy/lib/tests/test_utils.py b/contrib/python/numpy/py2/numpy/lib/tests/test_utils.py new file mode 100644 index 0000000000..2723f34407 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/lib/tests/test_utils.py @@ -0,0 +1,91 @@ +from __future__ import division, absolute_import, print_function + +import sys +import pytest + +from numpy.core import arange +from numpy.testing import assert_, assert_equal, assert_raises_regex +from numpy.lib import deprecate +import numpy.lib.utils as utils + +if sys.version_info[0] >= 3: + from io import StringIO +else: + from StringIO import StringIO + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") +def test_lookfor(): + out = StringIO() + utils.lookfor('eigenvalue', module='numpy', output=out, + import_modules=False) + out = out.getvalue() + assert_('numpy.linalg.eig' in out) + + +@deprecate +def old_func(self, x): + return x + + +@deprecate(message="Rather use new_func2") +def old_func2(self, x): + return x + + +def old_func3(self, x): + return x +new_func3 = deprecate(old_func3, old_name="old_func3", new_name="new_func3") + + +def test_deprecate_decorator(): + assert_('deprecated' in old_func.__doc__) + + +def test_deprecate_decorator_message(): + assert_('Rather use new_func2' in old_func2.__doc__) + + +def test_deprecate_fn(): + assert_('old_func3' in new_func3.__doc__) + assert_('new_func3' in new_func3.__doc__) + + +def test_safe_eval_nameconstant(): + # Test if safe_eval supports Python 3.4 _ast.NameConstant + utils.safe_eval('None') + + +class TestByteBounds(object): + + def test_byte_bounds(self): + # pointer difference matches size * itemsize + # due to contiguity + a = arange(12).reshape(3, 4) + low, high = utils.byte_bounds(a) + assert_equal(high - low, a.size * a.itemsize) + + def test_unusual_order_positive_stride(self): + a = arange(12).reshape(3, 4) + b = a.T + low, high = utils.byte_bounds(b) + assert_equal(high - low, b.size * b.itemsize) + + def test_unusual_order_negative_stride(self): + a = arange(12).reshape(3, 4) + b = a.T[::-1] + low, high = utils.byte_bounds(b) + assert_equal(high - low, b.size * b.itemsize) + + def test_strided(self): + a = arange(12) + b = a[::2] + low, high = utils.byte_bounds(b) + # the largest pointer address is lost (even numbers only in the + # stride), and compensate addresses for striding by 2 + assert_equal(high - low, b.size * 2 * b.itemsize - b.itemsize) + + +def test_assert_raises_regex_context_manager(): + with assert_raises_regex(ValueError, 'no deprecation warning'): + raise ValueError('no deprecation warning') diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/clapack_scrub.py b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/clapack_scrub.py new file mode 
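The byte_bounds arithmetic in the strided test above deserves a worked example: for a view whose stride is two items, the span from the lowest to the highest addressed byte is (size - 1) * stride + itemsize, which equals size * 2 * itemsize - itemsize as asserted in the test. A short illustrative check (not part of the patch):

    import numpy as np
    from numpy.lib import utils

    a = np.arange(12)
    b = a[::2]                        # 6 elements, stride of 2 items
    low, high = utils.byte_bounds(b)
    # first byte of the lowest element through last byte of the highest element
    assert high - low == (b.size - 1) * 2 * b.itemsize + b.itemsize
    assert high - low == b.size * 2 * b.itemsize - b.itemsize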
100644 index 0000000000..e72a39e64e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/clapack_scrub.py @@ -0,0 +1,310 @@ +#!/usr/bin/env python +from __future__ import division, absolute_import, print_function + +import sys, os +import re +from plex import Scanner, Str, Lexicon, Opt, Bol, State, AnyChar, TEXT, IGNORE +from plex.traditional import re as Re + +PY2 = sys.version_info < (3, 0) + +if PY2: + from io import BytesIO as UStringIO +else: + from io import StringIO as UStringIO + +class MyScanner(Scanner): + def __init__(self, info, name='<default>'): + Scanner.__init__(self, self.lexicon, info, name) + + def begin(self, state_name): + Scanner.begin(self, state_name) + +def sep_seq(sequence, sep): + pat = Str(sequence[0]) + for s in sequence[1:]: + pat += sep + Str(s) + return pat + +def runScanner(data, scanner_class, lexicon=None): + info = UStringIO(data) + outfo = UStringIO() + if lexicon is not None: + scanner = scanner_class(lexicon, info) + else: + scanner = scanner_class(info) + while True: + value, text = scanner.read() + if value is None: + break + elif value is IGNORE: + pass + else: + outfo.write(value) + return outfo.getvalue(), scanner + +class LenSubsScanner(MyScanner): + """Following clapack, we remove ftnlen arguments, which f2c puts after + a char * argument to hold the length of the passed string. This is just + a nuisance in C. + """ + def __init__(self, info, name='<ftnlen>'): + MyScanner.__init__(self, info, name) + self.paren_count = 0 + + def beginArgs(self, text): + if self.paren_count == 0: + self.begin('args') + self.paren_count += 1 + return text + + def endArgs(self, text): + self.paren_count -= 1 + if self.paren_count == 0: + self.begin('') + return text + + digits = Re('[0-9]+') + iofun = Re(r'\([^;]*;') + decl = Re(r'\([^)]*\)[,;'+'\n]') + any = Re('[.]*') + S = Re('[ \t\n]*') + cS = Str(',') + S + len_ = Re('[a-z][a-z0-9]*_len') + + iofunctions = Str("s_cat", "s_copy", "s_stop", "s_cmp", + "i_len", "do_fio", "do_lio") + iofun + + # Routines to not scrub the ftnlen argument from + keep_ftnlen = (Str('ilaenv_') | Str('iparmq_') | Str('s_rnge')) + Str('(') + + lexicon = Lexicon([ + (iofunctions, TEXT), + (keep_ftnlen, beginArgs), + State('args', [ + (Str(')'), endArgs), + (Str('('), beginArgs), + (AnyChar, TEXT), + ]), + (cS+Re(r'[1-9][0-9]*L'), IGNORE), + (cS+Str('ftnlen')+Opt(S+len_), IGNORE), + (cS+sep_seq(['(', 'ftnlen', ')'], S)+S+digits, IGNORE), + (Bol+Str('ftnlen ')+len_+Str(';\n'), IGNORE), + (cS+len_, TEXT), + (AnyChar, TEXT), + ]) + +def scrubFtnlen(source): + return runScanner(source, LenSubsScanner)[0] + +def cleanSource(source): + # remove whitespace at end of lines + source = re.sub(r'[\t ]+\n', '\n', source) + # remove comments like .. Scalar Arguments .. + source = re.sub(r'(?m)^[\t ]*/\* *\.\. 
.*?\n', '', source) + # collapse blanks of more than two in-a-row to two + source = re.sub(r'\n\n\n\n+', r'\n\n\n', source) + return source + +class LineQueue(object): + def __init__(self): + object.__init__(self) + self._queue = [] + + def add(self, line): + self._queue.append(line) + + def clear(self): + self._queue = [] + + def flushTo(self, other_queue): + for line in self._queue: + other_queue.add(line) + self.clear() + + def getValue(self): + q = LineQueue() + self.flushTo(q) + s = ''.join(q._queue) + self.clear() + return s + +class CommentQueue(LineQueue): + def __init__(self): + LineQueue.__init__(self) + + def add(self, line): + if line.strip() == '': + LineQueue.add(self, '\n') + else: + line = ' ' + line[2:-3].rstrip() + '\n' + LineQueue.add(self, line) + + def flushTo(self, other_queue): + if len(self._queue) == 0: + pass + elif len(self._queue) == 1: + other_queue.add('/*' + self._queue[0][2:].rstrip() + ' */\n') + else: + other_queue.add('/*\n') + LineQueue.flushTo(self, other_queue) + other_queue.add('*/\n') + self.clear() + +# This really seems to be about 4x longer than it needs to be +def cleanComments(source): + lines = LineQueue() + comments = CommentQueue() + def isCommentLine(line): + return line.startswith('/*') and line.endswith('*/\n') + + blanks = LineQueue() + def isBlank(line): + return line.strip() == '' + + def SourceLines(line): + if isCommentLine(line): + comments.add(line) + return HaveCommentLines + else: + lines.add(line) + return SourceLines + def HaveCommentLines(line): + if isBlank(line): + blanks.add('\n') + return HaveBlankLines + elif isCommentLine(line): + comments.add(line) + return HaveCommentLines + else: + comments.flushTo(lines) + lines.add(line) + return SourceLines + def HaveBlankLines(line): + if isBlank(line): + blanks.add('\n') + return HaveBlankLines + elif isCommentLine(line): + blanks.flushTo(comments) + comments.add(line) + return HaveCommentLines + else: + comments.flushTo(lines) + blanks.flushTo(lines) + lines.add(line) + return SourceLines + + state = SourceLines + for line in UStringIO(source): + state = state(line) + comments.flushTo(lines) + return lines.getValue() + +def removeHeader(source): + lines = LineQueue() + + def LookingForHeader(line): + m = re.match(r'/\*[^\n]*-- translated', line) + if m: + return InHeader + else: + lines.add(line) + return LookingForHeader + def InHeader(line): + if line.startswith('*/'): + return OutOfHeader + else: + return InHeader + def OutOfHeader(line): + if line.startswith('#include "f2c.h"'): + pass + else: + lines.add(line) + return OutOfHeader + + state = LookingForHeader + for line in UStringIO(source): + state = state(line) + return lines.getValue() + +def removeSubroutinePrototypes(source): + expression = re.compile( + r'/[*] Subroutine [*]/^\s*(?:(?:inline|static)\s+){0,2}(?!else|typedef|return)\w+\s+\*?\s*(\w+)\s*\([^0]+\)\s*;?' 
+ ) + lines = LineQueue() + for line in UStringIO(source): + if not expression.match(line): + lines.add(line) + + return lines.getValue() + +def removeBuiltinFunctions(source): + lines = LineQueue() + def LookingForBuiltinFunctions(line): + if line.strip() == '/* Builtin functions */': + return InBuiltInFunctions + else: + lines.add(line) + return LookingForBuiltinFunctions + + def InBuiltInFunctions(line): + if line.strip() == '': + return LookingForBuiltinFunctions + else: + return InBuiltInFunctions + + state = LookingForBuiltinFunctions + for line in UStringIO(source): + state = state(line) + return lines.getValue() + +def replaceDlamch(source): + """Replace dlamch_ calls with appropriate macros""" + def repl(m): + s = m.group(1) + return dict(E='EPSILON', P='PRECISION', S='SAFEMINIMUM', + B='BASE')[s[0]] + source = re.sub(r'dlamch_\("(.*?)"\)', repl, source) + source = re.sub(r'^\s+extern.*? dlamch_.*?;$(?m)', '', source) + return source + +# do it + +def scrubSource(source, nsteps=None, verbose=False): + steps = [ + ('scrubbing ftnlen', scrubFtnlen), + ('remove header', removeHeader), + ('clean source', cleanSource), + ('clean comments', cleanComments), + ('replace dlamch_() calls', replaceDlamch), + ('remove prototypes', removeSubroutinePrototypes), + ('remove builtin function prototypes', removeBuiltinFunctions), + ] + + if nsteps is not None: + steps = steps[:nsteps] + + for msg, step in steps: + if verbose: + print(msg) + source = step(source) + + return source + +if __name__ == '__main__': + filename = sys.argv[1] + outfilename = os.path.join(sys.argv[2], os.path.basename(filename)) + fo = open(filename, 'r') + source = fo.read() + fo.close() + + if len(sys.argv) > 3: + nsteps = int(sys.argv[3]) + else: + nsteps = None + + source = scrub_source(source, nsteps, verbose=True) + + writefo = open(outfilename, 'w') + writefo.write(source) + writefo.close() diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.c new file mode 100644 index 0000000000..1114bef3b1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.c @@ -0,0 +1,764 @@ +/* + Functions here are copied from the source code for libf2c. + + Typically each function there is in its own file. + + We don't link against libf2c directly, because we can't guarantee + it is available, and shipping a static library isn't portable. +*/ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "f2c.h" + + +extern void s_wsfe(cilist *f) {;} +extern void e_wsfe(void) {;} +extern void do_fio(integer *c, char *s, ftnlen l) {;} + +/* You'll want this if you redo the f2c_*.c files with the -C option + * to f2c for checking array subscripts. (It's not suggested you do that + * for production use, of course.) 
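For orientation, the clapack_scrub.py driver earlier in this patch applies its cleanup passes through scrubSource as a list of (message, function) steps; note that the file's __main__ block calls scrub_source(...) while the function defined above it is named scrubSource, so the command-line entry point would raise a NameError as written. A hypothetical usage sketch calling the function directly (file names are made up):

    # hypothetical driver: scrub one f2c-generated source file
    with open('f2c_sgemm.c') as fh:           # made-up input path
        raw = fh.read()
    cleaned = scrubSource(raw, verbose=True)  # scrubSource as defined above
    with open('out/f2c_sgemm.c', 'w') as fh:  # made-up output path
        fh.write(cleaned)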
*/ +extern int +s_rnge(char *var, int index, char *routine, int lineno) +{ + fprintf(stderr, "array index out-of-bounds for %s[%d] in routine %s:%d\n", + var, index, routine, lineno); + fflush(stderr); + abort(); +} + +#ifdef KR_headers +extern float sqrtf(); +double f__cabsf(real, imag) float real, imag; +#else +#undef abs + +double f__cabsf(float real, float imag) +#endif +{ +float temp; + +if(real < 0.0f) + real = -real; +if(imag < 0.0f) + imag = -imag; +if(imag > real){ + temp = real; + real = imag; + imag = temp; +} +if((imag+real) == real) + return((float)real); + +temp = imag/real; +temp = real*sqrtf(1.0 + temp*temp); /*overflow!!*/ +return(temp); +} + + +#ifdef KR_headers +extern double sqrt(); +double f__cabs(real, imag) double real, imag; +#else +#undef abs + +double f__cabs(double real, double imag) +#endif +{ +double temp; + +if(real < 0) + real = -real; +if(imag < 0) + imag = -imag; +if(imag > real){ + temp = real; + real = imag; + imag = temp; +} +if((imag+real) == real) + return((double)real); + +temp = imag/real; +temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ +return(temp); +} + + VOID +#ifdef KR_headers +r_cnjg(r, z) complex *r, *z; +#else +r_cnjg(complex *r, complex *z) +#endif +{ +r->r = z->r; +r->i = - z->i; +} + + VOID +#ifdef KR_headers +d_cnjg(r, z) doublecomplex *r, *z; +#else +d_cnjg(doublecomplex *r, doublecomplex *z) +#endif +{ +r->r = z->r; +r->i = - z->i; +} + + +#ifdef KR_headers +float r_imag(z) complex *z; +#else +float r_imag(complex *z) +#endif +{ +return(z->i); +} + +#ifdef KR_headers +double d_imag(z) doublecomplex *z; +#else +double d_imag(doublecomplex *z) +#endif +{ +return(z->i); +} + + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +float logf(); +float r_lg10(x) real *x; +#else +#undef abs + +float r_lg10(real *x) +#endif +{ +return( log10e * logf(*x) ); +} + +#ifdef KR_headers +double log(); +double d_lg10(x) doublereal *x; +#else +#undef abs + +double d_lg10(doublereal *x) +#endif +{ +return( log10e * log(*x) ); +} + +#ifdef KR_headers +double r_sign(a,b) real *a, *b; +#else +double r_sign(real *a, real *b) +#endif +{ +float x; +x = (*a >= 0.0f ? *a : - *a); +return( *b >= 0.0f ? x : -x); +} + +#ifdef KR_headers +double d_sign(a,b) doublereal *a, *b; +#else +double d_sign(doublereal *a, doublereal *b) +#endif +{ +double x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? x : -x); +} + + +#ifdef KR_headers +double floor(); +integer i_dnnt(x) doublereal *x; +#else +#undef abs + +integer i_dnnt(doublereal *x) +#endif +{ +return( (*x)>=0 ? + floor(*x + .5) : -floor(.5 - *x) ); +} + + +#ifdef KR_headers +double floor(); +integer i_nint(x) real *x; +#else +#undef abs +integer i_nint(real *x) +#endif +{ +return (integer)(*x >= 0 ? 
floor(*x + .5) : -floor(.5 - *x)); +} + +#ifdef KR_headers +double pow(); +double pow_dd(ap, bp) doublereal *ap, *bp; +#else +#undef abs + +double pow_dd(doublereal *ap, doublereal *bp) +#endif +{ +return(pow(*ap, *bp) ); +} + + +#ifdef KR_headers +double pow_ri(ap, bp) real *ap; integer *bp; +#else +double pow_ri(real *ap, integer *bp) +#endif +{ +float pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1.0f/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} + +#ifdef KR_headers +double pow_di(ap, bp) doublereal *ap; integer *bp; +#else +double pow_di(doublereal *ap, integer *bp) +#endif +{ +double pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} + +#ifdef KR_headers +VOID pow_zi(p, a, b) /* p = a**b */ + doublecomplex *p, *a; integer *b; +#else +extern void z_div(doublecomplex*, doublecomplex*, doublecomplex*); +void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) /* p = a**b */ +#endif +{ + integer n; + unsigned long u; + double t; + doublecomplex q, x; + static doublecomplex one = {1.0, 0.0}; + + n = *b; + q.r = 1; + q.i = 0; + + if(n == 0) + goto done; + if(n < 0) + { + n = -n; + z_div(&x, &one, a); + } + else + { + x.r = a->r; + x.i = a->i; + } + + for(u = n; ; ) + { + if(u & 01) + { + t = q.r * x.r - q.i * x.i; + q.i = q.r * x.i + q.i * x.r; + q.r = t; + } + if(u >>= 1) + { + t = x.r * x.r - x.i * x.i; + x.i = 2 * x.r * x.i; + x.r = t; + } + else + break; + } + done: + p->i = q.i; + p->r = q.r; + } + +#ifdef KR_headers +VOID pow_ci(p, a, b) /* p = a**b */ + complex *p, *a; integer *b; +#else +extern void pow_zi(doublecomplex*, doublecomplex*, integer*); +void pow_ci(complex *p, complex *a, integer *b) /* p = a**b */ +#endif +{ +doublecomplex p1, a1; + +a1.r = a->r; +a1.i = a->i; + +pow_zi(&p1, &a1, b); + +p->r = p1.r; +p->i = p1.i; +} + +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). 
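The pow_ri, pow_di and pow_zi routines above share the same square-and-multiply loop over the bits of the exponent. A minimal Python model of that loop (names are mine, not from the C source):

    def pow_int(x, n):
        # square-and-multiply: one squaring per exponent bit
        if n < 0:
            x, n = 1.0 / x, -n
        result = 1.0
        while n:
            if n & 1:        # low bit set: fold the current power in
                result *= x
            x *= x           # square the base for the next bit
            n >>= 1
        return result

    assert pow_int(3.0, 5) == 243.0
    assert pow_int(2.0, -3) == 0.125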
+ */ +#define NO_OVERWRITE + + +#ifndef NO_OVERWRITE + +#undef abs +#ifdef KR_headers + extern char *F77_aloc(); + extern void free(); + extern void exit_(); +#else + + extern char *F77_aloc(ftnlen, char*); +#endif + +#endif /* NO_OVERWRITE */ + + VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll; +#else +s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while(i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for(i = 0 ; i < n ; ++i) { + nc = ll; + if(rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while(--nc >= 0) + *lp++ = *rp++; + } + while(--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memmove(lp0, lp1, L); + free(lp1); + } +#endif + } + + +/* compare two strings */ + +#ifdef KR_headers +integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb; +#else +integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) +#endif +{ +register unsigned char *a, *aend, *b, *bend; +a = (unsigned char *)a0; +b = (unsigned char *)b0; +aend = a + la; +bend = b + lb; + +if(la <= lb) + { + while(a < aend) + if(*a != *b) + return( *a - *b ); + else + { ++a; ++b; } + + while(b < bend) + if(*b != ' ') + return( ' ' - *b ); + else ++b; + } + +else + { + while(b < bend) + if(*a == *b) + { ++a; ++b; } + else + return( *a - *b ); + while(a < aend) + if(*a != ' ') + return(*a - ' '); + else ++a; + } +return(0); +} +/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the + * target of an assignment to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90), + * as in a(2:5) = a(4:7) . 
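s_cmp above implements the Fortran comparison rule in which the shorter operand behaves as if blank-padded to the longer length. A small Python sketch of that rule (illustrative, not the vendored routine):

    def fortran_cmp(a, b):
        # shorter string compares as if padded with blanks on the right
        width = max(len(a), len(b))
        a, b = a.ljust(width), b.ljust(width)
        return (a > b) - (a < b)

    assert fortran_cmp("N", "N   ") == 0   # trailing blanks are insignificant
    assert fortran_cmp("T", "N") > 0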
+ */ + + + +/* assign strings: a = b */ + +#ifdef KR_headers +VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb; +#else +void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) +#endif +{ + register char *aend, *bend; + + aend = a + la; + + if(la <= lb) +#ifndef NO_OVERWRITE + if (a <= b || a >= b + la) +#endif + while(a < aend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else + for(b += la; a < aend; ) + *--aend = *--b; +#endif + + else { + bend = b + lb; +#ifndef NO_OVERWRITE + if (a <= b || a >= bend) +#endif + while(b < bend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else { + a += lb; + while(b < bend) + *--a = *--bend; + a += lb; + } +#endif + while(a < aend) + *a++ = ' '; + } + } + + +#ifdef KR_headers +double f__cabsf(); +double c_abs(z) complex *z; +#else +double f__cabsf(float, float); +double c_abs(complex *z) +#endif +{ +return( f__cabsf( z->r, z->i ) ); +} + +#ifdef KR_headers +double f__cabs(); +double z_abs(z) doublecomplex *z; +#else +double f__cabs(double, double); +double z_abs(doublecomplex *z) +#endif +{ +return( f__cabs( z->r, z->i ) ); +} + + +#ifdef KR_headers +extern void sig_die(); +VOID c_div(c, a, b) complex *a, *b, *c; +#else +extern void sig_die(char*, int); +void c_div(complex *c, complex *a, complex *b) +#endif +{ +float ratio, den; +float abr, abi; + +if( (abr = b->r) < 0.f) + abr = - abr; +if( (abi = b->i) < 0.f) + abi = - abi; +if( abr <= abi ) + { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1);*/ + ratio = b->r / b->i ; + den = b->i * (1 + ratio*ratio); + c->r = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + +else + { + ratio = b->i / b->r ; + den = b->r * (1.f + ratio*ratio); + c->r = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + +} + +#ifdef KR_headers +extern void sig_die(); +VOID z_div(c, a, b) doublecomplex *a, *b, *c; +#else +extern void sig_die(char*, int); +void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) +#endif +{ +double ratio, den; +double abr, abi; + +if( (abr = b->r) < 0.) + abr = - abr; +if( (abi = b->i) < 0.) + abi = - abi; +if( abr <= abi ) + { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1);*/ + ratio = b->r / b->i ; + den = b->i * (1 + ratio*ratio); + c->r = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + +else + { + ratio = b->i / b->r ; + den = b->r * (1 + ratio*ratio); + c->r = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + +} + + +#ifdef KR_headers +float sqrtf(), f__cabsf(); +VOID c_sqrt(r, z) complex *r, *z; +#else +#undef abs + +extern double f__cabsf(float, float); +void c_sqrt(complex *r, complex *z) +#endif +{ +float mag; + +if( (mag = f__cabsf(z->r, z->i)) == 0.f) + r->r = r->i = 0.f; +else if(z->r > 0.0f) + { + r->r = sqrtf(0.5f * (mag + z->r) ); + r->i = z->i / r->r / 2.0f; + } +else + { + r->i = sqrtf(0.5f * (mag - z->r) ); + if(z->i < 0.0f) + r->i = - r->i; + r->r = z->i / r->i / 2.0f; + } +} + + +#ifdef KR_headers +double sqrt(), f__cabs(); +VOID z_sqrt(r, z) doublecomplex *r, *z; +#else +#undef abs + +extern double f__cabs(double, double); +void z_sqrt(doublecomplex *r, doublecomplex *z) +#endif +{ +double mag; + +if( (mag = f__cabs(z->r, z->i)) == 0.) 
+ r->r = r->i = 0.; +else if(z->r > 0) + { + r->r = sqrt(0.5 * (mag + z->r) ); + r->i = z->i / r->r / 2; + } +else + { + r->i = sqrt(0.5 * (mag - z->r) ); + if(z->i < 0) + r->i = - r->i; + r->r = z->i / r->i / 2; + } +} +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer pow_ii(ap, bp) integer *ap, *bp; +#else +integer pow_ii(integer *ap, integer *bp) +#endif +{ + integer pow, x, n; + unsigned long u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x == 0 ? 1/x : 0; + n = -n; + } + u = n; + for(pow = 1; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + return(pow); + } +#ifdef __cplusplus +} +#endif + +#ifdef KR_headers +extern void f_exit(); +VOID s_stop(s, n) char *s; ftnlen n; +#else +#undef abs +#undef min +#undef max +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +void f_exit(void); + +int s_stop(char *s, ftnlen n) +#endif +{ +int i; + +if(n > 0) + { + fprintf(stderr, "STOP "); + for(i = 0; i<n ; ++i) + putc(*s++, stderr); + fprintf(stderr, " statement executed\n"); + } +#ifdef NO_ONEXIT +f_exit(); +#endif +exit(0); + +/* We cannot avoid (useless) compiler diagnostics here: */ +/* some compilers complain if there is no return statement, */ +/* and others complain that this one cannot be reached. */ + +return 0; /* NOT REACHED */ +} +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +} +#endif diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.h b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.h new file mode 100644 index 0000000000..80f1a12b19 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c.h @@ -0,0 +1,388 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
+ + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include <math.h> + +typedef int integer; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef int flag; +typedef int ftnlen; +typedef int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + shortint h; + integer i; + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +typedef long Long; /* No longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#ifndef abs +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#endif +#define dabs(x) (doublereal)abs(x) +#ifndef min +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#endif +#ifndef max +#define max(a,b) ((a) >= (b) ? 
(a) : (b)) +#endif +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(void); +typedef shortint (*J_fp)(void); +typedef integer (*I_fp)(void); +typedef real (*R_fp)(void); +typedef doublereal (*D_fp)(void), (*E_fp)(void); +typedef /* Complex */ VOID (*C_fp)(void); +typedef /* Double Complex */ VOID (*Z_fp)(void); +typedef logical (*L_fp)(void); +typedef shortlogical (*K_fp)(void); +typedef /* Character */ VOID (*H_fp)(void); +typedef /* Subroutine */ int (*S_fp)(void); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs +#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif + +/* https://anonscm.debian.org/cgit/collab-maint/libf2c2.git/tree/f2ch.add */ + +/* If you are using a C++ compiler, append the following to f2c.h + for compiling libF77 and libI77. 
*/ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int abort_(void); +extern double c_abs(complex *); +extern void c_cos(complex *, complex *); +extern void c_div(complex *, complex *, complex *); +extern void c_exp(complex *, complex *); +extern void c_log(complex *, complex *); +extern void c_sin(complex *, complex *); +extern void c_sqrt(complex *, complex *); +extern double d_abs(double *); +extern double d_acos(double *); +extern double d_asin(double *); +extern double d_atan(double *); +extern double d_atn2(double *, double *); +extern void d_cnjg(doublecomplex *, doublecomplex *); +extern double d_cos(double *); +extern double d_cosh(double *); +extern double d_dim(double *, double *); +extern double d_exp(double *); +extern double d_imag(doublecomplex *); +extern double d_int(double *); +extern double d_lg10(double *); +extern double d_log(double *); +extern double d_mod(double *, double *); +extern double d_nint(double *); +extern double d_prod(float *, float *); +extern double d_sign(double *, double *); +extern double d_sin(double *); +extern double d_sinh(double *); +extern double d_sqrt(double *); +extern double d_tan(double *); +extern double d_tanh(double *); +extern double derf_(double *); +extern double derfc_(double *); +extern void do_fio(ftnint *, char *, ftnlen); +extern integer do_lio(ftnint *, ftnint *, char *, ftnlen); +extern integer do_uio(ftnint *, char *, ftnlen); +extern integer e_rdfe(void); +extern integer e_rdue(void); +extern integer e_rsfe(void); +extern integer e_rsfi(void); +extern integer e_rsle(void); +extern integer e_rsli(void); +extern integer e_rsue(void); +extern integer e_wdfe(void); +extern integer e_wdue(void); +extern void e_wsfe(void); +extern integer e_wsfi(void); +extern integer e_wsle(void); +extern integer e_wsli(void); +extern integer e_wsue(void); +extern int ef1asc_(ftnint *, ftnlen *, ftnint *, ftnlen *); +extern integer ef1cmc_(ftnint *, ftnlen *, ftnint *, ftnlen *); + +extern double erf_(float *); +extern double erfc_(float *); +extern integer f_back(alist *); +extern integer f_clos(cllist *); +extern integer f_end(alist *); +extern void f_exit(void); +extern integer f_inqu(inlist *); +extern integer f_open(olist *); +extern integer f_rew(alist *); +extern int flush_(void); +extern void getarg_(integer *, char *, ftnlen); +extern void getenv_(char *, char *, ftnlen, ftnlen); +extern short h_abs(short *); +extern short h_dim(short *, short *); +extern short h_dnnt(double *); +extern short h_indx(char *, char *, ftnlen, ftnlen); +extern short h_len(char *, ftnlen); +extern short h_mod(short *, short *); +extern short h_nint(float *); +extern short h_sign(short *, short *); +extern short hl_ge(char *, char *, ftnlen, ftnlen); +extern short hl_gt(char *, char *, ftnlen, ftnlen); +extern short hl_le(char *, char *, ftnlen, ftnlen); +extern short hl_lt(char *, char *, ftnlen, ftnlen); +extern integer i_abs(integer *); +extern integer i_dim(integer *, integer *); +extern integer i_dnnt(double *); +extern integer i_indx(char *, char *, ftnlen, ftnlen); +extern integer i_len(char *, ftnlen); +extern integer i_mod(integer *, integer *); +extern integer i_nint(float *); +extern integer i_sign(integer *, integer *); +extern integer iargc_(void); +extern ftnlen l_ge(char *, char *, ftnlen, ftnlen); +extern ftnlen l_gt(char *, char *, ftnlen, ftnlen); +extern ftnlen l_le(char *, char *, ftnlen, ftnlen); +extern ftnlen l_lt(char *, char *, ftnlen, ftnlen); +extern void pow_ci(complex *, complex *, integer *); +extern double pow_dd(double *, 
double *); +extern double pow_di(double *, integer *); +extern short pow_hh(short *, shortint *); +extern integer pow_ii(integer *, integer *); +extern double pow_ri(float *, integer *); +extern void pow_zi(doublecomplex *, doublecomplex *, integer *); +extern void pow_zz(doublecomplex *, doublecomplex *, doublecomplex *); +extern double r_abs(float *); +extern double r_acos(float *); +extern double r_asin(float *); +extern double r_atan(float *); +extern double r_atn2(float *, float *); +extern void r_cnjg(complex *, complex *); +extern double r_cos(float *); +extern double r_cosh(float *); +extern double r_dim(float *, float *); +extern double r_exp(float *); +extern float r_imag(complex *); +extern double r_int(float *); +extern float r_lg10(real *); +extern double r_log(float *); +extern double r_mod(float *, float *); +extern double r_nint(float *); +extern double r_sign(float *, float *); +extern double r_sin(float *); +extern double r_sinh(float *); +extern double r_sqrt(float *); +extern double r_tan(float *); +extern double r_tanh(float *); +extern void s_cat(char *, char **, integer *, integer *, ftnlen); +extern integer s_cmp(char *, char *, ftnlen, ftnlen); +extern void s_copy(char *, char *, ftnlen, ftnlen); +extern int s_paus(char *, ftnlen); +extern integer s_rdfe(cilist *); +extern integer s_rdue(cilist *); +extern integer s_rnge(char *, integer, char *, integer); +extern integer s_rsfe(cilist *); +extern integer s_rsfi(icilist *); +extern integer s_rsle(cilist *); +extern integer s_rsli(icilist *); +extern integer s_rsne(cilist *); +extern integer s_rsni(icilist *); +extern integer s_rsue(cilist *); +extern int s_stop(char *, ftnlen); +extern integer s_wdfe(cilist *); +extern integer s_wdue(cilist *); +extern void s_wsfe( cilist *); +extern integer s_wsfi(icilist *); +extern integer s_wsle(cilist *); +extern integer s_wsli(icilist *); +extern integer s_wsne(cilist *); +extern integer s_wsni(icilist *); +extern integer s_wsue(cilist *); +extern void sig_die(char *, int); +extern integer signal_(integer *, void (*)(int)); +extern integer system_(char *, ftnlen); +extern double z_abs(doublecomplex *); +extern void z_cos(doublecomplex *, doublecomplex *); +extern void z_div(doublecomplex *, doublecomplex *, doublecomplex *); +extern void z_exp(doublecomplex *, doublecomplex *); +extern void z_log(doublecomplex *, doublecomplex *); +extern void z_sin(doublecomplex *, doublecomplex *); +extern void z_sqrt(doublecomplex *, doublecomplex *); + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_blas.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_blas.c new file mode 100644 index 0000000000..3af506b712 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_blas.c @@ -0,0 +1,21615 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. 
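The EPSILON, PRECISION, SAFEMINIMUM and BASE macros above pair up with replaceDlamch in clapack_scrub.py earlier in this patch: the scrubber rewrites dlamch_("...") calls in the f2c-generated sources into these macro names, and this header maps them back to dlamch_ calls (or to config.h constants when HAVE_CONFIG is set). A quick demonstration of that substitution, reusing the scrubber's own regex and mapping:

    import re

    def repl(m):
        # first letter of the dlamch_ argument selects the macro name
        return {'E': 'EPSILON', 'P': 'PRECISION',
                'S': 'SAFEMINIMUM', 'B': 'BASE'}[m.group(1)[0]]

    line = 'eps = dlamch_("Epsilon");'
    print(re.sub(r'dlamch_\("(.*?)"\)', repl, line))   # -> eps = EPSILON;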
+*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static complex c_b21 = {1.f,0.f}; +static doublecomplex c_b1078 = {1.,0.}; + +/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * + incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1, q__2; + + /* Local variables */ + static integer i__, ix, iy; + extern doublereal scabs1_(complex *); + + +/* + Purpose + ======= + + CAXPY constant times a vector plus a vector. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (scabs1_(ca) == 0.f) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + i__4 = ix; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; +/* L30: */ + } + return 0; +} /* caxpy_ */ + +/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + + +/* + Purpose + ======= + + CCOPY copies a vector x to a vector y. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; +/* L30: */ + } + return 0; +} /* ccopy_ */ + +/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + Purpose + ======= + + forms the dot product of two vectors, conjugating the first + vector. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[ix]); + i__2 = iy; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[i__]); + i__2 = i__; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; +/* L30: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotc_ */ + +/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + complex q__1, q__2; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + Purpose + ======= + + CDOTU forms the dot product of two vectors. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
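The difference between cdotc_ and cdotu_ above is only whether the first vector is conjugated before the products are summed. In numpy terms (an illustrative aside, not the vendored code):

    import numpy as np

    x = np.array([1+2j, 3-1j])
    y = np.array([2+0j, 1+1j])
    dotc = np.vdot(x, y)   # like cdotc_: sum(conj(x) * y)
    dotu = np.dot(x, y)    # like cdotu_: sum(x * y), no conjugation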
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; +/* L30: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotu_ */ + +/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static complex temp; + static logical conja, conjb; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Arguments + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = conjg( A' ). + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = conjg( B' ). + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . 
+ On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Set NOTA and NOTB as true if A and B respectively are not + conjugated or transposed, set CONJA and CONJB as true if A and + B respectively are to be transposed but not conjugated and set + NROWA, NCOLA and NROWB as the number of rows and columns of A + and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + conja = lsame_(transa, "C"); + conjb = lsame_(transb, "C"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! conja && ! lsame_(transa, "T")) { + info = 1; + } else if (! notb && ! conjb && ! lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("CGEMM ", &info); + return 0; + } + +/* Quick return if possible. 
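To make the contract documented above concrete: with TRANSA = TRANSB = 'N' the routine computes C := alpha*A*B + beta*C, and a 'C' flag substitutes conjg(A') (A.conj().T in numpy terms) for A. A small illustrative rendering with numpy, not the vendored routine:

    import numpy as np

    A = np.array([[1+2j, 0+1j], [2-1j, 3+0j]])
    B = np.array([[1+0j, 1-1j], [0+2j, 2+0j]])
    C = np.eye(2, dtype=complex)
    alpha, beta = 1.5 - 0.5j, 0.25 + 0j

    # TRANSA = TRANSB = 'N':  C := alpha*A*B + beta*C
    C_new = alpha * (A @ B) + beta * C
    # TRANSA = 'C' would instead use op(A) = conjg(A'), i.e. A.conj().T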
*/ + + if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) + && (beta->r == 1.f && beta->i == 0.f)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + q__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = l + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else if (conja) { + +/* Form C := alpha*conjg( A' )*B + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L110: */ + } +/* L120: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L140: */ + } +/* L150: */ + } + } + } else if (nota) { + if (conjb) { + +/* Form C := alpha*A*conjg( B' ) + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L160: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L210: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = j + l * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + r_cnjg(&q__4, &b[j + l * b_dim1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = + q__3.r * q__4.i + q__3.i * q__4.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L260: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*conjg( A' )*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L290: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A'*conjg( B' ) + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + r_cnjg(&q__3, &b[j + l * b_dim1]); + q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, + q__2.i = a[i__4].r * q__3.i + a[i__4].i * + q__3.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L320: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L330: */ + } +/* L340: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; 
++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L350: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L360: */ + } +/* L370: */ + } + } + } + + return 0; + +/* End of CGEMM . */ + +} /* cgemm_ */ + +/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj; + + +/* + Purpose + ======= + + CGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + + y := alpha*conjg( A' )*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Arguments + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - COMPLEX array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. 
+ + Y - COMPLEX array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("CGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r + == 1.f && beta->i == 0.f)) { + return 0; + } + + noconj = lsame_(trans, "T"); + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. 
*/ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0.f, temp.i = 0.f; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jy += *incy; +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0.f, temp.i = 0.f; + ix = kx; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jy += *incy; +/* L140: */ + } + } + } + + return 0; + +/* End of CGEMV 
. */ + +} /* cgemv_ */ + +/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static complex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGERC performs the rank 1 operation + + A := alpha*x*conjg( y' ) + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CGERC ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. 
+*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of CGERC . */ + +} /* cgerc_ */ + +/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static complex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGERU performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
+ + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CGERU ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of CGERU . */ + +} /* cgeru_ */ + +/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHEMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. 
+ + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("CHEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && + beta->i == 0.f)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. 
+*/ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + 
alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L90: */ + } + i__2 = j; + i__3 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of CHEMV . 
*/ + +} /* chemv_ */ + +/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHER2 performs the hermitian rank 2 operation + + A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n hermitian matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("CHER2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = 
q__1.r, a[i__3].i = q__1.i; + ix += *incx; + iy += *incy; +/* L30: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * 
temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L70: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of CHER2 . */ + +} /* cher2_ */ + +/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + real *beta, complex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7; + real r__1; + complex q__1, q__2, q__3, q__4, q__5, q__6; + + /* Local variables */ + static integer i__, j, l, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHER2K performs one of the hermitian rank 2k operations + + C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, + + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + + conjg( alpha )*B*conjg( A' ) + + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + + conjg( alpha )*conjg( B' )*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. 
+ Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("CHER2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta == + 1.f) { + return 0; + } + +/* And when alpha.eq.zero. 
*/ + + if (alpha->r == 0.f && alpha->i == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* + Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * 
temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L150: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L160: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* + Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + + C. 
+*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L190: */ + } + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } + } else { + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L220: */ + } + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * 
temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } + } else { + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of CHER2K. */ + +} /* cher2k_ */ + +/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, complex *a, integer *lda, real *beta, complex *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + real r__1; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, l, info; + static complex temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static real rtemp; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHERK performs one of the hermitian rank k operations + + C := alpha*A*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*A + beta*C, + + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and a k by n + matrix in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. 
+ Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("CHERK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. 
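   (Only the beta*C part remains: the referenced triangle of C is scaled by beta,
   or zeroed when beta is zero, and the imaginary parts of the diagonal entries
   are set to zero.)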
*/ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*conjg( A' ) + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + 
j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L150: */ + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*conjg( A' )*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L190: */ + } + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L200: */ + } + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; +/* L210: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } +/* L220: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; +/* L230: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, 
&a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L240: */ + } + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L250: */ + } +/* L260: */ + } + } + } + + return 0; + +/* End of CHERK . */ + +} /* cherk_ */ + +/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer * + incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, nincx; + + +/* + Purpose + ======= + + CSCAL scales a vector by a constant. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[ + i__3].i + ca->i * cx[i__3].r; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* cscal_ */ + +/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + Purpose + ======= + + CSROT applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + Arguments + ========== + + N (input) INTEGER + On entry, N specifies the order of the vectors cx and cy. + N must be at least zero. + Unchanged on exit. + + CX (input) COMPLEX array, dimension at least + ( 1 + ( N - 1 )*abs( INCX ) ). + Before entry, the incremented array CX must contain the n + element vector cx. On exit, CX is overwritten by the updated + vector cx. + + INCX (input) INTEGER + On entry, INCX specifies the increment for the elements of + CX. INCX must not be zero. + Unchanged on exit. + + CY (input) COMPLEX array, dimension at least + ( 1 + ( N - 1 )*abs( INCY ) ). + Before entry, the incremented array CY must contain the n + element vector cy. On exit, CY is overwritten by the updated + vector cy. + + INCY (input) INTEGER + On entry, INCY specifies the increment for the elements of + CY. INCY must not be zero. + Unchanged on exit. 
+ + C (input) REAL + On entry, C specifies the cosine, cos. + Unchanged on exit. + + S (input) REAL + On entry, S specifies the sine, sin. + Unchanged on exit. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = iy; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = iy; + i__3 = iy; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = ix; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = i__; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = i__; + i__3 = i__; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = i__; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* csrot_ */ + +/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + real r__1, r__2; + complex q__1; + + /* Local variables */ + static integer i__, nincx; + + +/* + Purpose + ======= + + CSSCAL scales a complex vector by a real constant. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + r__1 = *sa * cx[i__4].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + r__1 = *sa * cx[i__3].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* csscal_ */ + +/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + Purpose + ======= + + CSWAP interchanges two vectors. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = ix; + i__3 = iy; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = iy; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = i__; + i__3 = i__; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = i__; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* cswap_ */ + +/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, k, info; + static complex temp; + extern logical lsame_(char *, char *); + static logical lside; + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ) + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. 
+ On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("CTRMM ", &info); + return 0; + } + +/* Quick return if possible. 
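   (Nothing to do when M or N is zero; the alpha.eq.zero case below simply sets B to zero.)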
*/ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = k + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, q__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L30: */ + } + if (nounit) { + i__3 = k + k * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { + i__2 = k + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, q__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + if (nounit) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, q__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, q__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. 
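   (noconj, set when TRANSA = 'T', selects the plain transposed product; for
   TRANSA = 'C' the elements of A are conjugated through r_cnjg before they
   multiply B. Each entry of B is replaced by alpha times the accumulated inner
   product of the corresponding column of A with the current column of B.)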
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, q__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L110: */ + } +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L140: */ + } + } + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. 
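   (op( A ) multiplies from the right: column j of the result is alpha times a
   combination of the columns of B weighted by column j of A, with the diagonal
   weight taken as one when DIAG = 'U'.)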
*/ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L170: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + i__2 = k + j * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, q__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L210: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + i__3 = k + j * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, q__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L220: */ + } + } +/* L230: */ + } +/* L240: */ + } + } + } else { + +/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). 
*/ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, q__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L250: */ + } + } +/* L260: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__2 = k + k * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + } + if (temp.r != 1.f || temp.i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L270: */ + } + } +/* L280: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, q__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L290: */ + } + } +/* L300: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__1 = k + k * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + } + if (temp.r != 1.f || temp.i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L310: */ + } + } +/* L320: */ + } + } + } + } + + return 0; + +/* End of CTRMM . 
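   (Illustrative call, not part of the translated source: assuming caller
   variables integer m, n, lda, ldb and complex alpha, plus column-major arrays
   a and b, ctrmm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb)
   overwrites B with alpha*A*B for an upper triangular, non-unit A.)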
*/ + +} /* ctrmm_ */ + +/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static complex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, or x := conjg( A' )*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := conjg( A' )*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! 
lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("CTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = 
x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = i__; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L120: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = ix; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
+ q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L180: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of CTRMV . */ + +} /* ctrmv_ */ + +/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, k, info; + static complex temp; + extern logical lsame_(char *, char *); + static logical lside; + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + The matrix X is overwritten on B. + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. 
+ On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("CTRSM ", &info); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { + if (nounit) { + i__2 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * + a[i__6].i, q__2.i = b[i__5].r * a[ + i__6].i + b[i__5].i * a[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + if (nounit) { + i__3 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__3].r = q__1.r, b[i__3].i = q__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * + a[i__7].i, q__2.i = b[i__6].r * a[ + i__7].i + b[i__6].i * a[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* + Form B := alpha*inv( A' )*B + or B := alpha*inv( conjg( A' ) )*B. 
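   (Each column of B is scaled by alpha and solved by substitution:
   already-computed entries are subtracted using the elements of A, and for a
   non-unit diagonal the running value is divided by the, possibly conjugated,
   diagonal element via c_div.)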
+*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L110: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L120: */ + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; +/* L130: */ + } +/* L140: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + q__1.i = alpha->r * b[i__2].i + alpha->i * b[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; +/* L170: */ + } +/* L180: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). 
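+
+          Column j of the result is obtained by scaling B( :, j ) by
+          alpha, subtracting the previously solved columns B( :, k )
+          weighted by A( k, j ) (k < j when UPLO = 'U', k > j when
+          UPLO = 'L'), and finally multiplying by 1/A( j, j ) when
+          DIAG = 'N'.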
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L190: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * + b[i__7].i, q__2.i = a[i__6].r * b[ + i__7].i + a[i__6].i * b[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L200: */ + } + } +/* L210: */ + } + if (nounit) { + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L220: */ + } + } +/* L230: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L240: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * + b[i__6].i, q__2.i = a[i__5].r * b[ + i__6].i + a[i__5].i * b[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L250: */ + } + } +/* L260: */ + } + if (nounit) { + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L270: */ + } + } +/* L280: */ + } + } + } else { + +/* + Form B := alpha*B*inv( A' ) + or B := alpha*B*inv( conjg( A' ) ). 
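+
+          Columns are processed in the order k = N..1 (UPLO = 'U') or
+          k = 1..N (UPLO = 'L'): column k is first divided by A( k, k )
+          (or its conjugate) when DIAG = 'N', its contribution
+          A( j, k )*B( :, k ) (or the conjugated weight) is then
+          subtracted from every column j not yet processed, and the
+          scaling by alpha is applied to column k last.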
+*/ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L290: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L300: */ + } + } +/* L310: */ + } + if (alpha->r != 1.f || alpha->i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L320: */ + } + } +/* L330: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L340: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L350: */ + } + } +/* L360: */ + } + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L370: */ + } + } +/* L380: */ + } + } + } + } + + return 0; + +/* End of CTRSM . 
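+
+   As an illustration only (the variables below are hypothetical and not
+   part of this file), a caller solving A*X = alpha*B with an upper
+   triangular, non-unit A applied from the left passes every argument by
+   address:
+
+       complex alpha = {1.f, 0.f};
+       ctrsm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb);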
*/ + +} /* ctrsm_ */ + +/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static complex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRSV solves one of the systems of equations + + A*x = b, or A'*x = b, or conjg( A' )*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix. + + No test for singularity or near-singularity is included in this + routine. Such tests must be performed before calling this routine. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the equations to be solved as + follows: + + TRANS = 'N' or 'n' A*x = b. + + TRANS = 'T' or 't' A'*x = b. + + TRANS = 'C' or 'c' conjg( A' )*x = b. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element right-hand side vector b. On exit, X is overwritten + with the solution vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! 
lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("CTRSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x or x := inv( conjg( A' ) 
)*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = i__; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = 
*n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L180: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = ix; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of CTRSV . */ + +} /* ctrsv_ */ + +/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + Purpose + ======= + + DAXPY constant times a vector plus a vector. + uses unrolled loops for increments equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*da == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] += *da * dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] += *da * dx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; +/* L50: */ + } + return 0; +} /* daxpy_ */ + +doublereal dcabs1_(doublecomplex *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2; + +/* + Purpose + ======= + + DCABS1 computes absolute value of a double complex number + + ===================================================================== +*/ + + + ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_imag(z__), abs(d__2)); + return ret_val; +} /* dcabs1_ */ + +/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + Purpose + ======= + + DCOPY copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ + } + return 0; +} /* dcopy_ */ + +doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; + + +/* + Purpose + ======= + + DDOT forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[ix] * dy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = dtemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[i__] * dy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ + i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + + 4] * dy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* ddot_ */ + +/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static doublereal temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + 
DGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Arguments + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. 
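+
+    As an illustration only (the variables below are hypothetical and not
+    part of this file), a C caller computing C := A*B for column-major
+    operands passes every argument by address:
+
+        doublereal one = 1., zero = 0.;
+        dgemm_("N", "N", &m, &n, &k, &one, a, &lda, b, &ldb, &zero, c, &ldc);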
+ + ===================================================================== + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("DGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.) 
{ + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM . */ + +} /* dgemm_ */ + +/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Arguments + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. 
+ Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("DGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV . 
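+
+   As an illustration only (the variables below are hypothetical and not
+   part of this file), computing y := alpha*A*x + beta*y with unit strides
+   amounts to:
+
+       integer inc = 1;
+       dgemv_("N", &m, &n, &alpha, a, &lda, x, &inc, &beta, y, &inc);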
*/ + +} /* dgemv_ */ + +/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublereal temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) 
{ + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER . */ + +} /* dger_ */ + +doublereal dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Local variables */ + static integer ix; + static doublereal ssq, norm, scale, absxi; + + +/* + Purpose + ======= + + DNRM2 returns the euclidean norm of a vector via the function + name, so that + + DNRM2 := sqrt( x'*x ) + + Further Details + =============== + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to DLASSQ. + Sven Hammarling, Nag Ltd. + + ===================================================================== +*/ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* dnrm2_ */ + +/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static doublereal dtemp; + + +/* + Purpose + ======= + + DROT applies a plane rotation. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; +/* L30: */ + } + return 0; +} /* drot_ */ + +/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + Purpose + ======= + + DSCAL scales a vector by a constant. + uses unrolled loops for increment equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* dscal_ */ + +/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; + + +/* + Purpose + ======= + + interchanges two vectors. + uses unrolled loops for increments equal one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; +/* L50: */ + } + return 0; +} /* dswap_ */ + +/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("DSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) 
{ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSYMV . */ + +} /* dsymv_ */ + +/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. 
+ + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("DSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) 
{ + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR2 . */ + +} /* dsyr2_ */ + +/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. 
+ Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("DSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYR2K. 
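+
+   A minimal calling sketch (illustrative only; the variables below are
+   hypothetical), assuming column-major storage and the f2c by-reference
+   convention used throughout this file, forming
+   C := alpha*A*B' + alpha*B*A' + beta*C with n-by-k matrices A and B:
+
+       integer n = 3, k = 2, lda = 3, ldb = 3, ldc = 3;
+       doublereal alpha = 1., beta = 0.;
+       doublereal a[3 * 2], b[3 * 2], c[3 * 3];
+       dsyr2k_("U", "N", &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
+
+   With UPLO = 'U' only the upper triangle of c is written; the strictly
+   lower triangle is left untouched.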
*/ + +} /* dsyr2k_ */ + +/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. 
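+
+    A minimal calling sketch (illustrative only; the names and values below
+    are hypothetical), assuming column-major storage and the f2c
+    by-reference convention used in this file, forming C := A*A' for a
+    3-by-2 matrix A and writing the result into the upper triangle of C
+    (with beta = 0 the previous contents of c are not read):
+
+        integer n = 3, k = 2, lda = 3, ldc = 3;
+        doublereal alpha = 1., beta = 0.;
+        doublereal a[3 * 2], c[3 * 3];
+        dsyrk_("U", "N", &n, &k, &alpha, a, &lda, &beta, c, &ldc);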
+ + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("DSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. 
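+
+   Here each referenced element C(i,j) is built as the dot product of
+   columns i and j of A: temp = sum over l of A(l,i)*A(l,j), after which
+   C(i,j) is set to alpha*temp when beta is zero, or to
+   alpha*temp + beta*C(i,j) otherwise, exactly as the loops below do.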
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYRK . */ + +} /* dsyrk_ */ + +/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. 
+ Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. 
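+
+   Each new B(i,j) is alpha times the dot product of column i of the
+   stored triangle of A (with the diagonal taken as 1 when DIAG = 'U')
+   and the current column j of B.  In the upper-triangular case the row
+   index i runs downwards so that the entries B(k,j), k < i, read by the
+   inner loop are still the original values, allowing B to be updated
+   in place.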
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM . 
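+
+   A minimal calling sketch (illustrative only; hypothetical variables),
+   assuming column-major storage and the f2c by-reference convention used
+   in this file, forming B := alpha*A*B with a 4-by-4 upper-triangular,
+   non-unit A applied from the left to a 4-by-2 matrix B (a and b filled
+   beforehand):
+
+       integer m = 4, n = 2, lda = 4, ldb = 4;
+       doublereal alpha = 1.;
+       doublereal a[4 * 4], b[4 * 2];
+       dtrmm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb);
+
+   B is overwritten with the product; only the upper triangle and the
+   diagonal of A are read.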
*/ + +} /* dtrmm_ */ + +/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublereal temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! 
lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("DTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV . 
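+
+   A minimal calling sketch (illustrative only; hypothetical variables),
+   assuming column-major storage and the f2c by-reference convention used
+   in this file, forming x := A*x for a 3-by-3 upper-triangular, non-unit
+   A and a contiguous vector x (a and x filled beforehand):
+
+       integer n = 3, lda = 3, incx = 1;
+       doublereal a[3 * 3], x[3];
+       dtrmv_("U", "N", "N", &n, a, &lda, x, &incx);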
*/ + +} /* dtrmv_ */ + +/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. 
+ On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. 
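+
+   Since A is triangular, A' is triangular of the opposite kind, and the
+   system A'*X = alpha*B is solved column by column by substitution: in
+   the upper-triangular case below, row i of a solution column depends
+   only on the already-computed rows 1..i-1, so i runs upwards and B is
+   overwritten in place.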
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM . 
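+
+   A minimal calling sketch (illustrative only; hypothetical variables),
+   assuming column-major storage and the f2c by-reference convention used
+   in this file, solving A*X = B in place for a 4-by-4 upper-triangular,
+   non-unit A and a 4-by-2 right-hand side B (a and b filled beforehand):
+
+       integer m = 4, n = 2, lda = 4, ldb = 4;
+       doublereal alpha = 1.;
+       doublereal a[4 * 4], b[4 * 2];
+       dtrsm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb);
+
+   On return the columns of b hold the solution X.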
*/ + +} /* dtrsm_ */ + +doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, ix; + static doublereal stemp; + extern doublereal dcabs1_(doublecomplex *); + + +/* + Purpose + ======= + + DZASUM takes the sum of the absolute values. + + Further Details + =============== + + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[ix]); + ix += *incx; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* dzasum_ */ + +doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; + + /* Local variables */ + static integer ix; + static doublereal ssq, temp, norm, scale; + + +/* + Purpose + ======= + + DZNRM2 returns the euclidean norm of a vector via the function + name, so that + + DZNRM2 := sqrt( conjg( x' )*x ) + + Further Details + =============== + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to ZLASSQ. + Sven Hammarling, Nag Ltd. + + ===================================================================== +*/ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DZNRM2. */ + +} /* dznrm2_ */ + +integer icamax_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + static integer i__, ix; + static real smax; + extern doublereal scabs1_(complex *); + + +/* + Purpose + ======= + + ICAMAX finds the index of element having max. absolute value. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = scabs1_(&cx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (scabs1_(&cx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = scabs1_(&cx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = scabs1_(&cx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (scabs1_(&cx[i__]) <= smax) { + goto L30; + } + ret_val = i__; + smax = scabs1_(&cx[i__]); +L30: + ; + } + return ret_val; +} /* icamax_ */ + +integer idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; + + /* Local variables */ + static integer i__, ix; + static doublereal dmax__; + + +/* + Purpose + ======= + + IDAMAX finds the index of element having max. absolute value. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { + goto L5; + } + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { + goto L30; + } + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); +L30: + ; + } + return ret_val; +} /* idamax_ */ + +integer isamax_(integer *n, real *sx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + real r__1; + + /* Local variables */ + static integer i__, ix; + static real smax; + + +/* + Purpose + ======= + + ISAMAX finds the index of element having max. absolute value. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dabs(sx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[ix], dabs(r__1)) <= smax) { + goto L5; + } + ret_val = i__; + smax = (r__1 = sx[ix], dabs(r__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dabs(sx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[i__], dabs(r__1)) <= smax) { + goto L30; + } + ret_val = i__; + smax = (r__1 = sx[i__], dabs(r__1)); +L30: + ; + } + return ret_val; +} /* isamax_ */ + +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + static integer i__, ix; + static doublereal smax; + extern doublereal dcabs1_(doublecomplex *); + + +/* + Purpose + ======= + + IZAMAX finds the index of element having max. absolute value. + + Further Details + =============== + + jack dongarra, 1/15/85. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = dcabs1_(&zx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) <= smax) { + goto L30; + } + ret_val = i__; + smax = dcabs1_(&zx[i__]); +L30: + ; + } + return ret_val; +} /* izamax_ */ + +/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, + real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + Purpose + ======= + + SAXPY constant times a vector plus a vector. + uses unrolled loop for increments equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*sa == 0.f) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] += *sa * sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] += *sa * sx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + sy[i__] += *sa * sx[i__]; + sy[i__ + 1] += *sa * sx[i__ + 1]; + sy[i__ + 2] += *sa * sx[i__ + 2]; + sy[i__ + 3] += *sa * sx[i__ + 3]; +/* L50: */ + } + return 0; +} /* saxpy_ */ + +doublereal scabs1_(complex *z__) +{ + /* System generated locals */ + real ret_val, r__1, r__2; + + +/* + Purpose + ======= + + SCABS1 computes absolute value of a complex number + + ===================================================================== +*/ + + ret_val = (r__1 = z__->r, dabs(r__1)) + (r__2 = r_imag(z__), dabs(r__2)); + return ret_val; +} /* scabs1_ */ + +doublereal scasum_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1, r__2; + + /* Local variables */ + static integer i__, nincx; + static real stemp; + + +/* + Purpose + ======= + + SCASUM takes the sum of the absolute values of a complex vector and + returns a single precision result. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0.f; + stemp = 0.f; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* scasum_ */ + +doublereal scnrm2_(integer *n, complex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1; + + /* Local variables */ + static integer ix; + static real ssq, temp, norm, scale; + + +/* + Purpose + ======= + + SCNRM2 returns the euclidean norm of a vector via the function + name, so that + + SCNRM2 := sqrt( conjg( x' )*x ) + + Further Details + =============== + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to CLASSQ. + Sven Hammarling, Nag Ltd. 
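+
+    The norm is accumulated as in CLASSQ: running values scale and ssq are
+    kept so that scale**2 * ssq equals the sum of squares seen so far.  For
+    each nonzero component t (real or imaginary part), if t exceeds scale
+    then ssq = 1 + ssq*(scale/t)**2 and scale = t, otherwise
+    ssq = ssq + (t/scale)**2; the result is scale*sqrt(ssq), which avoids
+    overflow and underflow in the squaring.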
+ + ===================================================================== +*/ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp = (r__1 = x[i__3].r, dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SCNRM2. */ + +} /* scnrm2_ */ + +/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + Purpose + ======= + + SCOPY copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to 1. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] = sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] = sx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + sy[i__] = sx[i__]; + sy[i__ + 1] = sx[i__ + 1]; + sy[i__ + 2] = sx[i__ + 2]; + sy[i__ + 3] = sx[i__ + 3]; + sy[i__ + 4] = sx[i__ + 4]; + sy[i__ + 5] = sx[i__ + 5]; + sy[i__ + 6] = sx[i__ + 6]; +/* L50: */ + } + return 0; +} /* scopy_ */ + +doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + Purpose + ======= + + SDOT forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + stemp = 0.f; + ret_val = 0.f; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[ix] * sy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[i__] * sy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ + i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + + 4] * sy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = stemp; + return ret_val; +} /* sdot_ */ + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static real temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Arguments + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. 
+ Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("SGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And if alpha.eq.zero. 
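+   In that case only the scaling of C by BETA remains to be done: C is
+   zeroed when BETA is also zero, scaled by BETA otherwise, and the
+   routine returns.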
*/ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.f) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L130: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.f) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of SGEMM . */ + +} /* sgemm_ */ + +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Arguments + ========== + + TRANS - CHARACTER*1. 
+ On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - REAL array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. 
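+   Then y := alpha*A*x + y (or y := alpha*A'*x + y) is accumulated,
+   one column of A at a time.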
+*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SGEMV . */ + +} /* sgemv_ */ + +/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static real temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. 
+ + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("SGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of SGER . */ + +} /* sger_ */ + +doublereal snrm2_(integer *n, real *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + real ret_val, r__1; + + /* Local variables */ + static integer ix; + static real ssq, norm, scale, absxi; + + +/* + Purpose + ======= + + SNRM2 returns the euclidean norm of a vector via the function + name, so that + + SNRM2 := sqrt( x'*x ). + + Further Details + =============== + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to SLASSQ. + Sven Hammarling, Nag Ltd. + + ===================================================================== +*/ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else if (*n == 1) { + norm = dabs(x[1]); + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); + if (scale < absxi) { +/* Computing 2nd power */ + r__1 = scale / absxi; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = absxi; + } else { +/* Computing 2nd power */ + r__1 = absxi / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SNRM2. 
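+
+   A minimal illustrative call (a sketch only, relying on the f2c typedefs
+   integer/real/doublereal already used in this translation): the norm of a
+   three-element vector with unit stride,
+
+       integer n = 3, incx = 1;
+       real x[3] = {3.f, 0.f, 4.f};
+       doublereal nrm = snrm2_(&n, x, &incx);   result: 5.0
+
+   The norm is returned through the function value; x is not modified.
+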
*/ + +} /* snrm2_ */ + +/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static real stemp; + + +/* + Purpose + ======= + + applies a plane rotation. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[ix] + *s * sy[iy]; + sy[iy] = *c__ * sy[iy] - *s * sx[ix]; + sx[ix] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[i__] + *s * sy[i__]; + sy[i__] = *c__ * sy[i__] - *s * sx[i__]; + sx[i__] = stemp; +/* L30: */ + } + return 0; +} /* srot_ */ + +/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + Purpose + ======= + + scales a vector by a constant. + uses unrolled loops for increment equal to 1. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + sx[i__] = *sa * sx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + sx[i__] = *sa * sx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + sx[i__] = *sa * sx[i__]; + sx[i__ + 1] = *sa * sx[i__ + 1]; + sx[i__ + 2] = *sa * sx[i__ + 2]; + sx[i__ + 3] = *sa * sx[i__ + 3]; + sx[i__ + 4] = *sa * sx[i__ + 4]; +/* L50: */ + } + return 0; +} /* sscal_ */ + +/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + Purpose + ======= + + interchanges two vectors. + uses unrolled loops for increments equal to 1. + + Further Details + =============== + + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[ix]; + sx[ix] = sy[iy]; + sy[iy] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; + stemp = sx[i__ + 1]; + sx[i__ + 1] = sy[i__ + 1]; + sy[i__ + 1] = stemp; + stemp = sx[i__ + 2]; + sx[i__ + 2] = sy[i__ + 2]; + sy[i__ + 2] = stemp; +/* L50: */ + } + return 0; +} /* sswap_ */ + +/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, + integer *lda, real *x, integer *incx, real *beta, real *y, integer * + incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. 
When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("SSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. 
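+   For each column j the diagonal entry contributes first, after which the
+   strictly lower part of the column both updates y and accumulates the
+   dot product with x.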
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SSYMV . */ + +} /* ssymv_ */ + +/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). 
+ Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("SSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of SSYR2 . 
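+
+   A minimal illustrative call (a sketch only, with the f2c typedefs used
+   throughout this translation): a rank 2 update of the upper triangle of
+   a 2 by 2 symmetric matrix stored column-major, with unit strides,
+
+       integer n = 2, incx = 1, incy = 1, lda = 2;
+       real alpha = 1.f;
+       real x[2] = {1.f, 2.f}, y[2] = {3.f, 4.f};
+       real a[4] = {0.f, 0.f, 0.f, 0.f};
+       ssyr2_("U", &n, &alpha, x, &incx, y, &incy, a, &lda);
+
+   Only the upper triangle of a is written; the strictly lower part is not
+   referenced, as documented above.
+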
*/ + +} /* ssyr2_ */ + +/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. 
On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("SSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. 
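+   Each referenced column of C is first scaled by beta (or zeroed), then
+   updated in a single pass over the k columns of A and B.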
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYR2K. */ + +} /* ssyr2k_ */ + +/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! 
lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("SSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYRK . 
*/ + +} /* ssyrk_ */ + +/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). 
+ Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. 
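+
+                The right-side update is applied one column at a time:
+                in the upper triangular case column j of B becomes
+                alpha*( A(j,j)*B(:,j) + sum over k < j of A(k,j)*B(:,k) ).
+                The loop below runs j from n down to 1 so that the
+                columns B(:,k) with k < j read on the right-hand side
+                still hold the original matrix B.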
*/ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of STRMM . */ + +} /* strmm_ */ + +/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static real temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. 
+ + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("STRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. 
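+
+                The product is formed in place: in the upper triangular
+                case x(1:j-1) is updated with x(j)*A(1:j-1,j) before x(j)
+                itself is scaled by A(j,j), and because j increases each
+                x(j) still holds its original value when it is read.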
*/ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.f) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of STRMV . */ + +} /* strmv_ */ + +/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Arguments + ========== + + SIDE - CHARACTER*1. 
+ On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! 
lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). 
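+
+                The system X*A = alpha*B is solved without forming
+                inv( A ): in the upper triangular case column j of X
+                depends only on columns 1..j-1, so the columns of B are
+                overwritten left to right, each one divided by A(j,j)
+                (unless A is unit triangular) after the contributions of
+                the previously computed columns have been subtracted.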
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of STRSM . */ + +} /* strsm_ */ + +/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix, iy; + extern doublereal dcabs1_(doublecomplex *); + + +/* + Purpose + ======= + + ZAXPY constant times a vector plus a vector. + + Further Details + =============== + + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (dcabs1_(za) == 0.) 
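+/*
+       dcabs1_(za) returns |Re(za)| + |Im(za)|, so this cheaply detects
+       an exactly zero za, in which case zy is unchanged and the update
+       loops below can be skipped.
+*/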
{ + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + i__4 = ix; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ + i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ + i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zaxpy_ */ + +/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + + +/* + Purpose + ======= + + ZCOPY copies a vector, x, to a vector, y. + + Further Details + =============== + + jack dongarra, linpack, 4/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; +/* L30: */ + } + return 0; +} /* zcopy_ */ + +/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + Purpose + ======= + + ZDOTC forms the dot product of a vector. + + Further Details + =============== + + jack dongarra, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[ix]); + i__2 = iy; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * + zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[i__]); + i__2 = i__; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * + zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; +/* L30: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; +} /* zdotc_ */ + +/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + Purpose + ======= + + ZDOTU forms the dot product of two vectors. + + Further Details + =============== + + jack dongarra, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = + zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = + zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; +/* L30: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; +} /* zdotu_ */ + +/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ctemp; + + +/* + Purpose + ======= + + Applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + Arguments + ========== + + N (input) INTEGER + On entry, N specifies the order of the vectors cx and cy. + N must be at least zero. + Unchanged on exit. + + CX (input) COMPLEX*16 array, dimension at least + ( 1 + ( N - 1 )*abs( INCX ) ). + Before entry, the incremented array CX must contain the n + element vector cx. On exit, CX is overwritten by the updated + vector cx. + + INCX (input) INTEGER + On entry, INCX specifies the increment for the elements of + CX. INCX must not be zero. + Unchanged on exit. + + CY (input) COMPLEX*16 array, dimension at least + ( 1 + ( N - 1 )*abs( INCY ) ). + Before entry, the incremented array CY must contain the n + element vector cy. On exit, CY is overwritten by the updated + vector cy. + + INCY (input) INTEGER + On entry, INCY specifies the increment for the elements of + CY. INCY must not be zero. + Unchanged on exit. + + C (input) DOUBLE PRECISION + On entry, C specifies the cosine, cos. + Unchanged on exit. + + S (input) DOUBLE PRECISION + On entry, S specifies the sine, sin. + Unchanged on exit. 
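+
+    Illustrative use (a hypothetical caller, not part of the reference
+    source): apply the rotation [ c s; -s c ] to two length-3 complex
+    vectors with unit strides, assuming the f2c typedefs used in this
+    file and example values chosen only for the sketch.
+
+        integer n = 3, incx = 1, incy = 1;
+        doublereal c = 0.8, s = 0.6;            /* c*c + s*s = 1 */
+        doublecomplex x[3], y[3];               /* filled elsewhere */
+        zdrot_(&n, x, &incx, y, &incy, &c, &s);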
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = iy; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = ix; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = i__; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = i__; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* zdrot_ */ + +/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix; + + +/* + Purpose + ======= + + ZDSCAL scales a vector by a constant. + + Further Details + =============== + + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *da, z__2.i = 0.; + i__3 = ix; + z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * + zx[i__3].i + z__2.i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + ix += *incx; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *da, z__2.i = 0.; + i__3 = i__; + z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * + zx[i__3].i + z__2.i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zdscal_ */ + +/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * + c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static doublecomplex temp; + static logical conja, conjb; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Arguments + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = conjg( A' ). + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = conjg( B' ). + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. 
+ Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Set NOTA and NOTB as true if A and B respectively are not + conjugated or transposed, set CONJA and CONJB as true if A and + B respectively are to be transposed but not conjugated and set + NROWA, NCOLA and NROWB as the number of rows and columns of A + and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + conja = lsame_(transa, "C"); + conjb = lsame_(transb, "C"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! conja && ! lsame_(transa, "T")) { + info = 1; + } else if (! notb && ! conjb && ! lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("ZGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && + (beta->r == 1. && beta->i == 0.)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) 
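+/*
+       With alpha == 0 neither A nor B is referenced; C is simply zeroed
+       when beta == 0 or scaled by beta otherwise.
+*/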
{ + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = l + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else if (conja) { + +/* Form C := alpha*conjg( A' )*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + if (beta->r == 0. && beta->i == 0.) 
{ + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L110: */ + } +/* L120: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L140: */ + } +/* L150: */ + } + } + } else if (nota) { + if (conjb) { + +/* Form C := alpha*A*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L160: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L210: */ + } + } else if (beta->r != 1. || beta->i != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = j + l * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L230: */ + } + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + d_cnjg(&z__4, &b[j + l * b_dim1]); + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = + z__3.r * z__4.i + z__3.i * z__4.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L260: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*conjg( A' )*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L290: */ + } + if (beta->r == 0. && beta->i == 0.) 
{ + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A'*conjg( B' ) + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + d_cnjg(&z__3, &b[j + l * b_dim1]); + z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, + z__2.i = a[i__4].r * z__3.i + a[i__4].i * + z__3.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L320: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L330: */ + } +/* L340: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L350: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L360: */ + } +/* L370: */ + } + } + } + + return 0; + +/* End of ZGEMM . 
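+
+   Illustrative use (a hypothetical caller, not part of the reference
+   source): form C := A*B for column-major matrices A (m by k), B (k by
+   n) and C (m by n), assuming the f2c typedefs used in this file and
+   example values chosen only for the sketch.
+
+       integer m = 2, n = 2, k = 3, lda = 2, ldb = 3, ldc = 2;
+       doublecomplex alpha = {1., 0.}, beta = {0., 0.};
+       doublecomplex a[6], b[6], c[4];   /* filled elsewhere */
+       zgemm_("N", "N", &m, &n, &k, &alpha, a, &lda, b, &ldb,
+              &beta, c, &ldc);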
*/ + +} /* zgemm_ */ + +/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * + incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj; + + +/* + Purpose + ======= + + ZGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + + y := alpha*conjg( A' )*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Arguments + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - COMPLEX*16 array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX*16 array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! 
lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("ZGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == + 1. && beta->i == 0.)) { + return 0; + } + + noconj = lsame_(trans, "T"); + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. 
*/ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + ix = kx; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L140: */ + } + } + } + + return 0; + +/* End of ZGEMV . */ + +} /* zgemv_ */ + +/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublecomplex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGERC performs the rank 1 operation + + A := alpha*x*conjg( y' ) + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. 
+ + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZGERC ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of ZGERC . 
*/ + +} /* zgerc_ */ + +/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublecomplex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGERU performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Arguments + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZGERU ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) 
{ + i__2 = jy; + z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + i__2 = jy; + z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of ZGERU . */ + +} /* zgeru_ */ + +/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHEMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. 
+ + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("ZHEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && + beta->i == 0.)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. 
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. 
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L90: */ + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of ZHEMV . 
*/ + +} /* zhemv_ */ + +/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHER2 performs the hermitian rank 2 operation + + A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n hermitian matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("ZHER2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. 
|| + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + iy += *incy; +/* L30: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. 
|| + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L70: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of ZHER2 . */ + +} /* zher2_ */ + +/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + + /* Local variables */ + static integer i__, j, l, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHER2K performs one of the hermitian rank 2k operations + + C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, + + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + + conjg( alpha )*B*conjg( A' ) + + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + + conjg( alpha )*conjg( B' )*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. 
+ On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! 
lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("ZHER2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == + 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* + Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. 
|| b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L150: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. 
|| b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L160: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* + Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L190: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) 
{ + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L220: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of ZHER2K. 
*/ + +} /* zher2k_ */ + +/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, + doublecomplex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static doublereal rtemp; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHERK performs one of the hermitian rank k operations + + C := alpha*A*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*A + beta*C, + + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and a k by n + matrix in the second case. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. 
+ Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("ZHERK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*conjg( A' ) + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L90: */ + } + } else if (*beta != 1.) 
{ + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L150: */ + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*conjg( A' )*A + beta*C. 
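+
+   Element-wise (a readability note; ALPHA and BETA are real in ZHERK):
+
+      C( i, j ) := alpha*sum( conjg( A( l, i ) )*A( l, j ), l = 1..k )
+                 + beta*C( i, j ),
+
+   and for i = j the sum is the real quantity sum( abs( A( l, j ) )**2 ),
+   so the diagonal of C stays real.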
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L200: */ + } + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = + z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; +/* L210: */ + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } +/* L220: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = + z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; +/* L230: */ + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L240: */ + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L250: */ + } +/* L260: */ + } + } + } + + return 0; + +/* End of ZHERK . */ + +} /* zherk_ */ + +/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, ix; + + +/* + Purpose + ======= + + ZSCAL scales a vector by a constant. + + Further Details + =============== + + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = ix; + z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ + i__3].i + za->i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + ix += *incx; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ + i__3].i + za->i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zscal_ */ + +/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + Purpose + ======= + + ZSWAP interchanges two vectors. + + Further Details + =============== + + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = ix; + i__3 = iy; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = iy; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = i__; + i__3 = i__; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = i__; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; +/* L30: */ + } + return 0; +} /* zswap_ */ + +/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublecomplex temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ) + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + Arguments + ========== + + SIDE - CHARACTER*1. 
+ On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! 
lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("ZTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = k + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, z__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L30: */ + } + if (nounit) { + i__3 = k + k * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + i__2 = k + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, z__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + if (nounit) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, z__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, z__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. 
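+
+   Element-wise (readability note): each entry is replaced by
+
+      B( i, j ) := alpha*sum( A( k, i )*B( k, j ) ),
+
+   the sum running over the stored triangle of A ( k <= i for UPLO = 'U',
+   k >= i for UPLO = 'L' ), with A( k, i ) conjugated when TRANSA = 'C'
+   and A( i, i ) taken as one when DIAG = 'U'.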
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, z__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, z__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] + .i, z__2.i = z__3.r * b[i__3].i + + z__3.i * b[i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, z__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] + .i, z__2.i = z__3.r * b[i__4].i + + z__3.i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L140: */ + } + } + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. 
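+
+   Column-wise (readability note): column j of B is overwritten with
+
+      B( 1:m, j ) := alpha*sum( B( 1:m, k )*A( k, j ) ),
+
+   the sum running over the stored triangle of A ( k <= j for UPLO = 'U',
+   k >= j for UPLO = 'L' ), with A( j, j ) taken as one when DIAG = 'U'.
+   Columns are visited in the order that keeps the not yet updated columns
+   of B available for the update.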
*/ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L170: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = k + j * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, z__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L210: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, z__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L220: */ + } + } +/* L230: */ + } +/* L240: */ + } + } + } else { + +/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) 
{ + if (noconj) { + i__3 = j + k * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, z__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L250: */ + } + } +/* L260: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__2 = k + k * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L270: */ + } + } +/* L280: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, z__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L290: */ + } + } +/* L300: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__1 = k + k * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L310: */ + } + } +/* L320: */ + } + } + } + } + + return 0; + +/* End of ZTRMM . 
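+
+   A minimal call sketch (illustrative only; m, n, lda, ldb, the
+   doublecomplex scalar alpha and the column-major arrays a, b are assumed
+   to be set up by the caller):
+
+      ztrmm_("L", "U", "N", "N", &m, &n, &alpha, a, &lda, b, &ldb);
+
+   overwrites B with alpha*A*B for an upper triangular, non-unit A.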
*/ + +} /* ztrmm_ */ + +/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, or x := conjg( A' )*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := conjg( A' )*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! 
lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("ZTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) 
{ + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = ix; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + 
temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTRMV . */ + +} /* ztrmv_ */ + +/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublecomplex temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + The matrix X is overwritten on B. + + Arguments + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. 
+ + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + Further Details + =============== + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! 
lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("ZTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + if (nounit) { + i__2 = k + j * b_dim1; + z_div(&z__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * + a[i__6].i, z__2.i = b[i__5].r * a[ + i__6].i + b[i__5].i * a[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + if (nounit) { + i__3 = k + j * b_dim1; + z_div(&z__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * + a[i__7].i, z__2.i = b[i__6].r * a[ + i__7].i + b[i__6].i * a[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* + Form B := alpha*inv( A' )*B + or B := alpha*inv( conjg( A' ) )*B. 
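+
+   Readability note: each column of B is obtained by substitution,
+
+      B( i, j ) := ( alpha*B( i, j ) - sum( A( k, i )*B( k, j ) ) )/A( i, i ),
+
+   the sum running over the stored triangle ( k < i for UPLO = 'U', rows
+   processed top to bottom; k > i for UPLO = 'L', rows processed bottom to
+   top ), with A( k, i ) and A( i, i ) conjugated when TRANSA = 'C' and the
+   division skipped when DIAG = 'U'.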
+*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, z__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L110: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] + .i, z__2.i = z__3.r * b[i__4].i + + z__3.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; +/* L130: */ + } +/* L140: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, z__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] + .i, z__2.i = z__3.r * b[i__3].i + + z__3.i * b[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; +/* L170: */ + } +/* L180: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L190: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) 
{ + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * + b[i__7].i, z__2.i = a[i__6].r * b[ + i__7].i + a[i__6].i * b[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L200: */ + } + } +/* L210: */ + } + if (nounit) { + z_div(&z__1, &c_b1078, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L220: */ + } + } +/* L230: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L240: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * + b[i__6].i, z__2.i = a[i__5].r * b[ + i__6].i + a[i__5].i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L250: */ + } + } +/* L260: */ + } + if (nounit) { + z_div(&z__1, &c_b1078, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L270: */ + } + } +/* L280: */ + } + } + } else { + +/* + Form B := alpha*B*inv( A' ) + or B := alpha*B*inv( conjg( A' ) ). +*/ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + z_div(&z__1, &c_b1078, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1078, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L290: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + d_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L300: */ + } + } +/* L310: */ + } + if (alpha->r != 1. || alpha->i != 0.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L320: */ + } + } +/* L330: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + z_div(&z__1, &c_b1078, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1078, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L340: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + d_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L350: */ + } + } +/* L360: */ + } + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L370: */ + } + } +/* L380: */ + } + } + } + } + + return 0; + +/* End of ZTRSM . */ + +} /* ztrsm_ */ + +/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRSV solves one of the systems of equations + + A*x = b, or A'*x = b, or conjg( A' )*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix. + + No test for singularity or near-singularity is included in this + routine. Such tests must be performed before calling this routine. + + Arguments + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the equations to be solved as + follows: + + TRANS = 'N' or 'n' A*x = b. + + TRANS = 'T' or 't' A'*x = b. + + TRANS = 'C' or 'c' conjg( A' )*x = b. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. 
+ + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element right-hand side vector b. On exit, X is overwritten + with the solution vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Further Details + =============== + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("ZTRSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. 
|| x[i__1].i != 0.) { + if (nounit) { + i__1 = jx; + z_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = j; + z_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
*/ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, z__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ 
>= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, z__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L180: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTRSV . */ + +} /* ztrsv_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.c new file mode 100644 index 0000000000..f52e1e1572 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.c @@ -0,0 +1,29861 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static complex c_b56 = {0.f,0.f}; +static complex c_b57 = {1.f,0.f}; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; +static integer c__65 = 65; +static integer c__9 = 9; +static integer c__6 = 6; +static real c_b328 = 0.f; +static real c_b1034 = 1.f; +static integer c__12 = 12; +static integer c__49 = 49; +static real c_b1276 = -1.f; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static logical c_true = TRUE_; +static real c_b2435 = .5f; + +/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, complex *v, integer *ldv, + integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static real s; + static integer ii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEBAK forms the right or left eigenvectors of a complex general + matrix by backward transformation on the computed eigenvectors of the + balanced matrix output by CGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to CGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by CGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by CGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) COMPLEX array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by CHSEIN or CTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! 
leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of CGEBAK */ + +} /* cgebak_ */ + +/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1, r__2; + + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + extern logical sisnan_(real *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + CGEBAL balances a general complex matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. 
+ + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine CBAL. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. 
*/ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + i__2 = j + i__ * a_dim1; + if (a[i__2].r != 0.f || r_imag(&a[j + i__ * a_dim1]) != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + i__3 = i__ + j * a_dim1; + if (a[i__3].r != 0.f || r_imag(&a[i__ + j * a_dim1]) != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 2.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + i__3 = j + i__ * a_dim1; + c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__ + * a_dim1]), dabs(r__2)); + i__3 = i__ + j * a_dim1; + r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j + * a_dim1]), dabs(r__2)); +L150: + ; + } + ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = c_abs(&a[ica + i__ * a_dim1]); + i__2 = *n - k + 1; + ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 2.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + r__1 = c__ + f + ca + r__ + g + ra; + if (sisnan_(&r__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("CGEBAL", &i__2); + return 0; + } + f *= 2.f; + c__ *= 2.f; + ca *= 2.f; + r__ /= 2.f; + g /= 2.f; + ra /= 2.f; + goto L160; + +L170: + g = c__ / 2.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + f /= 2.f; + c__ /= 2.f; + g /= 2.f; + ca /= 2.f; + r__ *= 2.f; + ra *= 2.f; + goto L180; + +/* Now balance. 
*/ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of CGEBAL */ + +} /* cgebal_ */ + +/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEBD2 reduces a complex general m by n matrix A to upper or lower + real bidiagonal form B by a unitary transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. 
+ + WORK (workspace) COMPLEX array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, v and u are complex vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("CGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + q__1, &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right 
*/ + + i__2 = *m - i__; + i__3 = *n - i__; + clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + taup[i__2].r = 0.f, taup[i__2].i = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & + work[1]); + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + tauq[i__2].r = 0.f, tauq[i__2].i = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of CGEBD2 */ + +} /* cgebd2_ */ + +/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *), clabrd_(integer *, integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEBRD reduces a general complex M-by-N matrix A to upper or lower + bidiagonal form B by a unitary transformation: Q**H * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + r__1 = (real) lwkopt; + work[1].r = r__1, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("CGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+ib-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+ib:m,i+ib:n), using + an update of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & + q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + + nb + 1], &ldwrky, &c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], + lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + (j + 1) * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1].r = ws, work[1].i = 0.f; + return 0; + +/* End of CGEBRD */ + +} /* cgebrd_ */ + +/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, + integer *ldvr, complex *work, integer *lwork, real *rwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2; + complex q__1, q__2; + + /* Local variables */ + static integer i__, k, ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static complex tmp; + static integer ibal; + static char side[1]; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal scnrm2_(integer *, complex *, integer *); + extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *, + integer *, integer *, real *, integer *), slabad_(real *, + real *); + static logical scalea; + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + static real cscale; + extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int 
csscal_(integer *, real *, complex *, integer + *), clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), ctrevc_(char *, + char *, logical *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, complex *, + real *, integer *), cunghr_(integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork, irwork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEEV computes for an N-by-N complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) COMPLEX array, dimension (N) + W contains the computed eigenvalues. + + VL (output) COMPLEX array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + u(j) = VL(:,j), the j-th column of VL. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) COMPLEX array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + v(j) = VR(:,j), the j-th column of VR. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,2*N). + For good performance, LWORK must generally be larger. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace) REAL array, dimension (2*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements and i+1:N of W contain eigenvalues which have + converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to real + workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by CHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n << 1; + if (wantvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + chseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + chseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } else { + chseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } + hswork = work[1].r; +/* Computing MAX */ + i__1 = max(maxwrk,hswork); + maxwrk = max(i__1,minwrk); + } + work[1].r = (real) maxwrk, work[1].i = 0.f; + + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (CWorkspace: none) + (RWorkspace: need N) +*/ + + ibal = 1; + cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: none) +*/ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate unitary matrix in VL + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate unitary matrix in VR + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from CHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (CWorkspace: need 2*N) + (RWorkspace: need 2*N) +*/ + + irwork = ibal + *n; + ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], + &ierr); + } + + if (wantvl) { + +/* + Undo balancing of 
left eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], + ldvl, &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vl_dim1; +/* Computing 2nd power */ + r__1 = vl[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vl[k + i__ * vl_dim1]); + rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vl[k + i__ * vl_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = k + i__ * vl_dim1; + i__3 = k + i__ * vl_dim1; + r__1 = vl[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vl[i__2].r = q__1.r, vl[i__2].i = q__1.i; +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], + ldvr, &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vr_dim1; +/* Computing 2nd power */ + r__1 = vr[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vr[k + i__ * vr_dim1]); + rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vr[k + i__ * vr_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = k + i__ * vr_dim1; + i__3 = k + i__ * vr_dim1; + r__1 = vr[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] + , &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, + &ierr); + } + } + + work[1].r = (real) maxwrk, work[1].i = 0.f; + return 0; + +/* End of CGEEV */ + +} /* cgeev_ */ + +/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEHD2 reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . 
+ + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
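The reflector update H(i)*x = x - tau*v*(v'*x) that CLARF applies on CGEHD2's behalf is a plain rank-1 correction. Below is a minimal sketch, assuming C99 complex floats instead of the f2c complex struct and an arbitrary (v, tau) pair rather than one produced by CLARFG; the names are illustrative only.

/*
 * Illustrative sketch only: apply H = I - tau*v*v^H to a vector x,
 * i.e. x := x - tau * v * (v^H * x).
 */
#include <complex.h>
#include <stdio.h>

static void apply_reflector(int n, const float complex *v, float complex tau,
                            float complex *x)
{
    float complex dot = 0.0f;
    for (int i = 0; i < n; ++i)
        dot += conjf(v[i]) * x[i];      /* dot = v^H * x            */
    for (int i = 0; i < n; ++i)
        x[i] -= tau * v[i] * dot;       /* x   = x - tau*v*(v^H x)  */
}

int main(void)
{
    float complex v[3] = {1.0f, 0.3f - 0.1f*I, 0.2f};   /* v(1) = 1 by convention */
    float complex x[3] = {2.0f + 1.0f*I, -1.0f, 0.5f*I};
    float complex tau  = 1.2f + 0.4f*I;   /* arbitrary; clarfg_ would choose tau so
                                             that H annihilates the trailing entries */
    apply_reflector(3, v, tau, x);
    for (int i = 0; i < 3; ++i)
        printf("x[%d] = % .4f %+.4fi\n", i, crealf(x[i]), cimagf(x[i]));
    return 0;
}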
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ + i__]); + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1, + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; +/* L10: */ + } + + return 0; + +/* End of CGEHD2 */ + +} /* cgehd2_ */ + +/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, j; + static complex t[4160] /* was [65][64] */; + static integer ib; + static complex ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), caxpy_(integer *, + complex *, complex *, integer *, complex *, integer *), cgehd2_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *), clahr2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *, + integer *), clarfb_(char *, char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + CGEHRD reduces a complex general matrix A to upper Hessenberg form H by + an unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code) + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + clahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set + to 1 +*/ + + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + ei.r = a[i__3].r, ei.i = a[i__3].i; + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + i__3 = *ihi - i__ - ib + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & + q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, + &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda); + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = ei.r, a[i__3].i = ei.i; + +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, & + i__3, &c_b57, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], & + ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + q__1.r = -1.f, q__1.i = -0.f; + caxpy_(&i__, &q__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j + + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & + i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1].r = (real) iws, work[1].i = 0.f; + + return 0; + +/* End of CGEHRD */ + +} /* cgehrd_ */ + +/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + CGELQ2 computes an LQ factorization of a complex m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] + ); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); +/* L10: */ + } + return 0; + +/* End of CGELQ2 */ + +} /* cgelq2_ */ + +/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgelq2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGELQF computes an LQ factorization of a complex M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). 
+ + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + clarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CGELQF */ + +} /* cgelqf_ */ + +/* Subroutine */ int cgelsd_(integer *m, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, + integer *rank, complex *work, integer *lwork, real *rwork, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer ie, il, mm; + static real eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static real sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, integer *), slabad_(real *, real *); + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clalsd_( + char *, integer *, integer *, integer *, real *, real *, complex * + , integer *, real *, integer *, complex *, real *, integer *, + integer *), clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer *, + complex *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), slaset_( + char *, integer *, integer *, real *, real *, real *, integer *), cunmlq_(char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *, + integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer liwork, minwrk, maxwrk; + static real smlnum; + static integer lrwork; + static logical lquery; + static integer nrwork, smlsiz; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder tranformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution matrix X. + If m >= n and RANK = n, the residual sum-of-squares for + the solution in the i-th column is given by the sum of + squares of the modulus of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M,N). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) REAL + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 2 * N + N * NRHS + if M is greater than or equal to N or + 2 * M + M * NRHS + if M is less than N, the code will execute correctly. + For good performance, LWORK should generally be larger. 
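In caller code, the workspace-query convention spelled out in the next paragraph (call once with LWORK = -1, read the optimal LWORK from WORK(1) and the minimum RWORK/IWORK sizes from RWORK(1)/IWORK(1), then call again with real buffers) looks roughly as follows. This is a sketch only: it assumes the f2c typedefs (integer, real, complex) and the cgelsd_ prototype above are in scope, and it omits allocation-failure and INFO checks.

/* Sketch, not part of the translated source: query-then-allocate pattern for cgelsd_. */
#include <stdlib.h>

void solve_least_squares(integer m, integer n, integer nrhs,
                         complex *a, integer lda, complex *b, integer ldb,
                         real *s, real rcond, integer *rank, integer *info)
{
    complex wkopt;
    real    rwkopt;
    integer iwkopt, lwork = -1;

    /* LWORK = -1: cgelsd_ only reports the optimal LWORK in WORK(1) and the
       minimum RWORK/IWORK sizes in RWORK(1)/IWORK(1). */
    cgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, rank,
            &wkopt, &lwork, &rwkopt, &iwkopt, info);

    lwork = (integer) wkopt.r;
    integer lrwork = (integer) rwkopt;
    complex *work  = malloc((size_t) lwork  * sizeof *work);
    real    *rwork = malloc((size_t) lrwork * sizeof *rwork);
    integer *iwork = malloc((size_t) iwkopt * sizeof *iwork);

    /* Second call performs the actual minimum-norm solve. */
    cgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, rank,
            work, &lwork, rwork, iwork, info);

    free(work); free(rwork); free(iwork);
}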
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the array WORK and the + minimum sizes of the arrays RWORK and IWORK, and returns + these values as the first entries of the WORK, RWORK and + IWORK arrays, and no error message related to LWORK is issued + by XERBLA. + + RWORK (workspace) REAL array, dimension (MAX(1,LRWORK)) + LRWORK >= + 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + if M is greater than or equal to N or + 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. + + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), + where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + liwork = 1; + lrwork = 1; + if (minmn > 0) { + smlsiz = ilaenv_(&c__9, "CGELSD", " ", &c__0, &c__0, &c__0, &c__0, + (ftnlen)6, (ftnlen)1); + mnthr = ilaenv_(&c__6, "CGELSD", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); +/* Computing MAX */ + i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log( + 2.f)) + 1; + nlvl = max(i__1,0); + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* + Path 1a - overdetermined, with many more rows than + columns. +*/ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "CUNMQR", "LC", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. 
+ + Computing MAX + Computing 2nd power +*/ + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *n * 10 + (*n << 1) * smlsiz + (*n << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, + "CGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, + "CUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "CUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* + Computing MAX + Computing 2nd power +*/ + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *m * 10 + (*m << 1) * smlsiz + (*m << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. +*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * + ilaenv_(&c__1, "CUNMBR", "QLC", m, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "CUNMLQ", "LC", n, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; + maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. The workspace + calculation should use queries for all routines eventually. + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), + i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4) + ; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - underdetermined. */ + + maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, + "CUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; + minwrk = max(i__1,i__2); + } + } + minwrk = min(minwrk,maxwrk); + work[1].r = (real) maxwrk, work[1].i = 0.f; + iwork[1] = liwork; + rwork[1] = (real) lrwork; + + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELSD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = slamch_("P"); + sfmin = slamch_("S"); + smlnum = sfmin / eps; + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1]); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + claset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + slaset_("F", &minmn, &c__1, &c_b328, &c_b328, &s[1], &c__1) + ; + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure B(M+1:N,:) = 0 */ + + if (*m < *n) { + i__1 = *n - *m; + claset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (RWorkspace: need N) + (CWorkspace: need N, prefer N*NB) +*/ + + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (RWorkspace: need N) + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + cunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + } + } + + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + ie = 1; + nrwork = ie + *n; + +/* + Bidiagonalize R in A. + (RWorkspace: need N) + (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + clalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. 
*/ + + i__1 = *lwork - nwork + 1; + cunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (CWorkspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwork], & + ldwork); + itauq = il + ldwork * *m; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize L in WORK(IL). + (RWorkspace: need M) + (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + clalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + cunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + claset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + cunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize A. + (RWorkspace: need M) + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. 
*/ + + clalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + cunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1].r = (real) maxwrk, work[1].i = 0.f; + iwork[1] = liwork; + rwork[1] = (real) lrwork; + return 0; + +/* End of CGELSD */ + +} /* cgelsd_ */ + +/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, k; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + CGEQR2 computes a QR factorization of a complex m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
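Given the packed representation just described (v(i+1:m) stored below the diagonal of column i, v(i) = 1 implied, scalar factor in TAU(i)), applying Q' to a single vector is a loop of those rank-1 updates; CUNMQR performs the blocked equivalent for matrices. A minimal sketch with zero-based, column-major indexing and C99 complex floats; the helper name is made up and this is not part of the translated source.

/*
 * Illustrative sketch only: b := Q^H * b, where Q is the product
 * H(1)H(2)...H(k) of elementary reflectors stored in the packed QR
 * output (a is m-by-n, column major, leading dimension lda; k = min(m,n)).
 */
#include <complex.h>

static void apply_qh(int m, int n, const float complex *a, int lda,
                     const float complex *tau, float complex *b)
{
    int k = m < n ? m : n;
    for (int i = 0; i < k; ++i) {
        /* dot = v_i^H * b(i:m), with v_i(i) == 1 and v_i(i+1:m) = A(i+1:m,i) */
        float complex dot = b[i];
        for (int j = i + 1; j < m; ++j)
            dot += conjf(a[j + i * lda]) * b[j];

        /* b(i:m) -= conj(tau_i) * v_i * dot, i.e. apply H(i)^H to b */
        float complex s = conjf(tau[i]) * dot;
        b[i] -= s;
        for (int j = i + 1; j < m; ++j)
            b[j] -= s * a[j + i * lda];
    }
}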
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + } +/* L10: */ + } + return 0; + +/* End of CGEQR2 */ + +} /* cgeqr2_ */ + +/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGEQRF computes a QR factorization of a complex M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). 
+ For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" + , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & + work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, + &work[ib + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. 
*/ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CGEQRF */ + +} /* cgeqrf_ */ + +/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a, + integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer + *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static real dum[1], eps; + static integer iru, ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau, irvt; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq; + static logical wntqa; + static integer nwork; + extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *, + integer *, complex *, integer *); + static logical wntqn, wntqo, wntqs; + static integer mnthr1, mnthr2; + extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, integer *); + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacrm_( + integer *, integer *, complex *, integer *, real *, integer *, + complex *, integer *, real *), clarcm_(integer *, integer *, real + *, integer *, complex *, integer *, complex *, integer *, real *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *), sbdsdc_(char + *, char *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer + *, complex *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungbr_(char *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, integer + *); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunglq_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, integer *); + static integer ldwrkl; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static real smlnum; + static logical wntqas; + static integer nrwork; + + +/* + -- LAPACK driver routine 
(version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + 8-15-00: Improve consistency of WS calculations (eca) + + + Purpose + ======= + + CGESDD computes the singular value decomposition (SVD) of a complex + M-by-N matrix A, optionally computing the left and/or right singular + vectors, by using divide-and-conquer method. The SVD is written + + A = U * SIGMA * conjugate-transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + V is an N-by-N unitary matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**H, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**H are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**H are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + in the array A and all rows of V**H are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**H are overwritten + in the array A; + = 'N': no columns of U or rows of V**H are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**H (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) COMPLEX array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + unitary matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) COMPLEX array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N unitary matrix V**H; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**H (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. 
+ + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). + if JOBZ = 'O', + LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + if JOBZ = 'S' or 'A', + LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + For good performance, LWORK should generally be larger. + + If LWORK = -1, a workspace query is assumed. The optimal + size for the WORK array is calculated and stored in WORK(1), + and no other work except argument checking is performed. + + RWORK (workspace) REAL array, dimension (MAX(1,LRWORK)) + If JOBZ = 'N', LRWORK >= 5*min(M,N). + Otherwise, + LRWORK >= min(M,N)*max(5*min(M,N)+7,2*max(M,N)+2*min(M,N)+1) + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The updating process of SBDSDC did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr1 = (integer) (minmn * 17.f / 9.f); + mnthr2 = (integer) (minmn * 5.f / 3.f); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to + real workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*N*N + 7*N + BDSPAN = MAX(7*N+4, 3*N+2+SMLSIZ*(SMLSIZ+8)) +*/ + + if (*m >= mnthr1) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + maxwrk = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *n * 3; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + (*n << 1) + *m; + } + } else if (*m >= mnthr2) { + +/* Path 5 (M much larger than N, but not as much as MNTHR1) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = 
maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6 (M at least N, but not much larger) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } else { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*M*M + 7*M + BDSPAN = MAX(7*M+4, 3*M+2+SMLSIZ*(SMLSIZ+8)) +*/ + + if (*n >= mnthr1) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + (*m << 1) + *n; + } + } else if (*n >= mnthr2) { + +/* Path 5t (N much larger than M, but not as much as MNTHR1) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + 
i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6t (N greater than M, but not much larger) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } + maxwrk = max(maxwrk,minwrk); + } + if (*info == 0) { + work[1].r = (real) maxwrk, work[1].i = 0.f; + if (*lwork < minwrk && *lwork != -1) { + *info = -13; + } + } + +/* Quick returns */ + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGESDD", &i__1); + return 0; + } + if (*lwork == -1) { + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. 
If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr1) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAN) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ='O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { + +/* WORK(IR) is M by N */ + + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK( IR ), zeroing out below it */ + + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of R in WORK(IRU) and computing right singular vectors + of R in WORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of R + (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by the right singular vectors of R + (CWorkspace: need N*N+3*N, prefer 
M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (CWorkspace: need 2*N*N, prefer N*N+M*N) + (RWorkspace: 0) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + cgemm_("N", "N", &chunk, n, n, &c_b57, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b56, &work[ir], & + ldwrkr); + clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b56, &c_b56, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + cgemm_("N", "N", m, n, n, &c_b57, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b56, &u[u_offset], ldu); + + } 
else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (CWorkspace: need N+M, prefer N+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out below it */ + + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + cgemm_("N", "N", m, n, n, &c_b57, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b56, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else if (*m >= mnthr2) { + +/* + MNTHR2 <= M < MNTHR1 + + Path 5 (M much larger than N, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, 
&rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK(IU) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in WORK(IU), copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else if (wntqs) { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need N*N+2*M*N) +*/ + + nrwork = irvt; + 
clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: 0) + (Rworkspace: need 3*N*N) +*/ + + nrwork = irvt; + clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else { + +/* + M .LT. 
MNTHR2 + + Path 6 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition + Use CUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK( IU ) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of A, copying + to A + (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) + (Rworkspace: need 0) +*/ + + claset_("F", m, n, &c_b56, &c_b56, &work[iu], &ldwrku); + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + claset_("F", m, n, &c_b56, &c_b56, &u[u_offset], ldu); + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + claset_("F", m, m, &c_b56, &c_b56, &u[u_offset], ldu); + if (*m > *n) { + i__2 = *m - *n; + i__1 = *m - *n; + claset_("F", &i__2, &i__1, &c_b56, &c_b57, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 2*N+M, prefer 2*N+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. 
If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr1) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b56, &c_b56, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need 3*M, prefer 2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAN) +*/ + + sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + ldwkvt = *m; + +/* WORK(IVT) is M by M */ + + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { + +/* WORK(IL) M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + +/* WORK(IL) is M by CHUNK */ + + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b56, &c_b56, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix 
WORK(IVT) + Overwrite WORK(IVT) by the right singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by Q + in A, storing result in WORK(IL) and copying to A + (CWorkspace: need 2*M*M, prefer M*M+M*N)) + (RWorkspace: 0) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + cgemm_("N", "N", m, &blk, m, &c_b57, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b56, &work[il], & + ldwrkl); + clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Copy VT to WORK(IL), multiply right singular vectors of L + in WORK(IL) by Q in A, storing result in VT + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + clacpy_("F", m, m, &vt[vt_offset], ldvt, 
&work[il], &ldwrkl); + cgemm_("N", "N", m, n, m, &c_b57, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b56, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 9t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (CWorkspace: need M+N, prefer M+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); + +/* Produce L in A, zeroing out above it */ + + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b56, &c_b56, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + cgemm_("N", "N", m, n, m, &c_b57, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b56, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else if (*n >= mnthr2) { + +/* + MNTHR2 <= N < MNTHR1 + + Path 5t (N much larger than M, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors +*/ + + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + + if (wntqn) { + +/* 
+ Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); + + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRVT) + storing the result in WORK(IVT), copying to U + (Cworkspace: need 0) + (Rworkspace: need 2*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); + +/* + Multiply RWORK(IRVT) by P**H in A, storing the + result in WORK(IVT), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 2*M*M, prefer 2*M*N) +*/ + + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + nrwork = iru; + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } + + } 
else { + +/* + N .LT. MNTHR2 + + Path 6t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition + Use CUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + claset_("F", m, n, &c_b56, &c_b56, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3) { + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of A, + copying to A + (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +*/ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + claset_("F", m, n, &c_b56, &c_b56, &vt[vt_offset], ldvt); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set all of VT to identity matrix */ + + claset_("F", n, n, &c_b56, &c_b57, &vt[vt_offset], ldvt); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 2*M+N, prefer 2*M+N*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__1 = minmn - 1; + slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__1 = minmn - 1; + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = (real) 
maxwrk, work[1].i = 0.f; + + return 0; + +/* End of CGESDD */ + +} /* cgesdd_ */ + +/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer * + lda, integer *ipiv, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *, + integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer + *, integer *, complex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGESV computes the solution to a complex system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. 
*/ + + cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of CGESV */ + +} /* cgesv_ */ + +/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, j, jp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgeru_(integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, integer *); + static real sfmin; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. 
*/ + + if (j < *m) { + if (c_abs(&a[j + j * a_dim1]) >= sfmin) { + i__2 = *m - j; + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); + cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ + j * a_dim1; + c_div(&q__1, &a[j + i__ + j * a_dim1], &a[j + j * + a_dim1]); + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + q__1.r = -1.f, q__1.i = -0.f; + cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; + +/* End of CGETF2 */ + +} /* cgetf2_ */ + +/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer iinfo; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), cgetf2_(integer *, + integer *, complex *, integer *, integer *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b57, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b57, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of CGETRF */ + +} /* cgetrf_ */ + +/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), xerbla_(char *, + integer *), claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CGETRS solves a system of linear equations + A * X = B, A**T * X = B, or A**H * X = B + with a general N-by-N matrix A using the LU factorization computed + by CGETRF. 
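+
+   Illustrative usage sketch: the factorization from CGETRF is computed
+   once and then reused for any number of right hand sides; the names
+   n, nrhs, lda, ldb, a, b, ipiv and info below are assumed caller-side
+   variables, not variables of this file.
+
+       cgetrf_(&n, &n, a, &lda, ipiv, &info);
+       if (info == 0) {
+           cgetrs_("No transpose", &n, &nrhs, a, &lda, ipiv,
+                   b, &ldb, &info);
+       }
+
+   This is the same factor-then-solve sequence used by the driver CGESV
+   above.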
+ + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A**T * X = B (Transpose) + = 'C': A**H * X = B (Conjugate transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by CGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from CGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b57, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A**T * X = B or A**H * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b57, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b57, &a[a_offset], + lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. 
*/ + + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of CGETRS */ + +} /* cgetrs_ */ + +/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static real eps; + static integer inde; + static real anrm; + static integer imax; + static real rmin, rmax; + static integer lopt; + static real sigma; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; + static logical lower; + static integer llrwk, lropt; + static logical wantz; + static integer indwk2, llwrk2; + extern doublereal clanhe_(char *, char *, integer *, complex *, integer *, + real *); + static integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer + *, complex *, integer *); + static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + static integer indtau, indrwk, indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer llwork; + static real smlnum; + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CHEEVD computes all eigenvalues and, optionally, eigenvectors of a + complex Hermitian matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA, N) + On entry, the Hermitian matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. 
If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) REAL array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. + If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + RWORK (workspace/output) REAL array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If N <= 1, LRWORK must be at least 1. + If JOBZ = 'N' and N > 1, LRWORK must be at least N. + If JOBZ = 'V' and N > 1, LRWORK must be at least + 1 + 5*N + 2*N**2. + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + Modified description of INFO. Sven, 16 Feb 05. + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! 
(wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b1034, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* + For eigenvalues only, call SSTERF. For eigenvectors, first call + CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call CUNMTR to multiply it to the + Householder transformations represented as Householder vectors in + A. +*/ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. 
*/ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + return 0; + +/* End of CHEEVD */ + +} /* cheevd_ */ + +/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__; + static complex taui; + extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, integer *); + static complex alpha; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, complex *, integer * + ), caxpy_(integer *, complex *, complex *, integer *, + complex *, integer *); + static logical upper; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CHETD2 reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b56, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.f; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + 
d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b56, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + i__2 = *n - i__; + cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of CHETD2 */ + +} /* chetd2_ */ + +/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer + *, real *, real *, complex *, integer *), cher2k_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer + *, real *, complex *, complex *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CHETRD reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q**H * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). 
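+
+   Illustrative sketch: the reflectors left in A together with TAU are
+   usually applied with CUNMTR rather than by forming Q explicitly; the
+   names m, cmat, ldc, work, lwork and info below are assumed
+   caller-side variables, not variables of this file.
+
+       cunmtr_("L", uplo, "N", &n, &m, a, &lda, tau,
+               cmat, &ldc, work, &lwork, &info);
+
+   This overwrites the n-by-m matrix CMAT by Q*CMAT, which is how CHEEVD
+   above back-transforms the eigenvectors of the tridiagonal matrix.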
+ + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b1034, &a[a_offset], lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+nb:n,i+nb:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1034, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CHETRD */ + +} /* chetrd_ */ + +/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, + integer *ldz, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2]; + real r__1, r__2, r__3; + complex q__1; + char ch__1[2]; + + /* Local variables */ + static complex hl[2401] /* was [49][49] */; + static integer kbot, nmin; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static logical initz; + static complex workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int claqr0_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), + clahqr_(logical *, logical *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, integer *, complex *, + integer *, integer *), clacpy_(char *, integer *, integer *, + complex *, integer *, 
complex *, integer *), claset_(char + *, integer *, integer *, complex *, complex *, complex *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical lquery; + + +/* + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + CHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL, and then passed to CGEHRD + when the matrix output by CGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', H contains the upper + triangular matrix T from the Schur decomposition (the + Schur form). If INFO = 0 and JOB = 'E', the contents of + H are unspecified on exit. (The output value of H when + INFO.GT.0 is given under the description of INFO below.) + + Unlike earlier versions of CHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the unitary matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the unitary matrix generated by CUNGHR + after the call to CGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. 
+ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then CHSEQR does a workspace query. + In this case, CHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, CHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'CHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The CLAHQR vs CLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + CLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. 
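+
+   Illustrative workspace-query sketch: as described under LWORK above,
+   calling the routine with LWORK = -1 only estimates the workspace; the
+   names n, ilo, ihi, h, ldh, w, z, ldz, wkopt and info below are
+   assumed caller-side variables, not variables of this file.
+
+       lwork = -1;
+       chseqr_("S", "I", &n, &ilo, &ihi, h, &ldh, w,
+               z, &ldz, &wkopt, &lwork, &info);
+       lwork = (integer) wkopt.r;
+
+   A second call with a WORK array of the returned length then performs
+   the actual computation.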
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== NL allocates some local workspace to help small matrices + . through a rare CLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== + + ==== Decode and check the input parameters. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + r__1 = (real) max(1,*n); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -10; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -12; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + xerbla_("CHSEQR", &i__1); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + claqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + Computing MAX +*/ + r__2 = work[1].r, r__3 = (real) max(1,*n); + r__1 = dmax(r__2,r__3); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + + } else { + +/* ==== copy eigenvalues isolated by CGEBAL ==== */ + + if (*ilo > 1) { + i__1 = *ilo - 1; + i__2 = *ldh + 1; + ccopy_(&i__1, &h__[h_offset], &i__2, &w[1], &c__1); + } + if (*ihi < *n) { + i__1 = *n - *ihi; + i__2 = *ldh + 1; + ccopy_(&i__1, &h__[*ihi + 1 + (*ihi + 1) * h_dim1], &i__2, &w[* + ihi + 1], &c__1); + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + claset_("A", n, n, &c_b56, &c_b57, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* + ==== CLAHQR/CLAQR0 crossover point ==== + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "CHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); + +/* ==== CLAQR0 for big matrices; CLAHQR for small ones ==== */ + + if (*n > nmin) { + claqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { + +/* ==== Small matrix ==== */ + + clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* + ==== A rare CLAHQR failure! CLAQR0 sometimes succeeds + . when CLAHQR fails. ==== +*/ + + kbot = *info; + + if (*n >= 49) { + +/* + ==== Larger matrices have enough subdiagonal scratch + . space to call CLAQR0 directly. ==== +*/ + + claqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, &work[ + 1], lwork, info); + + } else { + +/* + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from CLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling CLAQR0. ==== +*/ + + clacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + i__1 = *n + 1 + *n * 49 - 50; + hl[i__1].r = 0.f, hl[i__1].i = 0.f; + i__1 = 49 - *n; + claset_("A", &c__49, &i__1, &c_b56, &c_b56, &hl[(*n + 1) * + 49 - 49], &c__49); + claqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + w[1], ilo, ihi, &z__[z_offset], ldz, workl, & + c__49, info); + if (wantt || *info != 0) { + clacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__2 = *n - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &h__[h_dim1 + 3], ldh); + } + +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + + Computing MAX +*/ + r__2 = (real) max(1,*n), r__3 = work[1].r; + r__1 = dmax(r__2,r__3); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } + +/* ==== End of CHSEQR ==== */ + + return 0; +} /* chseqr_ */ + +/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, + integer *lda, real *d__, real *e, complex *tauq, complex *taup, + complex *x, integer *ldx, complex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgemv_(char *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), clarfg_(integer *, complex *, complex *, + integer *, complex *), clacgv_(integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLABRD reduces the first NB rows and columns of a complex general + m by n matrix A to upper or lower real bidiagonal form by a unitary + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by CGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the unitary matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) COMPLEX array dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + X (output) COMPLEX array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. 
+ + LDX (input) INTEGER + The leading dimension of the array X. LDX >= max(1,M). + + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + ( + i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & + c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b57, &a[i__ + + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, + &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* 
Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b56, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *m) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + 
q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + +/* Update A(i+1:m,i) */ + + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * + a_dim1], &c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], & + c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) + * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } else { + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + return 0; + +/* End of CLABRD */ + +} /* clabrd_ */ + +/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + complex q__1; + + /* Local variables */ + static integer i__, ioff; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLACGV conjugates a complex vector of length N. + + Arguments + ========= + + N (input) INTEGER + The length of the vector X. N >= 0. + + X (input/output) COMPLEX array, dimension + (1+(N-1)*abs(INCX)) + On entry, the vector of length N to be conjugated. + On exit, X is overwritten with conjg(X). + + INCX (input) INTEGER + The spacing between successive elements of X. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &x[i__]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + r_cnjg(&q__1, &x[ioff]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; + +/* End of CLACGV */ + +} /* clacgv_ */ + +/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLACP2 copies all or part of a real two-dimensional matrix A to a + complex matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of CLACP2 */ + +} /* clacp2_ */ + +/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of CLACPY */ + +} /* clacpy_ */ + +/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda, + real *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLACRM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by N and complex; B is N by N and real; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) COMPLEX array, dimension (LDA, N) + A contains the M by N matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) REAL array, dimension (LDB, N) + B contains the N by N matrix B. 
+ + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,N). + + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,N). + + RWORK (workspace) REAL array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + sgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of CLACRM */ + +} /* clacrm_ */ + +/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y) +{ + /* System generated locals */ + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + static real zi, zr; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLADIV := X / Y, where X and Y are complex. The computation of X / Y + will not overflow on an intermediary step unless the results + overflows. + + Arguments + ========= + + X (input) COMPLEX + Y (input) COMPLEX + The complex scalars X and Y. 
+ + ===================================================================== +*/ + + + r__1 = x->r; + r__2 = r_imag(x); + r__3 = y->r; + r__4 = r_imag(y); + sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi); + q__1.r = zr, q__1.i = zi; + ret_val->r = q__1.r, ret_val->i = q__1.i; + + return ; + +/* End of CLADIV */ + +} /* cladiv_ */ + +/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e, + complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr, iperm; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer iqptr; + extern /* Subroutine */ int claed7_(integer *, integer *, integer *, + integer *, integer *, integer *, real *, complex *, integer *, + real *, integer *, real *, integer *, integer *, integer *, + integer *, integer *, real *, complex *, real *, integer *, + integer *); + static integer tlvls; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz, + iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + Using the divide and conquer method, CLAED0 computes all eigenvalues + of a symmetric tridiagonal matrix which is one diagonal block of + those from reducing a dense or band Hermitian matrix and + corresponding eigenvectors of the dense or band matrix. + + Arguments + ========= + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q must contain an QSIZ x N matrix whose columns + unitarily orthonormal. It is a part of the unitary matrix + that reduces the full dense Hermitian matrix to a + (reducible) symmetric tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). 
+ + IWORK (workspace) INTEGER array, + the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + RWORK (workspace) REAL array, + dimension (1 + 3*N + 2*N*lg N + 3*N**2) + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + QSTORE (workspace) COMPLEX array, dimension (LDQS, N) + Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. + LDQS >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + ===================================================================== + + Warning: N could be as big as QSIZ! + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) + $ THEN +*/ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. 
+*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + ll = iq - 1 + iwork[iqptr + curr]; + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info); + clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. CLAED7 handles the case + when the eigenvectors of a full or band Hermitian matrix (which + was reduced to tridiagonal form) are desired. + + I am free to use Q as a valuable working space until Loop 150. +*/ + + claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. 
+*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ + } + scopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + + return 0; + +/* End of CLAED0 */ + +} /* claed0_ */ + +/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * + q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * + qptr, integer *prmptr, integer *perm, integer *givptr, integer * + givcol, real *givnum, complex *work, real *rwork, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp; + extern /* Subroutine */ int claed8_(integer *, integer *, integer *, + complex *, integer *, real *, real *, integer *, real *, real *, + complex *, integer *, real *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *), + xerbla_(char *, integer *), slamrg_(integer *, integer *, + real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense or banded + Hermitian matrix that has been reduced to tridiagonal form. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. 
min(1,N) <= CUTPNT <= N. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) REAL + Contains the subdiagonal element used to create the rank-1 + modification. + + INDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, + ie. D( INDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension (4*N) + + RWORK (workspace) REAL array, + dimension (3*N+2*QSIZ*N) + + WORK (workspace) COMPLEX array, dimension (QSIZ*N) + + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ.LT.0 .OR. 
ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN +*/ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } + +/* Prepare the INDXQ sorting premutation. 
*/ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + + return 0; + +/* End of CLAED7 */ + +} /* claed7_ */ + +/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex * + q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, + real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, + integer *indx, integer *indxq, integer *perm, integer *givptr, + integer *givcol, real *givnum, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + ccopy_(integer *, complex *, integer *, complex *, integer *), + csrot_(integer *, complex *, integer *, complex *, integer *, + real *, real *), scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + CLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + Contains the number of non-deflated eigenvalues. + This is the order of the related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the dense or band matrix to tridiagonal form. + QSIZ >= N if ICOMPQ = 1. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. On exit, D contains the trailing (N-K) updated + eigenvalues (those which were deflated) sorted into increasing + order. + + RHO (input/output) REAL + Contains the off diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. RHO is modified during the computation to + the value required by SLAED3. 
+ + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. MIN(1,N) <= CUTPNT <= N. + + Z (input) REAL array, dimension (N) + On input this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). The contents of Z are + destroyed during the updating process. + + DLAMDA (output) REAL array, dimension (N) + Contains a copy of the first K eigenvalues which will be used + by SLAED3 to form the secular equation. + + Q2 (output) COMPLEX array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + Contains a copy of the first K eigenvectors which will be used + by SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max( 1, N ). + + W (output) REAL array, dimension (N) + This will hold the first k values of the final + deflation-altered z-vector and will be passed to SLAED3. + + INDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output INDXP(1:K) + points to the nondeflated D-values and INDXP(K+1:N) + points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + INDXQ (input) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that elements in + the second half of this permutation must first have CUTPNT + added to their values in order to be accurate. + + PERM (output) INTEGER array, dimension (N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (output) INTEGER + Contains the number of Givens rotations which took place in + this subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED8", &i__1); + return 0; + } + +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). 
+*/ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b1276, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + -- except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L50: */ + } + clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; + } + } else { + jlam = j; + goto L70; + } +/* L60: */ + } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L70; +L90: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); +/* L110: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq); + } + + return 0; + +/* End of CLAED8 */ + +} /* claed8_ */ + +/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7; + + /* Local variables */ + static integer i__, j, k, l, m; + static real s; + static complex t, u, v[2], x, y; + static integer i1, i2; + static complex t1; + static real t2; + static complex v2; + static real aa, ab, ba, bb, h10; + static complex h11; + static real h21; + static complex h22, sc; + static integer nh, nz; + static real sx; + static integer jhi; + static complex h11s; + static integer jlo, its; + static real ulp; + static complex sum; + static real tst; + static complex temp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer *, complex *, + integer *); + static real rtemp; + extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, + complex *, complex *, integer *, complex *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + static real safmin, safmax, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Purpose + ======= + + CLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows and + columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). + CLAHQR works primarily with the Hessenberg submatrix in rows + and columns ILO to IHI, but applies transformations to all of + H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO is zero and if WANTT is .TRUE., then H + is upper triangular in rows and columns ILO:IHI. If INFO + is zero and if WANTT is .FALSE., then the contents of H + are unspecified on exit. The output state of H in case + INF is positive is below under the description of INFO. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues ILO to IHI are stored in the + corresponding elements of W. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with W(i) = H(i,i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by CHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, CLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of W contain + those eigenvalues which have been successfully + computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix + rows and columns ILO thorugh INFO of the final, + output value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of CLAHQR from LAPACK version 3.0. 
+ It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). + + ========================================================= +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + i__2 = j + 2 + j * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + i__2 = j + 3 + j * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + i__1 = *ihi + (*ihi - 2) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } +/* ==== ensure that subdiagonal entries are real ==== */ + if (*wantt) { + jlo = 1; + jhi = *n; + } else { + jlo = *ilo; + jhi = *ihi; + } + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + if (r_imag(&h__[i__ + (i__ - 1) * h_dim1]) != 0.f) { +/* + ==== The following redundant normalization + . avoids problems with both gradual and + . sudden underflow in ABS(H(I,I-1)) ==== +*/ + i__2 = i__ + (i__ - 1) * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + r__3 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[i__ + + (i__ - 1) * h_dim1]), dabs(r__2)); + q__1.r = h__[i__2].r / r__3, q__1.i = h__[i__2].i / r__3; + sc.r = q__1.r, sc.i = q__1.i; + r_cnjg(&q__2, &sc); + r__1 = c_abs(&sc); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + sc.r = q__1.r, sc.i = q__1.i; + i__2 = i__ + (i__ - 1) * h_dim1; + r__1 = c_abs(&h__[i__ + (i__ - 1) * h_dim1]); + h__[i__2].r = r__1, h__[i__2].i = 0.f; + i__2 = jhi - i__ + 1; + cscal_(&i__2, &sc, &h__[i__ + i__ * h_dim1], ldh); +/* Computing MIN */ + i__3 = jhi, i__4 = i__ + 1; + i__2 = min(i__3,i__4) - jlo + 1; + r_cnjg(&q__1, &sc); + cscal_(&i__2, &q__1, &h__[jlo + i__ * h_dim1], &c__1); + if (*wantz) { + i__2 = *ihiz - *iloz + 1; + r_cnjg(&q__1, &sc); + cscal_(&i__2, &q__1, &z__[*iloz + i__ * z_dim1], &c__1); + } + } +/* L20: */ + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* Set machine-dependent constants for the stopping criterion. */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L30: + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + l = *ilo; + for (its = 0; its <= 30; ++its) { + +/* Look for a single small subdiagonal element. 
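+      (Editor's note, not part of the upstream LAPACK comment: the scan
+      below first declares H(K,K-1) negligible outright when
+      |Re(H(K,K-1))| + |Im(H(K,K-1))| <= SMLNUM, and otherwise applies the
+      Ahues & Tisseur test, which compares the subdiagonal entry against
+      the neighbouring diagonal and superdiagonal entries.  As a rough
+      single-precision illustration: with ULP about 1.2e-7 and diagonal
+      entries of order 1, a subdiagonal entry of magnitude 1e-10 passes
+      the |H(K,K-1)| <= ULP*TST screen and, generically, the relative
+      test as well, so the active matrix splits at that row.)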
*/ + + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + i__2 = k + (k - 1) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[k + (k + - 1) * h_dim1]), dabs(r__2)) <= smlnum) { + goto L50; + } + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + tst = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), + dabs(r__4))); + if (tst == 0.f) { + if (k - 2 >= *ilo) { + i__2 = k - 1 + (k - 2) * h_dim1; + tst += (r__1 = h__[i__2].r, dabs(r__1)); + } + if (k + 1 <= *ihi) { + i__2 = k + 1 + k * h_dim1; + tst += (r__1 = h__[i__2].r, dabs(r__1)); + } + } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some examples. ==== +*/ + i__2 = k + (k - 1) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) <= ulp * tst) { +/* Computing MAX */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + r__5 = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + (k - 1) * h_dim1]), dabs(r__2)), r__6 = (r__3 = + h__[i__3].r, dabs(r__3)) + (r__4 = r_imag(&h__[k - 1 + + k * h_dim1]), dabs(r__4)); + ab = dmax(r__5,r__6); +/* Computing MIN */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + r__5 = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + (k - 1) * h_dim1]), dabs(r__2)), r__6 = (r__3 = + h__[i__3].r, dabs(r__3)) + (r__4 = r_imag(&h__[k - 1 + + k * h_dim1]), dabs(r__4)); + ba = dmin(r__5,r__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + q__2.r = h__[i__2].r - h__[i__3].r, q__2.i = h__[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MAX */ + i__4 = k + k * h_dim1; + r__5 = (r__1 = h__[i__4].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + k * h_dim1]), dabs(r__2)), r__6 = (r__3 = q__1.r, + dabs(r__3)) + (r__4 = r_imag(&q__1), dabs(r__4)); + aa = dmax(r__5,r__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + q__2.r = h__[i__2].r - h__[i__3].r, q__2.i = h__[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MIN */ + i__4 = k + k * h_dim1; + r__5 = (r__1 = h__[i__4].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + k * h_dim1]), dabs(r__2)), r__6 = (r__3 = q__1.r, + dabs(r__3)) + (r__4 = r_imag(&q__1), dabs(r__4)); + bb = dmin(r__5,r__6); + s = aa + ab; +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= dmax(r__1,r__2)) { + goto L50; + } + } +/* L40: */ + } +L50: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + i__1 = l + (l - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + +/* Exit from loop if a submatrix of order 1 has split off. */ + + if (l >= i__) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10) { + +/* Exceptional shift. */ + + i__1 = l + 1 + l * h_dim1; + s = (r__1 = h__[i__1].r, dabs(r__1)) * .75f; + i__1 = l + l * h_dim1; + q__1.r = s + h__[i__1].r, q__1.i = h__[i__1].i; + t.r = q__1.r, t.i = q__1.i; + } else if (its == 20) { + +/* Exceptional shift. 
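+      (Editor's note, not part of the upstream LAPACK comment: exceptional
+      shifts are taken only on the 10th and 20th of the at most 30
+      iterations allowed per eigenvalue, using 0.75 times the magnitude of
+      a subdiagonal entry added to the adjacent diagonal entry, to break a
+      possible cycle.  On every other iteration the branch below uses
+      Wilkinson's shift: the eigenvalue of the trailing 2-by-2 block
+          [ H(I-1,I-1)  H(I-1,I) ]
+          [ H(I,I-1)    H(I,I)   ]
+      closer to H(I,I).  With X = (H(I-1,I-1) - H(I,I))/2 and
+      U^2 = H(I-1,I)*H(I,I-1), that eigenvalue is
+      T = H(I,I) - U^2/(X + Y), where Y = sqrt(X^2 + U^2) with its sign
+      chosen so that X + Y does not cancel; the divisions by S in the
+      code only guard against overflow.)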
*/ + + i__1 = i__ + (i__ - 1) * h_dim1; + s = (r__1 = h__[i__1].r, dabs(r__1)) * .75f; + i__1 = i__ + i__ * h_dim1; + q__1.r = s + h__[i__1].r, q__1.i = h__[i__1].i; + t.r = q__1.r, t.i = q__1.i; + } else { + +/* Wilkinson's shift. */ + + i__1 = i__ + i__ * h_dim1; + t.r = h__[i__1].r, t.i = h__[i__1].i; + c_sqrt(&q__2, &h__[i__ - 1 + i__ * h_dim1]); + c_sqrt(&q__3, &h__[i__ + (i__ - 1) * h_dim1]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * + q__3.i + q__2.i * q__3.r; + u.r = q__1.r, u.i = q__1.i; + s = (r__1 = u.r, dabs(r__1)) + (r__2 = r_imag(&u), dabs(r__2)); + if (s != 0.f) { + i__1 = i__ - 1 + (i__ - 1) * h_dim1; + q__2.r = h__[i__1].r - t.r, q__2.i = h__[i__1].i - t.i; + q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; + x.r = q__1.r, x.i = q__1.i; + sx = (r__1 = x.r, dabs(r__1)) + (r__2 = r_imag(&x), dabs(r__2) + ); +/* Computing MAX */ + r__3 = s, r__4 = (r__1 = x.r, dabs(r__1)) + (r__2 = r_imag(&x) + , dabs(r__2)); + s = dmax(r__3,r__4); + q__5.r = x.r / s, q__5.i = x.i / s; + pow_ci(&q__4, &q__5, &c__2); + q__7.r = u.r / s, q__7.i = u.i / s; + pow_ci(&q__6, &q__7, &c__2); + q__3.r = q__4.r + q__6.r, q__3.i = q__4.i + q__6.i; + c_sqrt(&q__2, &q__3); + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + y.r = q__1.r, y.i = q__1.i; + if (sx > 0.f) { + q__1.r = x.r / sx, q__1.i = x.i / sx; + q__2.r = x.r / sx, q__2.i = x.i / sx; + if (q__1.r * y.r + r_imag(&q__2) * r_imag(&y) < 0.f) { + q__3.r = -y.r, q__3.i = -y.i; + y.r = q__3.r, y.i = q__3.i; + } + } + q__4.r = x.r + y.r, q__4.i = x.i + y.i; + cladiv_(&q__3, &u, &q__4); + q__2.r = u.r * q__3.r - u.i * q__3.i, q__2.i = u.r * q__3.i + + u.i * q__3.r; + q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; + t.r = q__1.r, t.i = q__1.i; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__1 = l + 1; + for (m = i__ - 1; m >= i__1; --m) { + +/* + Determine the effect of starting the single-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. 
+*/ + + i__2 = m + m * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__2 = m + 1 + m * h_dim1; + h21 = h__[i__2].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( + r__2)) + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; + i__2 = m + (m - 1) * h_dim1; + h10 = h__[i__2].r; + if (dabs(h10) * dabs(h21) <= ulp * (((r__1 = h11s.r, dabs(r__1)) + + (r__2 = r_imag(&h11s), dabs(r__2))) * ((r__3 = h11.r, + dabs(r__3)) + (r__4 = r_imag(&h11), dabs(r__4)) + ((r__5 = + h22.r, dabs(r__5)) + (r__6 = r_imag(&h22), dabs(r__6))))) + ) { + goto L70; + } +/* L60: */ + } + i__1 = l + l * h_dim1; + h11.r = h__[i__1].r, h11.i = h__[i__1].i; + i__1 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__1].r, h22.i = h__[i__1].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__1 = l + 1 + l * h_dim1; + h21 = h__[i__1].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) + + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; +L70: + +/* Single-shift QR step */ + + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. + + V(2) is always real before the call to CLARFG, and hence + after the call T2 ( = T1*V(2) ) is also real. +*/ + + if (k > m) { + ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + clarfg_(&c__2, v, &v[1], &c__1, &t1); + if (k > m) { + i__2 = k + (k - 1) * h_dim1; + h__[i__2].r = v[0].r, h__[i__2].i = v[0].i; + i__2 = k + 1 + (k - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + } + v2.r = v[1].r, v2.i = v[1].i; + q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * + v2.r; + t2 = q__1.r; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + r_cnjg(&q__3, &t1); + i__3 = k + j * h_dim1; + q__2.r = q__3.r * h__[i__3].r - q__3.i * h__[i__3].i, q__2.i = + q__3.r * h__[i__3].i + q__3.i * h__[i__3].r; + i__4 = k + 1 + j * h_dim1; + q__4.r = t2 * h__[i__4].r, q__4.i = t2 * h__[i__4].i; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + sum.r = q__1.r, sum.i = q__1.i; + i__3 = k + j * h_dim1; + i__4 = k + j * h_dim1; + q__1.r = h__[i__4].r - sum.r, q__1.i = h__[i__4].i - sum.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + i__3 = k + 1 + j * h_dim1; + i__4 = k + 1 + j * h_dim1; + q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + + sum.i * v2.r; + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - q__2.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; +/* L80: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+2,I). 
+ + Computing MIN +*/ + i__3 = k + 2; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + i__3 = j + k * h_dim1; + q__2.r = t1.r * h__[i__3].r - t1.i * h__[i__3].i, q__2.i = + t1.r * h__[i__3].i + t1.i * h__[i__3].r; + i__4 = j + (k + 1) * h_dim1; + q__3.r = t2 * h__[i__4].r, q__3.i = t2 * h__[i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__3 = j + k * h_dim1; + i__4 = j + k * h_dim1; + q__1.r = h__[i__4].r - sum.r, q__1.i = h__[i__4].i - sum.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + i__3 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - q__2.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; +/* L90: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + i__3 = j + k * z_dim1; + q__2.r = t1.r * z__[i__3].r - t1.i * z__[i__3].i, q__2.i = + t1.r * z__[i__3].i + t1.i * z__[i__3].r; + i__4 = j + (k + 1) * z_dim1; + q__3.r = t2 * z__[i__4].r, q__3.i = t2 * z__[i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__3 = j + k * z_dim1; + i__4 = j + k * z_dim1; + q__1.r = z__[i__4].r - sum.r, q__1.i = z__[i__4].i - + sum.i; + z__[i__3].r = q__1.r, z__[i__3].i = q__1.i; + i__3 = j + (k + 1) * z_dim1; + i__4 = j + (k + 1) * z_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = z__[i__4].r - q__2.r, q__1.i = z__[i__4].i - + q__2.i; + z__[i__3].r = q__1.r, z__[i__3].i = q__1.i; +/* L100: */ + } + } + + if (k == m && m > l) { + +/* + If the QR step was started at row M > L because two + consecutive small subdiagonals were found, then extra + scaling must be performed to ensure that H(M,M-1) remains + real. +*/ + + q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i; + temp.r = q__1.r, temp.i = q__1.i; + r__1 = c_abs(&temp); + q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = m + 1 + m * h_dim1; + i__3 = m + 1 + m * h_dim1; + r_cnjg(&q__2, &temp); + q__1.r = h__[i__3].r * q__2.r - h__[i__3].i * q__2.i, q__1.i = + h__[i__3].r * q__2.i + h__[i__3].i * q__2.r; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; + if (m + 2 <= i__) { + i__2 = m + 2 + (m + 1) * h_dim1; + i__3 = m + 2 + (m + 1) * h_dim1; + q__1.r = h__[i__3].r * temp.r - h__[i__3].i * temp.i, + q__1.i = h__[i__3].r * temp.i + h__[i__3].i * + temp.r; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; + } + i__2 = i__; + for (j = m; j <= i__2; ++j) { + if (j != m + 1) { + if (i2 > j) { + i__3 = i2 - j; + cscal_(&i__3, &temp, &h__[j + (j + 1) * h_dim1], + ldh); + } + i__3 = j - i1; + r_cnjg(&q__1, &temp); + cscal_(&i__3, &q__1, &h__[i1 + j * h_dim1], &c__1); + if (*wantz) { + r_cnjg(&q__1, &temp); + cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & + c__1); + } + } +/* L110: */ + } + } +/* L120: */ + } + +/* Ensure that H(I,I-1) is real. 
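+      (Editor's note, not part of the upstream LAPACK comment: the
+      iteration relies on real subdiagonal entries so that V(2), and
+      hence T2 = T1*V(2), stays real during the bulge chase.  The code
+      below restores this by writing |H(I,I-1)| onto the subdiagonal and
+      folding the unit-modulus factor temp = H(I,I-1)/|H(I,I-1)| back in
+      as a diagonal unitary similarity: the entries of column I above the
+      diagonal, and column I of Z, are scaled by temp, while the entries
+      of row I to the right of the diagonal are scaled by conjg(temp), so
+      the eigenvalues are unchanged.)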
*/ + + i__1 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__1].r, temp.i = h__[i__1].i; + if (r_imag(&temp) != 0.f) { + rtemp = c_abs(&temp); + i__1 = i__ + (i__ - 1) * h_dim1; + h__[i__1].r = rtemp, h__[i__1].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__1 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__1, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__1 = i__ - i1; + cscal_(&i__1, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (*wantz) { + cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); + } + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + i__1 = i__; + i__2 = i__ + i__ * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + +/* return to start of the main loop with new value of I. */ + + i__ = l - 1; + goto L30; + +L150: + return 0; + +/* End of CLAHQR */ + +} /* clahqr_ */ + +/* Subroutine */ int clahr2_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex ei; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgemm_(char *, char *, integer *, integer *, integer * + , complex *, complex *, integer *, complex *, integer *, complex * + , complex *, integer *), cgemv_(char *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), ccopy_(integer *, + complex *, integer *, complex *, integer *), ctrmm_(char *, char * + , char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *), + caxpy_(integer *, complex *, complex *, integer *, complex *, + integer *), ctrmv_(char *, char *, char *, integer *, complex *, + integer *, complex *, integer *), clarfg_( + integer *, complex *, complex *, integer *, complex *), clacgv_( + integer *, complex *, integer *), clacpy_(char *, integer *, + integer *, complex *, integer *, complex *, integer *); + + +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, --* -- April 2009 + -- */ +/* + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + CLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an unitary similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by CGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + K < N. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. 
+ On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) COMPLEX array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
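+   Editor's illustrative sketch (not part of the upstream LAPACK
+   documentation; the function name and the use of C99 complex types are
+   hypothetical simplifications, this file itself uses the f2c complex
+   struct and BLAS calls instead): applying one elementary reflector
+   H(i) = I - tau * v * v' to a vector x only needs the dot product v'*x,
+   i.e. x := x - tau*(v'*x)*v.  A minimal standalone version:
+
+       #include <complex.h>
+
+       // x := (I - tau*v*v^H) * x for one reflector, unit stride
+       static void apply_reflector(int n, float _Complex tau,
+                                   const float _Complex *v,
+                                   float _Complex *x)
+       {
+           float _Complex dot = 0.0f;
+           for (int i = 0; i < n; ++i)
+               dot += conjf(v[i]) * x[i];     // dot = v^H * x
+           for (int i = 0; i < n; ++i)
+               x[i] -= tau * dot * v[i];      // rank-1 update
+       }
+
+   CLAHR2 does not apply the reflectors one at a time like this; it
+   accumulates V and the triangular factor T so that Q = I - V*T*V' can
+   be applied with matrix-matrix (level-3 BLAS) operations, which is the
+   Quintana-Orti and van de Geijn improvement cited above.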
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(K+1:N,I) + + Update I-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + i__2 = *n - *k; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b57, &a[*k + 1 + + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Lower", "Conjugate transpose", "UNIT", &i__2, &a[*k + 1 + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b57, + &t[*nb * t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + ctrmv_("Upper", "Conjugate transpose", "NON-UNIT", &i__2, &t[ + t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b57, &a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + ctrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; + a[i__2].r = ei.r, a[i__2].i = ei.i; + } + +/* + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + clarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; + i__2 = *k + i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(K+1:N,I) */ + + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + cgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b57, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &y[* + k + 1 + i__ * y_dim1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &t[ + i__ * t_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &y[*k + 1 + y_dim1], ldy, + &t[i__ * t_dim1 + 1], &c__1, &c_b57, &y[*k + 1 + i__ * y_dim1] + , &c__1); + i__2 = *n - *k; + cscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + +/* Compute T(1:I,I) */ + + i__2 
= i__ - 1; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + +/* L10: */ + } + i__1 = *k + *nb + *nb * a_dim1; + a[i__1].r = ei.r, a[i__1].i = ei.i; + +/* Compute Y(1:K,1:NB) */ + + clacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + ctrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b57, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + cgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b57, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b57, &y[y_offset], ldy); + } + ctrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b57, &t[ + t_offset], ldt, &y[y_offset], ldy); + + return 0; + +/* End of CLAHR2 */ + +} /* clahr2_ */ + +/* Subroutine */ int clals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + rwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, + givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, + bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, m, n; + static real dj; + static integer nlp1, jcol; + static real temp; + static integer jrow; + extern doublereal snrm2_(integer *, real *, integer *); + static real diflj, difrj, dsigj; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), sgemv_(char *, integer *, integer *, real * + , real *, integer *, real *, integer *, real *, real *, integer *), csrot_(integer *, complex *, integer *, complex *, + integer *, real *, real *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *), + clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), xerbla_(char *, integer *); + static real dsigjp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. 
+ + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) COMPLEX array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) COMPLEX array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) REAL array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) REAL array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) REAL array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) REAL array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + RWORK (workspace) REAL array, dimension + ( K*(1+NRHS) + 2*NRHS ) + + INFO (output) INTEGER + = 0: successful exit. 
+ < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --rwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. + + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + csrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + ccopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + ccopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. 
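+   (Editor's note, not part of the upstream LAPACK comment: the weight
+   vector assembled in RWORK(1:K) is real, so the complex products below
+   are formed with two real SGEMV calls rather than one CGEMV: the real
+   parts of BX are copied into RWORK and multiplied, then the imaginary
+   parts, i.e. Re(B(J,1:NRHS)) = w'*Re(BX) and
+   Im(B(J,1:NRHS)) = w'*Im(BX); the two results are recombined into the
+   complex row J of B, which is then normalized by the 2-norm of w via
+   CLASCL.)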
+*/ + + if (*k == 1) { + ccopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.f) { + csscal_(nrhs, &c_b1276, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) { + rwork[j] = 0.f; + } else { + rwork[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj + / (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == + 0.f) { + rwork[i__] = 0.f; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == + 0.f) { + rwork[i__] = 0.f; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + rwork[1] = -1.f; + temp = snrm2_(k, &rwork[1], &c__1); + +/* + Since B and BX are complex, the following call to SGEMV + is performed in two steps (real and imaginary parts). + + CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * bx_dim1; + rwork[i__] = bx[i__4].r; +/* L50: */ + } +/* L60: */ + } + sgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = r_imag(&bx[jrow + jcol * bx_dim1]); +/* L70: */ + } +/* L80: */ + } + sgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * b_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + q__1.r = rwork[i__4], q__1.i = rwork[i__5]; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L90: */ + } + clascl_("G", &c__0, &c__0, &temp, &c_b1034, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L100: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + clacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. 
+*/ + + if (*k == 1) { + ccopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.f) { + rwork[j] = 0.f; + } else { + rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.f) { + rwork[i__] = 0.f; + } else { + r__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L110: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.f) { + rwork[i__] = 0.f; + } else { + r__1 = -poles[i__ + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L120: */ + } + +/* + Since B and BX are complex, the following call to SGEMV + is performed in two steps (real and imaginary parts). + + CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * b_dim1; + rwork[i__] = b[i__4].r; +/* L130: */ + } +/* L140: */ + } + sgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = r_imag(&b[jrow + jcol * b_dim1]); +/* L150: */ + } +/* L160: */ + } + sgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * bx_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + q__1.r = rwork[i__4], q__1.i = rwork[i__5]; + bx[i__3].r = q__1.r, bx[i__3].i = q__1.i; +/* L170: */ + } +/* L180: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + ccopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + csrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + clacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + ccopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + ccopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + ccopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L190: */ + } + +/* Step (4R): apply back the Givens rotations performed. 
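+   (Editor's note, not part of the upstream LAPACK comment: this undoes
+   the rotations of step (1L): the loop below runs from GIVPTR down to 1
+   and negates the S-value, so each CSROT call applies the inverse of the
+   corresponding Givens rotation, in reverse order.)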
*/ + + for (i__ = *givptr; i__ >= 1; --i__) { + r__1 = -givnum[i__ + givnum_dim1]; + csrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &r__1); +/* L200: */ + } + } + + return 0; + +/* End of CLALS0 */ + +} /* clals0_ */ + +/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, + real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, + real *z__, real *poles, integer *givptr, integer *givcol, integer * + ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, + i__2, i__3, i__4, i__5, i__6; + complex q__1; + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag, jreal, + inode, ndiml; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer ndimr; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), clals0_(integer *, integer *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, integer *, integer *, integer *, real *, integer *, + real *, real *, real *, real *, integer *, real *, real *, real *, + integer *), xerbla_(char *, integer *), slasdt_(integer * + , integer *, integer *, integer *, integer *, integer *, integer * + ); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, CLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by CLALSA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) COMPLEX array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). 
+ + BX (output) COMPLEX array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. + + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) REAL array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) REAL array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) REAL array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) REAL array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) REAL array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + RWORK (workspace) REAL array, dimension at least + MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ). + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
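+   (Editor's worked example, not part of the upstream LAPACK
+   documentation: with the NLVL formula quoted above for DIFL, taking
+   N = 1000 and SMLSIZ = 25 gives N/(SMLSIZ+1) = 1000/26, about 38.5,
+   whose base-2 logarithm is about 5.27, so NLVL = 5 + 1 = 6; the
+   computation tree set up by SLASDT below then has 6 levels, with
+   subproblems of order at most SMLSIZ at the leaves.)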
+*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 170. +*/ + + if (*icompq == 1) { + goto L170; + } + +/* + The nodes on the bottom level of the tree were solved + by SLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to SGEMM + is performed in two steps (real and imaginary parts). 
+ + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L10: */ + } +/* L20: */ + } + sgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[1], &nl); + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L30: */ + } +/* L40: */ + } + sgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[nl * *nrhs + + 1], &nl); + jreal = 0; + jimag = nl * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + +/* + Since B and BX are complex, the following call to SGEMM + is performed in two steps (real and imaginary parts). + + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L70: */ + } +/* L80: */ + } + sgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[1], &nr); + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L90: */ + } +/* L100: */ + } + sgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[nr * *nrhs + + 1], &nr); + jreal = 0; + jimag = nr * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; +/* L110: */ + } +/* L120: */ + } + +/* L130: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + ccopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L140: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. 
+*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L150: */ + } +/* L160: */ + } + goto L330; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L170: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + clals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L180: */ + } +/* L190: */ + } + +/* + The nodes on the bottom level of the tree were solved + by SLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to SGEMM is + performed in two steps (real and imaginary parts). 
+ + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L200: */ + } +/* L210: */ + } + sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ + 1], &nlp1); + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L220: */ + } +/* L230: */ + } + sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ + nlp1 * *nrhs + 1], &nlp1); + jreal = 0; + jimag = nlp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; +/* L240: */ + } +/* L250: */ + } + +/* + Since B and BX are complex, the following call to SGEMM is + performed in two steps (real and imaginary parts). + + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L260: */ + } +/* L270: */ + } + sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ + 1], &nrp1); + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L280: */ + } +/* L290: */ + } + sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ + nrp1 * *nrhs + 1], &nrp1); + jreal = 0; + jimag = nrp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; +/* L300: */ + } +/* L310: */ + } + +/* L320: */ + } + +L330: + + return 0; + +/* End of CLALSA */ + +} /* clalsa_ */ + +/* Subroutine */ int clalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, + integer *rank, complex *work, real *rwork, integer *iwork, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1; + complex q__1; + + /* Local variables */ + static integer c__, i__, j, k; + static real r__; + static integer s, u, z__; + static real cs; + static integer bx; + static real sn; + static integer st, vt, nm1, st1; + static real eps; + static integer iwk; + static real tol; + static integer difl, difr; + static real rcnd; + static integer jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, irwu, + jimag, jreal; + extern /* Subroutine 
*/ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer irwib; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer poles, sizei, irwrb, nsize; + extern /* Subroutine */ int csrot_(integer *, complex *, integer *, + complex *, integer *, real *, real *); + static integer irwvt, icmpq1, icmpq2; + extern /* Subroutine */ int clalsa_(integer *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, real *, + integer *, real *, integer *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *, real *, real *, real * + , real *, integer *, integer *), clascl_(char *, integer *, + integer *, real *, real *, integer *, integer *, complex *, + integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int slasda_(integer *, integer *, integer *, + integer *, real *, real *, real *, integer *, real *, integer *, + real *, real *, real *, real *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, integer *, integer *), + clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), claset_(char *, integer *, integer + *, complex *, complex *, complex *, integer *), xerbla_( + char *, integer *), slascl_(char *, integer *, integer *, + real *, real *, integer *, integer *, real *, integer *, integer * + ); + extern integer isamax_(integer *, real *, integer *); + static integer givcol; + extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer + *, integer *, integer *, real *, real *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), + slaset_(char *, integer *, integer *, real *, real *, real *, + integer *), slartg_(real *, real *, real *, real *, real * + ); + static real orgnrm; + static integer givnum; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + static integer givptr, nrwork, irwwrk, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + CLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. 
+ + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) REAL array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input/output) REAL array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) REAL + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) COMPLEX array, dimension (N * NRHS). + + RWORK (workspace) REAL array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ), + where + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + + IWORK (workspace) INTEGER array, dimension (3*N*NLVL + 11*N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLALSD", &i__1); + return 0; + } + + eps = slamch_("Epsilon"); + +/* Set up the tolerance. */ + + if (*rcond <= 0.f || *rcond >= 1.f) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.f) { + claset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + } else { + *rank = 1; + clascl_("G", &c__0, &c__0, &d__[1], &c_b1034, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = dabs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. 
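+
+   When UPLO = 'L', the loop below uses Givens rotations from SLARTG to
+   chase the sub-diagonal entries into the super-diagonal, giving an
+   upper bidiagonal matrix with the same singular values. The same
+   rotations must also be applied to the right hand sides: directly via
+   CSROT when NRHS = 1, otherwise the cosines and sines are saved in
+   RWORK and applied to each column of B afterwards.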
*/ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + csrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + rwork[(i__ << 1) - 1] = cs; + rwork[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = rwork[(j << 1) - 1]; + sn = rwork[j * 2]; + csrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ + * b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + claset_("A", n, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + return 0; + } + + slascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, &c__1, &d__[1], n, info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + irwu = 1; + irwvt = irwu + *n * *n; + irwwrk = irwvt + *n * *n; + irwrb = irwwrk; + irwib = irwrb + *n * *nrhs; + irwb = irwib + *n * *nrhs; + slaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwu], n); + slaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwvt], n); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, + &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to SLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). +*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L40: */ + } +/* L50: */ + } + sgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L60: */ + } +/* L70: */ + } + sgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, &rwork[irwb], + n, &c_b328, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + q__1.r = rwork[i__4], q__1.i = rwork[i__5]; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L80: */ + } +/* L90: */ + } + + tol = rcnd * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + claset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[i__ + b_dim1], + ldb); + } else { + clascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & + b[i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L100: */ + } + +/* + Since B is complex, the following call to SGEMM is performed + in two steps (real and imaginary parts). That is for V * B + (in the real version of the code V' is stored in WORK). 
+ + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) +*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L110: */ + } +/* L120: */ + } + sgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L130: */ + } +/* L140: */ + } + sgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + q__1.r = rwork[i__4], q__1.i = rwork[i__5]; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L150: */ + } +/* L160: */ + } + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, + info); + slasrt_("D", n, &d__[1], info); + clascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. */ + + nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + nrwork = givnum + (nlvl << 1) * *n; + bx = 1; + + irwrb = nrwork; + irwib = irwrb + *smlsiz * *nrhs; + irwb = irwib + *smlsiz * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) < eps) { + d__[i__] = r_sign(&eps, &d__[i__]); + } +/* L170: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + ccopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + ccopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by SLASDQ. 
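+
+   SLASDQ computes the full SVD of the NSIZE-by-NSIZE bidiagonal block:
+   the singular values overwrite D(ST:ST+NSIZE-1), and the left and
+   right singular vectors are accumulated into the U and VT sections of
+   RWORK, which SLASET first initialises to the identity. U**T is then
+   applied to the corresponding rows of B in two real SGEMMs, exactly as
+   in the small-N branch above.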
*/ + + slaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[vt + + st1], n); + slaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[u + + st1], n); + slasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & + e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & + rwork[nrwork], &c__1, &rwork[nrwork], info) + ; + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to SLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). +*/ + + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L180: */ + } +/* L190: */ + } + sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], + &nsize); + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); +/* L200: */ + } +/* L210: */ + } + sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], + &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L220: */ + } +/* L230: */ + } + + clacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1], + &rwork[difl + st1], &rwork[difr + st1], &rwork[z__ + + st1], &rwork[poles + st1], &iwork[givptr + st1], & + iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], & + rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1] + , &rwork[z__ + st1], &rwork[poles + st1], &iwork[ + givptr + st1], &iwork[givcol + st1], n, &iwork[perm + + st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[ + s + st1], &rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L240: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. +*/ + + if ((r__1 = d__[i__], dabs(r__1)) <= tol) { + claset_("A", &c__1, nrhs, &c_b56, &c_b56, &work[bx + i__ - 1], n); + } else { + ++(*rank); + clascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & + work[bx + i__ - 1], n, info); + } + d__[i__] = (r__1 = d__[i__], dabs(r__1)); +/* L250: */ + } + +/* Now apply back the right singular vectors. 
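+
+   At this point WORK(BX) holds, for every subproblem, the right hand
+   sides transformed by the left singular vectors and divided by the
+   singular values that passed the RCOND test (rows belonging to the
+   discarded singular values have been set to zero). Multiplying by the
+   right singular vectors completes the minimum norm solution: a plain
+   copy for 1-by-1 blocks, an explicit product with VT**T (again as two
+   real SGEMMs) for small blocks, and CLALSA for the blocks that were
+   solved by divide and conquer.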
*/ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + ccopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + +/* + Since B and BX are complex, the following call to SGEMM + is performed in two steps (real and imaginary parts). + + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) +*/ + + j = bxst - *n - 1; + jreal = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jreal; + i__4 = j + jrow; + rwork[jreal] = work[i__4].r; +/* L260: */ + } +/* L270: */ + } + sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], &nsize); + j = bxst - *n - 1; + jimag = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jimag; + rwork[jimag] = r_imag(&work[j + jrow]); +/* L280: */ + } +/* L290: */ + } + sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + q__1.r = rwork[i__5], q__1.i = rwork[i__6]; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L300: */ + } +/* L310: */ + } + } else { + clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], & + rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[ + nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } +/* L320: */ + } + +/* Unscale and sort the singular values. */ + + slascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, info); + slasrt_("D", n, &d__[1], info); + clascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of CLALSD */ + +} /* clalsd_ */ + +doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * + lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2; + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex matrix A. 
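+
+   For instance, for the 2-by-2 matrix
+
+      A = (  1  -2 )
+          ( 3i   4 )
+
+   the four values, defined precisely under Description below, are 4
+   (largest absolute entry), 6 (one norm: the larger column sum 2+4),
+   7 (infinity norm: the larger row sum 3+4) and sqrt(30) ~= 5.48
+   (Frobenius norm).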
+ + Description + =========== + + CLANGE returns the value + + CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + CLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + CLANGE is set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += c_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += c_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of CLANGE */ + +} /* clange_ */ + +doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Local variables */ + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLANHE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex hermitian matrix A. + + Description + =========== + + CLANHE returns the value + + CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANHE as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + hermitian matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, CLANHE is + set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The hermitian matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. Note that the imaginary parts of the diagonal + elements need not be set and are assumed to be zero. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is hermitian). 
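+
+   Only one triangle of A is stored, so each off-diagonal |A(i,j)| has
+   to be counted twice: the loops below add it both to the running
+   column sum SUM and to the row accumulator WORK(I). The diagonal
+   contributes |real(A(j,j))|, since the imaginary parts of the diagonal
+   are assumed to be zero.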
*/ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (r__1 = a[i__2].r, dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (r__1 = a[i__2].r, dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.f) { + i__2 = i__ + i__ * a_dim1; + absa = (r__1 = a[i__2].r, dabs(r__1)); + if (scale < absa) { +/* Computing 2nd power */ + r__1 = scale / absa; + sum = sum * (r__1 * r__1) + 1.f; + scale = absa; + } else { +/* Computing 2nd power */ + r__1 = absa / scale; + sum += r__1 * r__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of CLANHE */ + +} /* clanhe_ */ + +/* Subroutine */ int claqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + complex q__1, q__2, q__3, q__4, q__5; + + /* Local variables */ + static integer i__, k; + static real s; + static complex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static complex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static complex swap; + static integer ktop; + static complex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int claqr3_(logical *, logical *, integer *, + integer *, integer *, integer *, complex *, integer *, integer *, + integer *, complex *, integer *, integer *, integer *, complex *, + complex *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, complex *, integer *), claqr4_(logical *, + logical *, integer *, integer *, integer *, complex *, integer *, + complex *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *), claqr5_(logical *, logical *, integer *, + integer *, integer *, integer *, integer *, complex *, complex *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *, complex *, integer *, + integer *, complex *, integer *); + static integer nibble; + extern 
/* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static complex rtdisc; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + CLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to CGEBAL, and then passed to CGEHRD when the + matrix output by CGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. 
LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then CLAQR0 does a workspace query. + In this case, CLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, CLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use CLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + clahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. 
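+   .     JBCMPZ(1:1) is set to 'S' when the Schur form is wanted and to
+   .     'E' when only eigenvalues are wanted; JBCMPZ(2:2) is 'V' when
+   .     Schur vectors are accumulated and 'N' otherwise.  ILAENV uses
+   .     this two-character string in the tuning queries below (deflation
+   .     window size, shift count, crossover points).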
==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to CLAQR3 ==== +*/ + + i__1 = nwr + 1; + claqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(CLAQR5, CLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* ==== CLAHQR/CLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. 
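+   .     NDFL counts the multi-shift QR iterations performed since the
+   .     last deflation and is reset to 1 whenever a deflation occurs.
+   .     It drives two heuristics below: once NDFL reaches 5 the size of
+   .     the deflation window starts to be varied, and on every iteration
+   .     whose NDFL is a multiple of 6 exceptional shifts are used in
+   .     place of the computed ones.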
==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0.f && h__[i__3].i == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(& + h__[kwtop + (kwtop - 1) * h_dim1]), dabs(r__2)) > + (r__3 = h__[i__3].r, dabs(r__3)) + (r__4 = r_imag( + &h__[kwtop - 1 + (kwtop - 2) * h_dim1]), dabs( + r__4))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + claqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if CLAQR3 + . did not provide that many shifts. 
==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . CLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[i__ + (i__ - 1) * h_dim1]), dabs( + r__2))) * .75f; + q__1.r = h__[i__4].r + r__3, q__1.i = h__[i__4].i; + w[i__3].r = q__1.r, w[i__3].i = q__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use CLAQR4 or + . CLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + clacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + claqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &work[1], lwork, &inf); + } else { + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) 
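+   .
+   .     Concretely, with a = H(KBOT-1,KBOT-1), b = H(KBOT-1,KBOT),
+   .     c = H(KBOT,KBOT-1) and d = H(KBOT,KBOT), the block is divided
+   .     by S = |a| + |b| + |c| + |d| (taking |Re| + |Im| as the absolute
+   .     value of each entry), the two eigenvalues are obtained from the
+   .     quadratic formula
+   .
+   .        lambda = (a + d)/2 +- sqrt( ( (a - d)/2 )**2 + b*c ),
+   .
+   .     applied to the scaled entries, and the results are multiplied
+   .     by S again.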
==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[ + kbot + (kbot - 1) * h_dim1]), dabs(r__4))) + + ((r__5 = h__[i__4].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[kbot - 1 + kbot * + h_dim1]), dabs(r__6))) + ((r__7 = h__[ + i__5].r, dabs(r__7)) + (r__8 = r_imag(& + h__[kbot + kbot * h_dim1]), dabs(r__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + aa.r = q__1.r, aa.i = q__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + cc.r = q__1.r, cc.i = q__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + bb.r = q__1.r, bb.i = q__1.i; + i__2 = kbot + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + dd.r = q__1.r, dd.i = q__1.i; + q__2.r = aa.r + dd.r, q__2.i = aa.i + dd.i; + q__1.r = q__2.r / 2.f, q__1.i = q__2.i / 2.f; + tr2.r = q__1.r, tr2.i = q__1.i; + q__3.r = aa.r - tr2.r, q__3.i = aa.i - tr2.i; + q__4.r = dd.r - tr2.r, q__4.i = dd.i - tr2.i; + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, + q__2.i = q__3.r * q__4.i + q__3.i * + q__4.r; + q__5.r = bb.r * cc.r - bb.i * cc.i, q__5.i = bb.r + * cc.i + bb.i * cc.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + det.r = q__1.r, det.i = q__1.i; + q__2.r = -det.r, q__2.i = -det.i; + c_sqrt(&q__1, &q__2); + rtdisc.r = q__1.r, rtdisc.i = q__1.i; + i__2 = kbot - 1; + q__2.r = tr2.r + rtdisc.r, q__2.i = tr2.i + + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + i__2 = kbot; + q__2.r = tr2.r - rtdisc.r, q__2.i = tr2.i - + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((r__1 = w[i__4].r, dabs(r__1)) + (r__2 = + r_imag(&w[i__]), dabs(r__2)) < (r__3 = + w[i__5].r, dabs(r__3)) + (r__4 = + r_imag(&w[i__ + 1]), dabs(r__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. 
==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + q__2.r = w[i__2].r - h__[i__3].r, q__2.i = w[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + q__4.r = w[i__4].r - h__[i__5].r, q__4.i = w[i__4].i - + h__[i__5].i; + q__3.r = q__4.r, q__3.i = q__4.i; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), + dabs(r__2)) < (r__3 = q__3.r, dabs(r__3)) + (r__4 + = r_imag(&q__3), dabs(r__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + claqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR0 ==== */ + + return 0; +} /* claqr0_ */ + +/* Subroutine */ int claqr1_(integer *n, complex *h__, integer *ldh, complex * + s1, complex *s2, complex *v) +{ + /* System generated locals */ + integer h_dim1, h_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Local variables */ + static real s; + static complex h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, CLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - s1*I)*(H - s2*I) + + scaling to avoid overflows and most underflows. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) COMPLEX array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + S1 (input) COMPLEX + S2 S1 and S2 are the shifts defining K in (*) above. 
+ + V (output) COMPLEX array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + i__1 = h_dim1 + 1; + q__2.r = h__[i__1].r - s2->r, q__2.i = h__[i__1].i - s2->i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__2 = h_dim1 + 2; + s = (r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), dabs(r__2)) + + ((r__3 = h__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&h__[ + h_dim1 + 2]), dabs(r__4))); + if (s == 0.f) { + v[1].r = 0.f, v[1].i = 0.f; + v[2].r = 0.f, v[2].i = 0.f; + } else { + i__1 = h_dim1 + 2; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h21s.r = q__1.r, h21s.i = q__1.i; + i__1 = (h_dim1 << 1) + 1; + q__2.r = h21s.r * h__[i__1].r - h21s.i * h__[i__1].i, q__2.i = + h21s.r * h__[i__1].i + h21s.i * h__[i__1].r; + i__2 = h_dim1 + 1; + q__4.r = h__[i__2].r - s1->r, q__4.i = h__[i__2].i - s1->i; + i__3 = h_dim1 + 1; + q__6.r = h__[i__3].r - s2->r, q__6.i = h__[i__3].i - s2->i; + q__5.r = q__6.r / s, q__5.i = q__6.i / s; + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, q__3.i = q__4.r * + q__5.i + q__4.i * q__5.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + v[1].r = q__1.r, v[1].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + q__4.r = h__[i__1].r + h__[i__2].r, q__4.i = h__[i__1].i + h__[ + i__2].i; + q__3.r = q__4.r - s1->r, q__3.i = q__4.i - s1->i; + q__2.r = q__3.r - s2->r, q__2.i = q__3.i - s2->i; + q__1.r = h21s.r * q__2.r - h21s.i * q__2.i, q__1.i = h21s.r * + q__2.i + h21s.i * q__2.r; + v[2].r = q__1.r, v[2].i = q__1.i; + } + } else { + i__1 = h_dim1 + 1; + q__2.r = h__[i__1].r - s2->r, q__2.i = h__[i__1].i - s2->i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__2 = h_dim1 + 2; + i__3 = h_dim1 + 3; + s = (r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), dabs(r__2)) + + ((r__3 = h__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&h__[ + h_dim1 + 2]), dabs(r__4))) + ((r__5 = h__[i__3].r, dabs(r__5)) + + (r__6 = r_imag(&h__[h_dim1 + 3]), dabs(r__6))); + if (s == 0.f) { + v[1].r = 0.f, v[1].i = 0.f; + v[2].r = 0.f, v[2].i = 0.f; + v[3].r = 0.f, v[3].i = 0.f; + } else { + i__1 = h_dim1 + 2; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h21s.r = q__1.r, h21s.i = q__1.i; + i__1 = h_dim1 + 3; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h31s.r = q__1.r, h31s.i = q__1.i; + i__1 = h_dim1 + 1; + q__4.r = h__[i__1].r - s1->r, q__4.i = h__[i__1].i - s1->i; + i__2 = h_dim1 + 1; + q__6.r = h__[i__2].r - s2->r, q__6.i = h__[i__2].i - s2->i; + q__5.r = q__6.r / s, q__5.i = q__6.i / s; + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, q__3.i = q__4.r * + q__5.i + q__4.i * q__5.r; + i__3 = (h_dim1 << 1) + 1; + q__7.r = h__[i__3].r * h21s.r - h__[i__3].i * h21s.i, q__7.i = + h__[i__3].r * h21s.i + h__[i__3].i * h21s.r; + q__2.r = q__3.r + q__7.r, q__2.i = q__3.i + q__7.i; + i__4 = h_dim1 * 3 + 1; + q__8.r = h__[i__4].r * h31s.r - h__[i__4].i * h31s.i, q__8.i = + h__[i__4].r * h31s.i + h__[i__4].i * h31s.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + v[1].r = q__1.r, v[1].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + q__5.r = h__[i__1].r + h__[i__2].r, q__5.i = h__[i__1].i + h__[ + i__2].i; + q__4.r = q__5.r - s1->r, q__4.i = 
q__5.i - s1->i; + q__3.r = q__4.r - s2->r, q__3.i = q__4.i - s2->i; + q__2.r = h21s.r * q__3.r - h21s.i * q__3.i, q__2.i = h21s.r * + q__3.i + h21s.i * q__3.r; + i__3 = h_dim1 * 3 + 2; + q__6.r = h__[i__3].r * h31s.r - h__[i__3].i * h31s.i, q__6.i = + h__[i__3].r * h31s.i + h__[i__3].i * h31s.r; + q__1.r = q__2.r + q__6.r, q__1.i = q__2.i + q__6.i; + v[2].r = q__1.r, v[2].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = h_dim1 * 3 + 3; + q__5.r = h__[i__1].r + h__[i__2].r, q__5.i = h__[i__1].i + h__[ + i__2].i; + q__4.r = q__5.r - s1->r, q__4.i = q__5.i - s1->i; + q__3.r = q__4.r - s2->r, q__3.i = q__4.i - s2->i; + q__2.r = h31s.r * q__3.r - h31s.i * q__3.i, q__2.i = h31s.r * + q__3.i + h31s.i * q__3.r; + i__3 = (h_dim1 << 1) + 3; + q__6.r = h21s.r * h__[i__3].r - h21s.i * h__[i__3].i, q__6.i = + h21s.r * h__[i__3].i + h21s.i * h__[i__3].r; + q__1.r = q__2.r + q__6.r, q__1.i = q__2.i + q__6.i; + v[3].r = q__1.r, v[3].i = q__1.i; + } + } + return 0; +} /* claqr1_ */ + +/* Subroutine */ int claqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j; + static complex s; + static integer jw; + static real foo; + static integer kln; + static complex tau; + static integer knt; + static real ulp; + static integer lwk1, lwk2; + static complex beta; + static integer kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + cgemm_(char *, char *, integer *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer + *, complex *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int slabad_(real *, real *), cgehrd_(integer *, + integer *, integer *, complex *, integer *, complex *, complex *, + integer *, integer *), clarfg_(integer *, complex *, complex *, + integer *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex + *, complex *, integer *); + static real safmin, safmax; + extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer + *, complex *, integer *, integer *, integer *, integer *), + cunmhr_(char *, char *, integer *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, complex + *, integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ -- April 2009 -- + + + This subroutine is identical to CLAQR3 except that it avoids + recursion by calling CLAHQR instead of CLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. 
NH.GE.NW. + + T (workspace) COMPLEX array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; CLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to CGEHRD ==== */ + + i__1 = jw - 1; + cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to CUNMHR ==== */ + + i__1 = jw - 1; + cunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1.f, work[1].i = 0.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0.f, s.i = 0.f; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 + = r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2))); + if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= + dmax(r__5,r__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + } + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + claset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], + &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * + t_dim1]), dabs(r__2)); + if (foo == 0.f) { + foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * (( + r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * + v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (CTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0.f, s.i = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
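+ .     (The loop below is a selection sort: on each pass CTREXC moves the
+ .     remaining diagonal entry of T with the largest magnitude up to
+ .     position I.)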
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j * + t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3) + ) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs( + r__4))) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0.f && s.i == 0.f) { + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &work[i__]); + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + clarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1.f, work[1].i = 0.f; + + i__1 = jw - 2; + i__2 = jw - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + r_cnjg(&q__1, &tau); + clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + r_cnjg(&q__2, &v[v_dim1 + 1]); + q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i + * q__2.r; + h__[i__1].r = q__1.r, h__[i__1].i = q__1.i; + } + clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + i__1 = *lwork - jw; + cunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + cgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR2 ==== */ + + return 0; +} /* claqr2_ */ + +/* Subroutine */ int claqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j; + static complex s; + static integer jw; + static real foo; + static integer kln; + static complex tau; + static integer knt; + static real ulp; + static integer lwk1, lwk2, lwk3; + static complex beta; + static integer kcol, info, nmin, ifst, ilst, ltop, krow; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + cgemm_(char *, char *, integer *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer + *, complex *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int claqr4_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), + slabad_(real *, real *), cgehrd_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *) + , clarfg_(integer *, complex *, complex *, integer *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex + *, complex *, integer *); + static real safmin; + extern integer ilaenv_(integer *, char *, 
char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real safmax; + extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer + *, complex *, integer *, integer *, integer *, integer *), + cunmhr_(char *, char *, integer *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, complex + *, integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. 
+ + SH (output) COMPLEX array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; CLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to CGEHRD ==== */ + + i__1 = jw - 1; + cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to CUNMHR ==== */ + + i__1 = jw - 1; + cunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Workspace query call to CLAQR4 ==== */ + + claqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[1], + &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer) work[1].r; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1.f, work[1].i = 0.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0.f, s.i = 0.f; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 + = r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2))); + if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= + dmax(r__5,r__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + } + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + claset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "CLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + claqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], lwork, & + infqr); + } else { + clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * + t_dim1]), dabs(r__2)); + if (foo == 0.f) { + foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * (( + r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * + v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (CTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0.f, s.i = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j * + t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3) + ) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs( + r__4))) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0.f && s.i == 0.f) { + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &work[i__]); + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + clarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1.f, work[1].i = 0.f; + + i__1 = jw - 2; + i__2 = jw - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + r_cnjg(&q__1, &tau); + clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + r_cnjg(&q__2, &v[v_dim1 + 1]); + q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i + * q__2.r; + h__[i__1].r = q__1.r, h__[i__1].i = q__1.i; + } + clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + i__1 = *lwork - jw; + cunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + cgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR3 ==== */ + + return 0; +} /* claqr3_ */ + +/* Subroutine */ int claqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + complex q__1, q__2, q__3, q__4, q__5; + + /* Local variables */ + static integer i__, k; + static real s; + static complex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static complex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static complex swap; + static integer ktop; + static complex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int claqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, complex *, integer *, integer *, + integer *, complex *, integer *, integer *, integer *, complex *, + complex *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, complex *, integer *), claqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + complex *, complex *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, integer *, + complex *, integer *, integer *, complex *, integer *); + static integer nibble; + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static complex rtdisc; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + This subroutine implements one level of recursion for CLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by CLAQR0 and, for large enough + deflation window size, it may be called by CLAQR3. This + subroutine is identical to CLAQR0 except that it calls CLAQR2 + instead of CLAQR3. + + Purpose + ======= + + CLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to CGEBAL, and then passed to CGEHRD when the + matrix output by CGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then CLAQR4 does a workspace query. 
+ In this case, CLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, CLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use CLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + clahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. 
At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to CLAQR2 ==== +*/ + + i__1 = nwr + 1; + claqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(CLAQR5, CLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* ==== CLAHQR/CLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0.f && h__[i__3].i == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . 
the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(& + h__[kwtop + (kwtop - 1) * h_dim1]), dabs(r__2)) > + (r__3 = h__[i__3].r, dabs(r__3)) + (r__4 = r_imag( + &h__[kwtop - 1 + (kwtop - 2) * h_dim1]), dabs( + r__4))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + claqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if CLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . CLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[i__ + (i__ - 1) * h_dim1]), dabs( + r__2))) * .75f; + q__1.r = h__[i__4].r + r__3, q__1.i = h__[i__4].i; + w[i__3].r = q__1.r, w[i__3].i = q__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use CLAHQR + . 
on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + clacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &w[ks], &c__1, &c__1, zdum, & + c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) ==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[ + kbot + (kbot - 1) * h_dim1]), dabs(r__4))) + + ((r__5 = h__[i__4].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[kbot - 1 + kbot * + h_dim1]), dabs(r__6))) + ((r__7 = h__[ + i__5].r, dabs(r__7)) + (r__8 = r_imag(& + h__[kbot + kbot * h_dim1]), dabs(r__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + aa.r = q__1.r, aa.i = q__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + cc.r = q__1.r, cc.i = q__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + bb.r = q__1.r, bb.i = q__1.i; + i__2 = kbot + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + dd.r = q__1.r, dd.i = q__1.i; + q__2.r = aa.r + dd.r, q__2.i = aa.i + dd.i; + q__1.r = q__2.r / 2.f, q__1.i = q__2.i / 2.f; + tr2.r = q__1.r, tr2.i = q__1.i; + q__3.r = aa.r - tr2.r, q__3.i = aa.i - tr2.i; + q__4.r = dd.r - tr2.r, q__4.i = dd.i - tr2.i; + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, + q__2.i = q__3.r * q__4.i + q__3.i * + q__4.r; + q__5.r = bb.r * cc.r - bb.i * cc.i, q__5.i = bb.r + * cc.i + bb.i * cc.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + det.r = q__1.r, det.i = q__1.i; + q__2.r = -det.r, q__2.i = -det.i; + c_sqrt(&q__1, &q__2); + rtdisc.r = q__1.r, rtdisc.i = q__1.i; + i__2 = kbot - 1; + q__2.r = tr2.r + rtdisc.r, q__2.i = tr2.i + + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + i__2 = kbot; + q__2.r = tr2.r - rtdisc.r, q__2.i = tr2.i - + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((r__1 = w[i__4].r, dabs(r__1)) + (r__2 = + r_imag(&w[i__]), dabs(r__2)) < (r__3 = + w[i__5].r, dabs(r__3)) + (r__4 = + r_imag(&w[i__ + 1]), dabs(r__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. 
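+ .    (The code below keeps whichever of the two shifts is closer to
+ .    H(KBOT,KBOT) and discards the other.)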
==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + q__2.r = w[i__2].r - h__[i__3].r, q__2.i = w[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + q__4.r = w[i__4].r - h__[i__5].r, q__4.i = w[i__4].i - + h__[i__5].i; + q__3.r = q__4.r, q__3.i = q__4.i; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), + dabs(r__2)) < (r__3 = q__3.r, dabs(r__3)) + (r__4 + = r_imag(&q__3), dabs(r__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + claqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR4 ==== */ + + return 0; +} /* claqr4_ */ + +/* Subroutine */ int claqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, + complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * + z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, + integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, + integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Local variables */ + static integer j, k, m, i2, j2, i4, j4, k1; + static real h11, h12, h21, h22; + static integer m22, ns, nu; + static complex vt[3]; + static real scl; + static integer kdu, kms; + static real ulp; + static integer knz, kzs; + static real tst1, tst2; + static complex beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot, jtop, jrow, mtop; + static complex alpha; + static logical accum; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), claqr1_(integer *, + complex *, integer *, complex *, complex *, complex *), slabad_( + real *, real *), clarfg_(integer *, complex *, complex *, integer + *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *); + static real safmin, safmax; + static complex refsum; + static integer mstart; + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This auxiliary subroutine called by CLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the unitary Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: CLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: CLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: CLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. 
+ + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. + + S (input/output) COMPLEX array of size (NSHFTS) + S contains the shifts of origin that define the multi- + shift QR sweep. On output S may be reordered. + + H (input/output) COMPLEX array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) COMPLEX array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep unitary + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) COMPLEX array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) COMPLEX array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) COMPLEX array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. + + WV (workspace) COMPLEX array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. 
==== +*/ + + /* Parameter adjustments */ + --s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + i__1 = *ktop + 2 + *ktop * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + claset_("ALL", &kdu, &kdu, &c_b56, &c_b57, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. ==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) 
==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + claqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &s[(m << + 1) - 1], &s[m * 2], &v[m * v_dim1 + 1]); + i__5 = m * v_dim1 + 1; + alpha.r = v[i__5].r, alpha.i = v[i__5].i; + clarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + i__5 = k + 1 + k * h_dim1; + beta.r = h__[i__5].r, beta.i = h__[i__5].i; + i__5 = m * v_dim1 + 2; + i__6 = k + 2 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + i__5 = m * v_dim1 + 3; + i__6 = k + 3 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + clarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + i__5 = k + 3 + k * h_dim1; + i__6 = k + 3 + (k + 1) * h_dim1; + i__7 = k + 3 + (k + 2) * h_dim1; + if (h__[i__5].r != 0.f || h__[i__5].i != 0.f || (h__[i__6] + .r != 0.f || h__[i__6].i != 0.f) || h__[i__7].r == + 0.f && h__[i__7].i == 0.f) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + claqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + s[(m << 1) - 1], &s[m * 2], vt); + alpha.r = vt[0].r, alpha.i = vt[0].i; + clarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + r_cnjg(&q__2, vt); + i__5 = k + 1 + k * h_dim1; + r_cnjg(&q__5, &vt[1]); + i__6 = k + 2 + k * h_dim1; + q__4.r = q__5.r * h__[i__6].r - q__5.i * h__[i__6].i, + q__4.i = q__5.r * h__[i__6].i + q__5.i * h__[ + i__6].r; + q__3.r = h__[i__5].r + q__4.r, q__3.i = h__[i__5].i + + q__4.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + + i__5 = k + 2 + k * h_dim1; + q__3.r = refsum.r * vt[1].r - refsum.i * vt[1].i, + q__3.i = refsum.r * vt[1].i + refsum.i * vt[1] + .r; + q__2.r = h__[i__5].r - q__3.r, q__2.i = h__[i__5].i - + q__3.i; + q__1.r = q__2.r, q__1.i = q__2.i; + q__5.r = refsum.r * vt[2].r - refsum.i * vt[2].i, + q__5.i = refsum.r * vt[2].i + refsum.i * vt[2] + .r; + q__4.r = q__5.r, q__4.i = q__5.i; + i__6 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + i__8 = k + 2 + (k + 2) * h_dim1; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(& + q__1), dabs(r__2)) + ((r__3 = q__4.r, dabs( + r__3)) + (r__4 = r_imag(&q__4), dabs(r__4))) + > ulp * ((r__5 = h__[i__6].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[k + k * h_dim1]), dabs( + r__6)) + ((r__7 = h__[i__7].r, dabs(r__7)) + ( + r__8 = r_imag(&h__[k + 1 + (k + 1) * h_dim1]), + dabs(r__8))) + ((r__9 = h__[i__8].r, dabs( + r__9)) + (r__10 = r_imag(&h__[k + 2 + (k + 2) + * h_dim1]), dabs(r__10))))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. 
==== +*/ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + i__6 = k + 1 + k * h_dim1; + q__1.r = h__[i__6].r - refsum.r, q__1.i = h__[ + i__6].i - refsum.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = m * v_dim1 + 1; + v[i__5].r = vt[0].r, v[i__5].i = vt[0].i; + i__5 = m * v_dim1 + 2; + v[i__5].r = vt[1].r, v[i__5].i = vt[1].i; + i__5 = m * v_dim1 + 3; + v[i__5].r = vt[2].r, v[i__5].i = vt[2].i; + } + } + } +/* L10: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + claqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &s[( + m22 << 1) - 1], &s[m22 * 2], &v[m22 * v_dim1 + 1]) + ; + i__4 = m22 * v_dim1 + 1; + beta.r = v[i__4].r, beta.i = v[i__4].i; + clarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + i__4 = k + 1 + k * h_dim1; + beta.r = h__[i__4].r, beta.i = h__[i__4].i; + i__4 = m22 * v_dim1 + 2; + i__5 = k + 2 + k * h_dim1; + v[i__4].r = h__[i__5].r, v[i__4].i = h__[i__5].i; + clarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + i__4 = k + 1 + k * h_dim1; + h__[i__4].r = beta.r, h__[i__4].i = beta.i; + i__4 = k + 2 + k * h_dim1; + h__[i__4].r = 0.f, h__[i__4].i = 0.f; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + r_cnjg(&q__2, &v[m * v_dim1 + 1]); + i__6 = k + 1 + j * h_dim1; + r_cnjg(&q__6, &v[m * v_dim1 + 2]); + i__7 = k + 2 + j * h_dim1; + q__5.r = q__6.r * h__[i__7].r - q__6.i * h__[i__7].i, + q__5.i = q__6.r * h__[i__7].i + q__6.i * h__[i__7] + .r; + q__4.r = h__[i__6].r + q__5.r, q__4.i = h__[i__6].i + + q__5.i; + r_cnjg(&q__8, &v[m * v_dim1 + 3]); + i__8 = k + 3 + j * h_dim1; + q__7.r = q__8.r * h__[i__8].r - q__8.i * h__[i__8].i, + q__7.i = q__8.r * h__[i__8].i + q__8.i * h__[i__8] + .r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__6 = k + 1 + j * h_dim1; + i__7 = k + 1 + j * h_dim1; + q__1.r = h__[i__7].r - refsum.r, q__1.i = h__[i__7].i - + refsum.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = k + 2 + j * h_dim1; + i__7 = k + 2 + j * h_dim1; + i__8 = m * v_dim1 + 2; + q__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + q__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = k + 3 + j * h_dim1; + i__7 = k + 3 + j * h_dim1; + i__8 = m * v_dim1 + 3; + q__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + q__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i 
- + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + r_cnjg(&q__2, &v[m22 * v_dim1 + 1]); + i__4 = k + 1 + j * h_dim1; + r_cnjg(&q__5, &v[m22 * v_dim1 + 2]); + i__6 = k + 2 + j * h_dim1; + q__4.r = q__5.r * h__[i__6].r - q__5.i * h__[i__6].i, + q__4.i = q__5.r * h__[i__6].i + q__5.i * h__[i__6] + .r; + q__3.r = h__[i__4].r + q__4.r, q__3.i = h__[i__4].i + + q__4.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = k + 1 + j * h_dim1; + i__6 = k + 1 + j * h_dim1; + q__1.r = h__[i__6].r - refsum.r, q__1.i = h__[i__6].i - + refsum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = k + 2 + j * h_dim1; + i__6 = k + 2 + j * h_dim1; + i__7 = m22 * v_dim1 + 2; + q__2.r = refsum.r * v[i__7].r - refsum.i * v[i__7].i, + q__2.i = refsum.r * v[i__7].i + refsum.i * v[i__7] + .r; + q__1.r = h__[i__6].r - q__2.r, q__1.i = h__[i__6].i - + q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L40: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + i__4 = m * v_dim1 + 1; + if (v[i__4].r != 0.f || v[i__4].i != 0.f) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + i__6 = m * v_dim1 + 1; + i__7 = j + (k + 1) * h_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * h_dim1; + q__4.r = v[i__8].r * h__[i__9].r - v[i__8].i * h__[ + i__9].i, q__4.i = v[i__8].r * h__[i__9].i + v[ + i__8].i * h__[i__9].r; + q__3.r = h__[i__7].r + q__4.r, q__3.i = h__[i__7].i + + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * h_dim1; + q__5.r = v[i__10].r * h__[i__11].r - v[i__10].i * h__[ + i__11].i, q__5.i = v[i__10].r * h__[i__11].i + + v[i__10].i * h__[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + q__5.i; + q__1.r = v[i__6].r * q__2.r - v[i__6].i * q__2.i, + q__1.i = v[i__6].r * q__2.i + v[i__6].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__6 = j + (k + 1) * h_dim1; + i__7 = j + (k + 1) * h_dim1; + q__1.r = h__[i__7].r - refsum.r, q__1.i = h__[i__7].i + - refsum.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = j + (k + 2) * h_dim1; + i__7 = j + (k + 2) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = j + (k + 3) * h_dim1; + i__7 = j + (k + 3) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; +/* L50: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) 
==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (kms + 1) * u_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (kms + 2) * u_dim1; + q__4.r = v[i__8].r * u[i__9].r - v[i__8].i * u[ + i__9].i, q__4.i = v[i__8].r * u[i__9].i + + v[i__8].i * u[i__9].r; + q__3.r = u[i__6].r + q__4.r, q__3.i = u[i__6].i + + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (kms + 3) * u_dim1; + q__5.r = v[i__10].r * u[i__11].r - v[i__10].i * u[ + i__11].i, q__5.i = v[i__10].r * u[i__11] + .i + v[i__10].i * u[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + + q__5.i; + q__1.r = v[i__4].r * q__2.r - v[i__4].i * q__2.i, + q__1.i = v[i__4].r * q__2.i + v[i__4].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = j + (kms + 1) * u_dim1; + i__6 = j + (kms + 1) * u_dim1; + q__1.r = u[i__6].r - refsum.r, q__1.i = u[i__6].i + - refsum.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; + i__4 = j + (kms + 2) * u_dim1; + i__6 = j + (kms + 2) * u_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__6].r - q__2.r, q__1.i = u[i__6].i - + q__2.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; + i__4 = j + (kms + 3) * u_dim1; + i__6 = j + (kms + 3) * u_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__6].r - q__2.r, q__1.i = u[i__6].i - + q__2.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; +/* L60: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. ==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (k + 1) * z_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * z_dim1; + q__4.r = v[i__8].r * z__[i__9].r - v[i__8].i * + z__[i__9].i, q__4.i = v[i__8].r * z__[ + i__9].i + v[i__8].i * z__[i__9].r; + q__3.r = z__[i__6].r + q__4.r, q__3.i = z__[i__6] + .i + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * z_dim1; + q__5.r = v[i__10].r * z__[i__11].r - v[i__10].i * + z__[i__11].i, q__5.i = v[i__10].r * z__[ + i__11].i + v[i__10].i * z__[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + + q__5.i; + q__1.r = v[i__4].r * q__2.r - v[i__4].i * q__2.i, + q__1.i = v[i__4].r * q__2.i + v[i__4].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = j + (k + 1) * z_dim1; + i__6 = j + (k + 1) * z_dim1; + q__1.r = z__[i__6].r - refsum.r, q__1.i = z__[ + i__6].i - refsum.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 2) * z_dim1; + i__6 = j + (k + 2) * z_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__6].r - q__2.r, q__1.i = z__[i__6] + .i - q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 3) * z_dim1; + i__6 = j + (k + 3) * z_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__6].r - q__2.r, q__1.i = z__[i__6] + .i - q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; +/* L70: */ + } + } + } +/* L80: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + i__5 = m22 * v_dim1 + 1; + if (bmp22 && (v[i__5].r != 0.f || v[i__5].i != 0.f)) 
{ +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + i__7 = m22 * v_dim1 + 1; + i__4 = j + (k + 1) * h_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * h_dim1; + q__3.r = v[i__6].r * h__[i__8].r - v[i__6].i * h__[i__8] + .i, q__3.i = v[i__6].r * h__[i__8].i + v[i__6].i * + h__[i__8].r; + q__2.r = h__[i__4].r + q__3.r, q__2.i = h__[i__4].i + + q__3.i; + q__1.r = v[i__7].r * q__2.r - v[i__7].i * q__2.i, q__1.i = + v[i__7].r * q__2.i + v[i__7].i * q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__7 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + q__1.r = h__[i__4].r - refsum.r, q__1.i = h__[i__4].i - + refsum.i; + h__[i__7].r = q__1.r, h__[i__7].i = q__1.i; + i__7 = j + (k + 2) * h_dim1; + i__4 = j + (k + 2) * h_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, q__2.i = + refsum.r * q__3.i + refsum.i * q__3.r; + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - + q__2.i; + h__[i__7].r = q__1.r, h__[i__7].i = q__1.i; +/* L90: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (kms + 1) * u_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (kms + 2) * u_dim1; + q__3.r = v[i__6].r * u[i__8].r - v[i__6].i * u[i__8] + .i, q__3.i = v[i__6].r * u[i__8].i + v[i__6] + .i * u[i__8].r; + q__2.r = u[i__7].r + q__3.r, q__2.i = u[i__7].i + + q__3.i; + q__1.r = v[i__5].r * q__2.r - v[i__5].i * q__2.i, + q__1.i = v[i__5].r * q__2.i + v[i__5].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = j + (kms + 1) * u_dim1; + i__7 = j + (kms + 1) * u_dim1; + q__1.r = u[i__7].r - refsum.r, q__1.i = u[i__7].i - + refsum.i; + u[i__5].r = q__1.r, u[i__5].i = q__1.i; + i__5 = j + (kms + 2) * u_dim1; + i__7 = j + (kms + 2) * u_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__7].r - q__2.r, q__1.i = u[i__7].i - + q__2.i; + u[i__5].r = q__1.r, u[i__5].i = q__1.i; +/* L100: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (k + 1) * z_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * z_dim1; + q__3.r = v[i__6].r * z__[i__8].r - v[i__6].i * z__[ + i__8].i, q__3.i = v[i__6].r * z__[i__8].i + v[ + i__6].i * z__[i__8].r; + q__2.r = z__[i__7].r + q__3.r, q__2.i = z__[i__7].i + + q__3.i; + q__1.r = v[i__5].r * q__2.r - v[i__5].i * q__2.i, + q__1.i = v[i__5].r * q__2.i + v[i__5].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = j + (k + 1) * z_dim1; + i__7 = j + (k + 1) * z_dim1; + q__1.r = z__[i__7].r - refsum.r, q__1.i = z__[i__7].i + - refsum.i; + z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; + i__5 = j + (k + 2) * z_dim1; + i__7 = j + (k + 2) * z_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__7].r - q__2.r, q__1.i = z__[i__7].i - + q__2.i; + z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; +/* L110: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k 
= min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + if (h__[i__5].r != 0.f || h__[i__5].i != 0.f) { + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + tst1 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = r_imag(& + h__[k + k * h_dim1]), dabs(r__2)) + ((r__3 = h__[ + i__7].r, dabs(r__3)) + (r__4 = r_imag(&h__[k + 1 + + (k + 1) * h_dim1]), dabs(r__4))); + if (tst1 == 0.f) { + if (k >= *ktop + 1) { + i__5 = k + (k - 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 1) * h_dim1]), dabs( + r__2)); + } + if (k >= *ktop + 2) { + i__5 = k + (k - 2) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 2) * h_dim1]), dabs( + r__2)); + } + if (k >= *ktop + 3) { + i__5 = k + (k - 3) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 3) * h_dim1]), dabs( + r__2)); + } + if (k <= *kbot - 2) { + i__5 = k + 2 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 2 + (k + 1) * h_dim1]), + dabs(r__2)); + } + if (k <= *kbot - 3) { + i__5 = k + 3 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 3 + (k + 1) * h_dim1]), + dabs(r__2)); + } + if (k <= *kbot - 4) { + i__5 = k + 4 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 4 + (k + 1) * h_dim1]), + dabs(r__2)); + } + } + i__5 = k + 1 + k * h_dim1; +/* Computing MAX */ + r__3 = smlnum, r__4 = ulp * tst1; + if ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = r_imag(& + h__[k + 1 + k * h_dim1]), dabs(r__2)) <= dmax( + r__3,r__4)) { +/* Computing MAX */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + k * h_dim1]), dabs(r__2)), + r__6 = (r__3 = h__[i__7].r, dabs(r__3)) + ( + r__4 = r_imag(&h__[k + (k + 1) * h_dim1]), + dabs(r__4)); + h12 = dmax(r__5,r__6); +/* Computing MIN */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + k * h_dim1]), dabs(r__2)), + r__6 = (r__3 = h__[i__7].r, dabs(r__3)) + ( + r__4 = r_imag(&h__[k + (k + 1) * h_dim1]), + dabs(r__4)); + h21 = dmin(r__5,r__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + q__2.r = h__[i__5].r - h__[i__7].r, q__2.i = h__[i__5] + .i - h__[i__7].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MAX */ + i__6 = k + 1 + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__6].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + (k + 1) * h_dim1]), dabs( + r__2)), r__6 = (r__3 = q__1.r, dabs(r__3)) + ( + r__4 = r_imag(&q__1), dabs(r__4)); + h11 = dmax(r__5,r__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + q__2.r = h__[i__5].r - h__[i__7].r, q__2.i = h__[i__5] + .i - h__[i__7].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MIN */ + i__6 = k + 1 + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__6].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + (k + 1) * h_dim1]), dabs( + r__2)), r__6 = (r__3 = q__1.r, dabs(r__3)) + ( + r__4 = r_imag(&q__1), dabs(r__4)); + h22 = dmin(r__5,r__6); + scl = h11 + h12; + tst2 = h22 * 
(h11 / scl); + +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * tst2; + if (tst2 == 0.f || h21 * (h12 / scl) <= dmax(r__1, + r__2)) { + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } + } + } +/* L120: */ + } + +/* + ==== Fill in the last row of each bulge. ==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + i__5 = m * v_dim1 + 1; + i__7 = m * v_dim1 + 3; + q__2.r = v[i__5].r * v[i__7].r - v[i__5].i * v[i__7].i, + q__2.i = v[i__5].r * v[i__7].i + v[i__5].i * v[i__7] + .r; + i__6 = k + 4 + (k + 3) * h_dim1; + q__1.r = q__2.r * h__[i__6].r - q__2.i * h__[i__6].i, q__1.i = + q__2.r * h__[i__6].i + q__2.i * h__[i__6].r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = k + 4 + (k + 1) * h_dim1; + q__1.r = -refsum.r, q__1.i = -refsum.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 4 + (k + 2) * h_dim1; + q__2.r = -refsum.r, q__2.i = -refsum.i; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * + q__3.i + q__2.i * q__3.r; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 4 + (k + 3) * h_dim1; + i__7 = k + 4 + (k + 3) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, q__2.i = + refsum.r * q__3.i + refsum.i * q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - q__2.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; +/* L130: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L140: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + cgemm_("C", "N", &nu, &jlen, &nu, &c_b57, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b56, &wh[wh_offset], ldwh); + clacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L150: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? 
jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + cgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b56, &wv[wv_offset], ldwv); + clacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L160: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + cgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b56, &wv[wv_offset], ldwv); + clacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L170: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + clacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + claset_("ALL", &kzs, &jlen, &c_b56, &c_b56, &wh[wh_offset] + , ldwh); + ctrmm_("L", "U", "C", "N", &knz, &jlen, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + cgemm_("C", "N", &i2, &jlen, &j2, &c_b57, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b57, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + clacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + ctrmm_("L", "L", "C", "N", &j2, &jlen, &c_b57, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("C", "N", &i__5, &jlen, &i__7, &c_b57, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b57, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + clacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L180: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . 
columns get multiplied by zero) ==== +*/ + + clacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + claset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[wv_offset] + , ldwv); + ctrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + cgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b57, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + clacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ctrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b57, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + clacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L190: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . KZS columns get multiplied by zero) ==== +*/ + + clacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + claset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[ + wv_offset], ldwv); + ctrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + cgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b57, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + clacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ctrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b57, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + clacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L200: */ + } + } + } + } +/* L210: */ + } + +/* ==== End of CLAQR5 ==== */ + + return 0; +} /* claqr5_ */ + +/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer 
*, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) REAL array, dimension (LDA, M) + A contains the M by M matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) REAL array, dimension (LDB, N) + B contains the M by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,M). + + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,M). + + RWORK (workspace) REAL array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[(j - 1) * *m + i__] = b[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + sgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of CLARCM */ + +} /* clarcm_ */ + +/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + complex q__1; + + /* Local variables */ + static integer i__; + static logical applyleft; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *), + cgemv_(char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + static integer lastc, lastv; + extern integer ilaclc_(integer *, integer *, complex *, 
integer *), + ilaclr_(integer *, integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARF applies a complex elementary reflector H to a complex M-by-N + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix. + + To apply H' (the conjugate transpose of H), supply conjg(tau) instead + tau. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) COMPLEX + The value tau in the representation of H. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (tau->r != 0.f || tau->i != 0.f) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 0 && (v[i__1].r == 0.f && v[i__1].i == 0.f))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + cgemv_("Conjugate transpose", &lastv, &lastc, &c_b57, &c__[ + c_offset], ldc, &v[1], incv, &c_b56, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&lastv, &lastc, &q__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + cgemv_("No transpose", &lastc, &lastv, &c_b57, &c__[c_offset], + ldc, &v[1], incv, &c_b56, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) 
- w(1:lastc,1) * v(1:lastv,1)' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&lastc, &lastv, &q__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of CLARF */ + +} /* clarf_ */ + +/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, + complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, + integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static integer lastv; + extern integer ilaclc_(integer *, integer *, complex *, integer *); + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *); + extern integer ilaclr_(integer *, integer *, complex *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARFB applies a complex block reflector H or its transpose H' to a + complex M-by-N matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'C': apply H' (Conjugate transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) COMPLEX array, dimension (LDT,K) + The triangular K-by-K matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. 
+ If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[*k + 1 + c_dim1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &q__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + cgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + 
ctrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &q__1, &work[work_offset], ldwork, &v[*k + + 1 + v_dim1], ldv, &c_b57, &c__[(*k + 1) * + c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, 
&work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + cgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b57, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &q__1, &work[work_offset], ldwork, &v[ + v_offset], ldv, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[*k + 1 + c_dim1], + ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, & + work[work_offset], ldwork) + ; + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, &lastc, k, &q__1, &v[(*k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork, &c_b57, &c__[*k + + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, 
&c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + cgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, & + v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, &work[ + work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + q__1, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b57, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[c_offset], ldc, &v[ + v_offset], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, &lastc, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - 
q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + cgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + q__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of CLARFB */ + +} /* clarfb_ */ + +/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + complex q__1, q__2; + + /* Local variables */ + static integer j, knt; + static real beta; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + static real alphi, alphr, xnorm; + extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real * + , real *, real *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + static real safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARFG generates a complex elementary reflector H of order n, such + that + + H' * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, with beta real, and x is an + (n-1)-element complex vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a complex scalar and v is a complex (n-1)-element + vector. Note that H is not hermitian. + + If the elements of x are all zero and alpha is real, then tau = 0 + and H is taken to be the unit matrix. 
+ + Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) COMPLEX + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) COMPLEX array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) COMPLEX + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + tau->r = 0.f, tau->i = 0.f; + return 0; + } + + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = r_imag(alpha); + + if (xnorm == 0.f && alphi == 0.f) { + +/* H = I */ + + tau->r = 0.f, tau->i = 0.f; + } else { + +/* general case */ + + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + safmin = slamch_("S") / slamch_("E"); + rsafmn = 1.f / safmin; + + knt = 0; + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + +L10: + ++knt; + i__1 = *n - 1; + csscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + q__1.r = alphr, q__1.i = alphi; + alpha->r = q__1.r, alpha->i = q__1.i; + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + } + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b57, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + alpha->r = beta, alpha->i = 0.f; + } + + return 0; + +/* End of CLARFG */ + +} /* clarfg_ */ + +/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, j, prevlastv; + static complex vii; + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + extern logical lsame_(char *, char *); + static integer lastv; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARFT forms the triangular factor T of a complex block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 
+ + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) COMPLEX array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(prevlastv,i__); + i__2 = i__; + if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0.f, t[i__3].i = 0.f; +/* L10: */ + } + } else { + +/* general case */ + + i__2 = i__ + i__ * v_dim1; + vii.r = v[i__2].r, vii.i = v[i__2].i; + i__2 = i__ + i__ * v_dim1; + v[i__2].r = 1.f, v[i__2].i = 0.f; + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. 
*/ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ + + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & + c_b56, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + if (i__ < j) { + i__2 = j - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + i__2 = i__ - 1; + i__3 = j - i__ + 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b56, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < j) { + i__2 = j - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + } + i__2 = i__ + i__ * v_dim1; + v[i__2].r = vii.r, v[i__2].i = vii.i; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0.f, t[i__2].i = 0.f; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + i__1 = *n - *k + i__ + i__ * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) +*/ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b56, &t[i__ + 1 + i__ * + t_dim1], &c__1); + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } else { + i__1 = i__ + (*n - *k + i__) * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; +/* Skip any leading zeros. 
*/ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + goto L36; + } + } +L36: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' +*/ + + i__1 = *n - *k + i__ - 1 - j + 1; + clacgv_(&i__1, &v[i__ + j * v_dim1], ldv); + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b56, &t[i__ + 1 + i__ * t_dim1], & + c__1); + i__1 = *n - *k + i__ - 1 - j + 1; + clacgv_(&i__1, &v[i__ + j * v_dim1], ldv); + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ + } + } + return 0; + +/* End of CLARFT */ + +} /* clarft_ */ + +/* Subroutine */ int clartg_(complex *f, complex *g, real *cs, complex *sn, + complex *r__) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10; + complex q__1, q__2, q__3; + + /* Local variables */ + static real d__; + static integer i__; + static real f2, g2; + static complex ff; + static real di, dr; + static complex fs, gs; + static real f2s, g2s, eps, scale; + static integer count; + static real safmn2, safmx2; + extern doublereal slapy2_(real *, real *), slamch_(char *); + static real safmin; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLARTG generates a plane rotation so that + + [ CS SN ] [ F ] [ R ] + [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a faster version of the BLAS1 routine CROTG, except for + the following differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0, then CS=0 and SN is chosen so that R is real. + + Arguments + ========= + + F (input) COMPLEX + The first component of vector to be rotated. + + G (input) COMPLEX + The second component of vector to be rotated. + + CS (output) REAL + The cosine of the rotation. + + SN (output) COMPLEX + The sine of the rotation. + + R (output) COMPLEX + The nonzero component of the rotated vector. + + Further Details + ======= ======= + + 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel + + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + + ===================================================================== + + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / + + IF( FIRST ) THEN +*/ + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; +/* + FIRST = .FALSE. 
+ END IF + Computing MAX + Computing MAX +*/ + r__7 = (r__1 = f->r, dabs(r__1)), r__8 = (r__2 = r_imag(f), dabs(r__2)); +/* Computing MAX */ + r__9 = (r__3 = g->r, dabs(r__3)), r__10 = (r__4 = r_imag(g), dabs(r__4)); + r__5 = dmax(r__7,r__8), r__6 = dmax(r__9,r__10); + scale = dmax(r__5,r__6); + fs.r = f->r, fs.i = f->i; + gs.r = g->r, gs.i = g->i; + count = 0; + if (scale >= safmx2) { +L10: + ++count; + q__1.r = safmn2 * fs.r, q__1.i = safmn2 * fs.i; + fs.r = q__1.r, fs.i = q__1.i; + q__1.r = safmn2 * gs.r, q__1.i = safmn2 * gs.i; + gs.r = q__1.r, gs.i = q__1.i; + scale *= safmn2; + if (scale >= safmx2) { + goto L10; + } + } else if (scale <= safmn2) { + if (g->r == 0.f && g->i == 0.f) { + *cs = 1.f; + sn->r = 0.f, sn->i = 0.f; + r__->r = f->r, r__->i = f->i; + return 0; + } +L20: + --count; + q__1.r = safmx2 * fs.r, q__1.i = safmx2 * fs.i; + fs.r = q__1.r, fs.i = q__1.i; + q__1.r = safmx2 * gs.r, q__1.i = safmx2 * gs.i; + gs.r = q__1.r, gs.i = q__1.i; + scale *= safmx2; + if (scale <= safmn2) { + goto L20; + } + } +/* Computing 2nd power */ + r__1 = fs.r; +/* Computing 2nd power */ + r__2 = r_imag(&fs); + f2 = r__1 * r__1 + r__2 * r__2; +/* Computing 2nd power */ + r__1 = gs.r; +/* Computing 2nd power */ + r__2 = r_imag(&gs); + g2 = r__1 * r__1 + r__2 * r__2; + if (f2 <= dmax(g2,1.f) * safmin) { + +/* This is a rare case: F is very small. */ + + if (f->r == 0.f && f->i == 0.f) { + *cs = 0.f; + r__2 = g->r; + r__3 = r_imag(g); + r__1 = slapy2_(&r__2, &r__3); + r__->r = r__1, r__->i = 0.f; +/* Do complex/real division explicitly with two real divisions */ + r__1 = gs.r; + r__2 = r_imag(&gs); + d__ = slapy2_(&r__1, &r__2); + r__1 = gs.r / d__; + r__2 = -r_imag(&gs) / d__; + q__1.r = r__1, q__1.i = r__2; + sn->r = q__1.r, sn->i = q__1.i; + return 0; + } + r__1 = fs.r; + r__2 = r_imag(&fs); + f2s = slapy2_(&r__1, &r__2); +/* + G2 and G2S are accurate + G2 is at least SAFMIN, and G2S is at least SAFMN2 +*/ + g2s = sqrt(g2); +/* + Error in CS from underflow in F2S is at most + UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS + If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, + and so CS .lt. sqrt(SAFMIN) + If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN + and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) + Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S +*/ + *cs = f2s / g2s; +/* + Make sure abs(FF) = 1 + Do complex/real division explicitly with 2 real divisions + Computing MAX +*/ + r__3 = (r__1 = f->r, dabs(r__1)), r__4 = (r__2 = r_imag(f), dabs(r__2) + ); + if (dmax(r__3,r__4) > 1.f) { + r__1 = f->r; + r__2 = r_imag(f); + d__ = slapy2_(&r__1, &r__2); + r__1 = f->r / d__; + r__2 = r_imag(f) / d__; + q__1.r = r__1, q__1.i = r__2; + ff.r = q__1.r, ff.i = q__1.i; + } else { + dr = safmx2 * f->r; + di = safmx2 * r_imag(f); + d__ = slapy2_(&dr, &di); + r__1 = dr / d__; + r__2 = di / d__; + q__1.r = r__1, q__1.i = r__2; + ff.r = q__1.r, ff.i = q__1.i; + } + r__1 = gs.r / g2s; + r__2 = -r_imag(&gs) / g2s; + q__2.r = r__1, q__2.i = r__2; + q__1.r = ff.r * q__2.r - ff.i * q__2.i, q__1.i = ff.r * q__2.i + ff.i + * q__2.r; + sn->r = q__1.r, sn->i = q__1.i; + q__2.r = *cs * f->r, q__2.i = *cs * f->i; + q__3.r = sn->r * g->r - sn->i * g->i, q__3.i = sn->r * g->i + sn->i * + g->r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__->r = q__1.r, r__->i = q__1.i; + } else { + +/* + This is the most common case. 
+ Neither F2 nor F2/G2 are less than SAFMIN + F2S cannot overflow, and it is accurate +*/ + + f2s = sqrt(g2 / f2 + 1.f); +/* Do the F2S(real)*FS(complex) multiply with two real multiplies */ + r__1 = f2s * fs.r; + r__2 = f2s * r_imag(&fs); + q__1.r = r__1, q__1.i = r__2; + r__->r = q__1.r, r__->i = q__1.i; + *cs = 1.f / f2s; + d__ = f2 + g2; +/* Do complex/real division explicitly with two real divisions */ + r__1 = r__->r / d__; + r__2 = r_imag(r__) / d__; + q__1.r = r__1, q__1.i = r__2; + sn->r = q__1.r, sn->i = q__1.i; + r_cnjg(&q__2, &gs); + q__1.r = sn->r * q__2.r - sn->i * q__2.i, q__1.i = sn->r * q__2.i + + sn->i * q__2.r; + sn->r = q__1.r, sn->i = q__1.i; + if (count != 0) { + if (count > 0) { + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + q__1.r = safmx2 * r__->r, q__1.i = safmx2 * r__->i; + r__->r = q__1.r, r__->i = q__1.i; +/* L30: */ + } + } else { + i__1 = -count; + for (i__ = 1; i__ <= i__1; ++i__) { + q__1.r = safmn2 * r__->r, q__1.i = safmn2 * r__->i; + r__->r = q__1.r, r__->i = q__1.i; +/* L40: */ + } + } + } + } + return 0; + +/* End of CLARTG */ + +} /* clartg_ */ + +/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static real mul, cto1; + static logical done; + static real ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern logical sisnan_(real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLASCL multiplies the M by N complex matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) REAL + CTO (input) REAL + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
+ + A (input/output) COMPLEX array, dimension (LDA,N) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.f || sisnan_(cfrom)) { + *info = -4; + } else if (sisnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. 
+*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.f; + } else if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! 
done) { + goto L10; + } + + return 0; + +/* End of CLASCL */ + +} /* clascl_ */ + +/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex * + alpha, complex *beta, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLASET initializes a 2-D array A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set. The lower triangle + is unchanged. + = 'L': Lower triangular part is set. The upper triangle + is unchanged. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + On entry, M specifies the number of rows of A. + + N (input) INTEGER + On entry, N specifies the number of columns of A. + + ALPHA (input) COMPLEX + All the offdiagonal array elements are set to ALPHA. + + BETA (input) COMPLEX + All the diagonal array elements are set to BETA. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; + A(i,i) = BETA , 1 <= i <= min(m,n) + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the diagonal to BETA and the strictly upper triangular + part of the array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L10: */ + } +/* L20: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the diagonal to BETA and the strictly lower triangular + part of the array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L40: */ + } +/* L50: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L60: */ + } + + } else { + +/* + Set the array to BETA on the diagonal and ALPHA on the + offdiagonal. 
+*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ + } + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ + } + } + + return 0; + +/* End of CLASET */ + +} /* claset_ */ + +/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, info; + static complex temp; + extern logical lsame_(char *, char *); + static real ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLASR applies a sequence of real plane rotations to a complex matrix + A, from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form + + A := A*P**T + + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. + + When DIRECT = 'F' (Forward sequence), then + + P = P(z-1) * ... * P(2) * P(1) + + and when DIRECT = 'B' (Backward sequence), then + + P = P(1) * P(2) * ... * P(z-1) + + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). + + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. + + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form + + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P**T + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. 
If n <= 1, an + immediate return is effected. + + C (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). + + A (input/output) COMPLEX array, dimension (LDA,N) + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ * 
a_dim1 + 1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + 
i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = 
i__ + *n * a_dim1; + q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of CLASR */ + +} /* clasr_ */ + +/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real * + scale, real *sumsq) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer ix; + static real temp1; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLASSQ returns the values scl and ssq such that + + ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is + assumed to be at least unity and the value of ssq will then satisfy + + 1.0 .le. ssq .le. ( sumsq + 2*n ). + + scale is assumed to be non-negative and scl returns the value + + scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), + i + + scale and sumsq must be supplied in SCALE and SUMSQ respectively. + SCALE and SUMSQ are overwritten by scl and ssq respectively. + + The routine makes only one pass through the vector X. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) COMPLEX array, dimension (N) + The vector x as described above. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) REAL + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with the value scl . + + SUMSQ (input/output) REAL + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with the value ssq . + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp1 = (r__1 = x[i__3].r, dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } +/* L10: */ + } + } + + return 0; + +/* End of CLASSQ */ + +} /* classq_ */ + +/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer * + k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static complex temp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of CLASWP */ + +} /* claswp_ */ + +/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a, + integer *lda, real *e, complex *tau, complex *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, iw; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), chemv_(char *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), clarfg_(integer *, complex *, + complex *, integer *, complex *), clacgv_(integer *, complex *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLATRD reduces NB rows and columns of a complex Hermitian matrix A to + Hermitian tridiagonal form by a unitary similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', CLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', CLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by CHETRD. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. 
If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + E (output) REAL array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) COMPLEX array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a Hermitian rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b57, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b57, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + i__2 = i__ - 1; + e[i__2] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + chemv_("Upper", &i__2, &c_b57, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b56, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[( + iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__ - 1; + q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = i__ - 1; + cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = i__ - 1; + caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = 
*nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + chemv_("Lower", &i__2, &c_b57, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[i__ + + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__; + q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = *n - i__; + cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of CLATRD */ + +} /* clatrd_ */ + +/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *a, integer *lda, complex *x, real *scale, + real *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, j; + static real xj, rec, tjj; + static integer jinc; + static real xbnd; + 
static integer imax; + static real tmax; + static complex tjjs; + static real xmax, grow; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real tscal; + static complex uscal; + static integer jlast; + extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer + *, complex *, integer *); + static complex csumj; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical notran; + static integer jfirst; + static real smlnum; + static logical nounit; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLATRS solves one of the triangular systems + + A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + + with scaling to prevent overflow. Here A is an upper or lower + triangular matrix, A**T denotes the transpose of A, A**H denotes the + conjugate transpose of A, x and b are n-element vectors, and s is a + scaling factor, usually less than or equal to 1, chosen so that the + components of x will be less than the overflow threshold. If the + unscaled problem will not cause overflow, the Level 2 BLAS routine + CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + TRANS (input) CHARACTER*1 + Specifies the operation applied to A. + = 'N': Solve A * x = s*b (No transpose) + = 'T': Solve A**T * x = s*b (Transpose) + = 'C': Solve A**H * x = s*b (Conjugate transpose) + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + NORMIN (input) CHARACTER*1 + Specifies whether CNORM has been set or not. + = 'Y': CNORM contains the column norms on entry + = 'N': CNORM is not set on entry. On exit, the norms will + be computed and stored in CNORM. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The triangular matrix A. If UPLO = 'U', the leading n by n + upper triangular part of the array A contains the upper + triangular matrix, and the strictly lower triangular part of + A is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of the array A contains the lower triangular + matrix, and the strictly upper triangular part of A is not + referenced. If DIAG = 'U', the diagonal elements of A are + also not referenced and are assumed to be 1. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max (1,N). + + X (input/output) COMPLEX array, dimension (N) + On entry, the right hand side b of the triangular system. + On exit, X is overwritten by the solution vector x. + + SCALE (output) REAL + The scaling factor s for the triangular system + A * x = s*b, A**T * x = s*b, or A**H * x = s*b. + If SCALE = 0, the matrix A is singular or badly scaled, and + the vector x is an exact or approximate solution to A*x = 0. + + CNORM (input or output) REAL array, dimension (N) + + If NORMIN = 'Y', CNORM is an input argument and CNORM(j) + contains the norm of the off-diagonal part of the j-th column + of A. If TRANS = 'N', CNORM(j) must be greater than or equal + to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) + must be greater than or equal to the 1-norm. + + If NORMIN = 'N', CNORM is an output argument and CNORM(j) + returns the 1-norm of the offdiagonal part of the j-th column + of A. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + Further Details + ======= ======= + + A rough bound on x is computed; if that is less than overflow, CTRSV + is called, otherwise, specific code is used which checks for possible + overflow or divide-by-zero at every operation. + + A columnwise scheme is used for solving A*x = b. The basic algorithm + if A is lower triangular is + + x[1:n] := b[1:n] + for j = 1, ..., n + x(j) := x(j) / A(j,j) + x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] + end + + Define bounds on the components of x after j iterations of the loop: + M(j) = bound on x[1:j] + G(j) = bound on x[j+1:n] + Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. + + Then for iteration j+1 we have + M(j+1) <= G(j) / | A(j+1,j+1) | + G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | + <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) + + where CNORM(j+1) is greater than or equal to the infinity-norm of + column j+1 of A, not counting the diagonal. Hence + + G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) + 1<=i<=j + and + + |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) + 1<=i< j + + Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the + reciprocal of the largest M(j), j=1,..,n, is larger than + max(underflow, 1/overflow). + + The bound on x(j) is also used to determine when a step in the + columnwise method can be performed without fear of overflow. If + the computed bound is greater than a large constant, x is scaled to + prevent overflow, but if the bound overflows, x is set to 0, x(j) to + 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. + + Similarly, a row-wise scheme is used to solve A**T *x = b or + A**H *x = b. The basic algorithm for A upper triangular is + + for j = 1, ..., n + x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) + end + + We simultaneously compute two bounds + G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j + M(j) = bound on x(i), 1<=i<=j + + The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we + add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. + Then the bound on x(j) is + + M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | + + <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) + 1<=i<=j + + and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater + than max(underflow, 1/overflow). 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLATRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum /= slamch_("Precision"); + bignum = 1.f / smlnum; + *scale = 1.f; + + if (lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.f; + } + } + +/* + Scale the column norms by TSCAL if the maximum element in CNORM is + greater than BIGNUM/2. +*/ + + imax = isamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum * .5f) { + tscal = 1.f; + } else { + tscal = .5f / (smlnum * tmax); + sscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* + Compute a bound on the computed solution vector to see if the + Level 2 BLAS routine CTRSV can be used. +*/ + + xmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j; + r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = + r_imag(&x[j]) / 2.f, dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L30: */ + } + xbnd = xmax; + + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.f) { + grow = 0.f; + goto L60; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, G(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + + if (tjj >= smlnum) { + +/* + M(j) = G(j-1) / abs(A(j,j)) + + Computing MIN +*/ + r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; + xbnd = dmin(r__1,r__2); + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.f; + } + + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.f; + } +/* L40: */ + } + grow = xbnd; + } else { + +/* + A is unit triangular. 
+ + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1.f / (cnorm[j] + 1.f); +/* L50: */ + } + } +L60: + + ; + } else { + +/* Compute the growth in A**T * x = b or A**H * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.f) { + grow = 0.f; + goto L90; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, M(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.f; +/* Computing MIN */ + r__1 = grow, r__2 = xbnd / xj; + grow = dmin(r__1,r__2); + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + + if (tjj >= smlnum) { + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + if (xj > tjj) { + xbnd *= tjj / xj; + } + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.f; + } +/* L70: */ + } + grow = dmin(grow,xbnd); + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.f; + grow /= xj; +/* L80: */ + } + } +L90: + ; + } + + if (grow * tscal > smlnum) { + +/* + Use the Level 2 BLAS solve if the reciprocal of the bound on + elements of X is not too small. +*/ + + ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum * .5f) { + +/* + Scale X so that its components are less than or equal to + BIGNUM in absolute value. +*/ + + *scale = bignum * .5f / xmax; + csscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } else { + xmax *= 2.f; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L105; + } + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). 
*/ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* + Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + to avoid overflow when dividing by A(j,j). +*/ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.f) { + +/* + Scale by 1/CNORM(j) to avoid overflow when + multiplying x(j) times column j. +*/ + + rec /= cnorm[j]; + } + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0, and compute a solution to A*x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L100: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + xj = 1.f; + *scale = 0.f; + xmax = 0.f; + } +L105: + +/* + Scale x if necessary to avoid overflow when adding a + multiple of column j of A. +*/ + + if (xj > 1.f) { + rec = 1.f / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5f; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + csscal_(n, &c_b2435, &x[1], &c__1); + *scale *= .5f; + } + + if (upper) { + if (j > 1) { + +/* + Compute the update + x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +*/ + + i__3 = j - 1; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = icamax_(&i__3, &x[1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } else { + if (j < *n) { + +/* + Compute the update + x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +*/ + + i__3 = *n - j; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + icamax_(&i__3, &x[j + 1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } +/* L110: */ + } + + } else if (lsame_(trans, "T")) { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5f; + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. 
+ + Computing MIN +*/ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { + +/* + If the scaling needed for A in the dot product is 1, + call CDOTU to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L130: */ + } + } + } + + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L145; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**T *x = 0. 
+*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L140: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L145: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L150: */ + } + + } else { + +/* Solve A**H * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5f; + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { + +/* + If the scaling needed for A in the dot product is 1, + call CDOTC to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L160: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L170: */ + } + } + } + + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. 
+*/ + + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L185; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**H *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L180: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L185: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L190: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.f) { + r__1 = 1.f / tscal; + sscal_(n, &r__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of CLATRS */ + +} /* clatrs_ */ + +/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__; + static real aii; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. 
+ If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = aii, q__1.i = 0.f; + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + q__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = aii, q__1.i = 0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + q__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of CLAUU2 */ + +} /* clauu2_ */ + +/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + clauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b57, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Upper", "No transpose", &ib, &i__3, &c_b1034, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b1034, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b57, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b57, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b1034, &a[i__ + ib + i__ * a_dim1], lda, & + c_b1034, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of CLAUUM */ + +} /* clauum_ */ + +/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2; + + /* Local variables */ + static integer j; + static real ajj; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, + integer *); + extern logical sisnan_(real *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. 
+ + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f || sisnan_(&ajj)) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b57, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. 
*/ + + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f || sisnan_(&ajj)) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b57, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of CPOTF2 */ + +} /* cpotf2_ */ + +/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + cpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1276, & + a[j * a_dim1 + 1], lda, &c_b1034, &a[j + j * a_dim1], + lda); + cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b57, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b57, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Lower", "No transpose", &jb, &i__3, &c_b1276, &a[j + + a_dim1], lda, &c_b1034, &a[j + j * a_dim1], lda); + cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. 
*/ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b57, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b57, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of CPOTRF */ + +} /* cpotrf_ */ + +/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), clauum_( + char *, integer *, complex *, integer *, integer *), + ctrtri_(char *, char *, integer *, complex *, integer *, integer * + ); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by CPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + CPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). 
*/ + + clauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of CPOTRI */ + +} /* cpotri_ */ + +/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by CPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by CPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b57, &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b57, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of CPOTRS */ + +} /* cpotrs_ */ + +/* Subroutine */ int crot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, complex *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, ix, iy; + static complex stemp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CROT applies a plane rotation, where the cos (C) is real and the + sin (S) is complex, and the vectors CX and CY are complex. + + Arguments + ========= + + N (input) INTEGER + The number of elements in the vectors CX and CY. + + CX (input/output) COMPLEX array, dimension (N) + On input, the vector X. + On output, CX is overwritten with C*X + S*Y. + + INCX (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + CY (input/output) COMPLEX array, dimension (N) + On input, the vector Y. + On output, CY is overwritten with -CONJG(S)*X + C*Y. + + INCY (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + C (input) REAL + S (input) COMPLEX + C and S define a rotation + [ C S ] + [ -conjg(S) C ] + where C*C + S*CONJG(S) = 1.0. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* Code for unequal increments or equal increments not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = iy; + q__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, q__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + stemp.r = q__1.r, stemp.i = q__1.i; + i__2 = iy; + i__3 = iy; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + r_cnjg(&q__4, s); + i__4 = ix; + q__3.r = q__4.r * cx[i__4].r - q__4.i * cx[i__4].i, q__3.i = q__4.r * + cx[i__4].i + q__4.i * cx[i__4].r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = ix; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* Code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = i__; + q__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, q__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + stemp.r = q__1.r, stemp.i = q__1.i; + i__2 = i__; + i__3 = i__; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + r_cnjg(&q__4, s); + i__4 = i__; + q__3.r = q__4.r * cx[i__4].r - q__4.i * cx[i__4].i, q__3.i = q__4.r * + cx[i__4].i + q__4.i * cx[i__4].r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = i__; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; +/* L30: */ + } + return 0; +} /* crot_ */ + +/* Subroutine */ 
int cstedc_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; + + /* Local variables */ + static integer i__, j, k, m; + static real p; + static integer ii, ll, lgn; + static real eps, tiny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lwmin; + extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *, + complex *, integer *, complex *, integer *, real *, integer *, + integer *); + static integer start; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer finish; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, + integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *); + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. 
+ On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + Note that for COMPZ = 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be 1. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + RWORK (workspace/output) REAL array, dimension (MAX(1,LRWORK)) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LRWORK + need only be max(1,2*(N-1)). + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' and N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' and N > 1, LIWORK must be at least + 3 + 5*N . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). 
+ + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else if (*n <= smlsiz) { + lwmin = 1; + liwmin = 1; + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures SSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. + + If COMPZ = 'N', use SSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + goto L70; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + + csteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + + } else { + +/* If COMPZ = 'I', we simply call SSTEDC instead. 
*/ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b328, &c_b1034, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + goto L70; + } + +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. + + Scale. +*/ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + goto L70; + } + + eps = slamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. +*/ + + finish = start; +L40: + if (finish < *n) { + tiny = eps * sqrt((r__1 = d__[finish], dabs(r__1))) * sqrt(( + r__2 = d__[finish + 1], dabs(r__2))); + if ((r__1 = e[finish], dabs(r__1)) > tiny) { + ++finish; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &i__1, &c__1, & + e[start], &i__2, info); + + claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + + 1], ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L70; + } + +/* Scale back. */ + + slascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, & + rwork[m * m + 1], info); + clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], + ldz); + if (*info > 0) { + *info = start * (*n + 1) + finish; + goto L70; + } + } + + start = finish + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
+*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L60: */ + } + } + } + +L70: + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of CSTEDC */ + +} /* cstedc_ */ + +/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static real b, c__, f, g; + static integer i__, j, k, l, m; + static real p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static real rt1, rt2, eps; + static integer lsv; + static real tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int clasr_(char *, char *, char *, integer *, + integer *, real *, real *, complex *, integer *); + static real anorm; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lendm1, lendp1; + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); + static integer iscale; + extern doublereal slamch_(char *); + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real ssfmin; + static integer nmaxit, icompz; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. 
+ + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) REAL array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + claset_("Full", n, n, &c_b56, &c_b57, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. 
+*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { + goto L30; + } + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.f) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b1034); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b1034); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. 
*/ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of CSTEQR */ + +} /* csteqr_ */ + +/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, + real *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + real r__1, r__2, r__3; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static real ulp; + static logical allv; + static real unfl, ovfl, smin; + static logical over; + static real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static real remax; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv, bothv, somev; + extern /* Subroutine */ int slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *), clatrs_(char *, char *, + char *, char *, integer *, complex *, integer *, complex *, real * + , real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical rightv; + static real smlnum; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, (y**H)*T = w*(y**H) + + where y**H denotes the conjugate transpose of the vector y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal of T. + + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the unitary factor that reduces a matrix A to + Schur form T, then Q*X and Q*Y are the matrices of right and left + eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. 
+ + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + backtransformed using the matrices supplied in + VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + as indicated by the logical array SELECT. + + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + The eigenvector corresponding to the j-th eigenvalue is + computed if SELECT(j) = .TRUE.. + Not referenced if HOWMNY = 'A' or 'B'. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) COMPLEX array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + Not referenced if SIDE = 'R'. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. + + VR (input/output) COMPLEX array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + Not referenced if SIDE = 'L'. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B', LDVR >= N. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. + + WORK (workspace) COMPLEX array, dimension (2*N) + + RWORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. 
+ + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + +/* + Set M to the number of columns required to store the selected + eigenvectors. +*/ + + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); + } +/* L10: */ + } + } else { + *m = *n; + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); + +/* Store the diagonal elements of T in working array WORK. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + rwork[1] = 0.f; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } + + if (rightv) { + +/* Compute right eigenvectors. */ + + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); + + work[1].r = 1.f, work[1].i = 0.f; + +/* Form right-hand side. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + q__1.r = -t[i__3].r, q__1.i = -t[i__3].i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L40: */ + } + +/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +*/ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = q__1.r, t[i__2].i = q__1.i; + i__2 = k + k * t_dim1; + if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.f; + } +/* L50: */ + } + + if (ki > 1) { + i__1 = ki - 1; + clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.f; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! 
over) { + ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + + ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + is * vr_dim1]), dabs(r__2))); + csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0.f, vr[i__2].i = 0.f; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__1, &c_b57, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); + } + + ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2))); + csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ + } + + --is; +L80: + ; + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (somev) { + if (! select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); + + i__2 = *n; + work[i__2].r = 1.f, work[i__2].i = 0.f; + +/* Form right-hand side. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + r_cnjg(&q__2, &t[ki + k * t_dim1]); + q__1.r = -q__2.r, q__1.i = -q__2.i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L90: */ + } + +/* + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = q__1.r, t[i__3].i = q__1.i; + i__3 = k + k * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.f; + } +/* L100: */ + } + + if (ki < *n) { + i__2 = *n - ki; + clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.f; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! 
over) { + i__2 = *n - ki + 1; + ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; + + i__2 = *n - ki + 1; + ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + is * vl_dim1]), dabs(r__2))); + i__2 = *n - ki + 1; + csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0.f, vl[i__3].i = 0.f; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__2, &c_b57, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * + vl_dim1 + 1], &c__1); + } + + ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2))); + csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } + + ++is; +L130: + ; + } + } + + return 0; + +/* End of CTREVC */ + +} /* ctrevc_ */ + +/* Subroutine */ int ctrexc_(char *compq, integer *n, complex *t, integer * + ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer * + info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer k, m1, m2, m3; + static real cs; + static complex t11, t22, sn, temp; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int clartg_(complex *, complex *, real *, complex + *, complex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CTREXC reorders the Schur factorization of a complex matrix + A = Q*T*Q**H, so that the diagonal element of T with row index IFST + is moved to row ILST. + + The Schur form T is reordered by a unitary similarity transformation + Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + postmultiplying it with Z. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX array, dimension (LDT,N) + On entry, the upper triangular matrix T. + On exit, the reordered upper triangular matrix. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + unitary transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input) INTEGER + ILST (input) INTEGER + Specify the reordering of the diagonal elements of T: + The element with row index IFST is moved to row ILST by a + sequence of transpositions between adjacent elements. + 1 <= IFST <= N; 1 <= ILST <= N. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Decode and test the input parameters. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! lsame_(compq, "N") && ! wantq) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 1 || *ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Move the IFST-th diagonal element forward down the diagonal. */ + + m1 = 0; + m2 = -1; + m3 = 1; + } else { + +/* Move the IFST-th diagonal element backward up the diagonal. */ + + m1 = -1; + m2 = 0; + m3 = -1; + } + + i__1 = *ilst + m2; + i__2 = m3; + for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + +/* Interchange the k-th and (k+1)-th diagonal elements. */ + + i__3 = k + k * t_dim1; + t11.r = t[i__3].r, t11.i = t[i__3].i; + i__3 = k + 1 + (k + 1) * t_dim1; + t22.r = t[i__3].r, t22.i = t[i__3].i; + +/* Determine the transformation to perform the interchange. */ + + q__1.r = t22.r - t11.r, q__1.i = t22.i - t11.i; + clartg_(&t[k + (k + 1) * t_dim1], &q__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (k + 2 <= *n) { + i__3 = *n - k - 1; + crot_(&i__3, &t[k + (k + 2) * t_dim1], ldt, &t[k + 1 + (k + 2) * + t_dim1], ldt, &cs, &sn); + } + i__3 = k - 1; + r_cnjg(&q__1, &sn); + crot_(&i__3, &t[k * t_dim1 + 1], &c__1, &t[(k + 1) * t_dim1 + 1], & + c__1, &cs, &q__1); + + i__3 = k + k * t_dim1; + t[i__3].r = t22.r, t[i__3].i = t22.i; + i__3 = k + 1 + (k + 1) * t_dim1; + t[i__3].r = t11.r, t[i__3].i = t11.i; + + if (wantq) { + +/* Accumulate transformation in the matrix Q. */ + + r_cnjg(&q__1, &sn); + crot_(n, &q[k * q_dim1 + 1], &c__1, &q[(k + 1) * q_dim1 + 1], & + c__1, &cs, &q__1); + } + +/* L10: */ + } + + return 0; + +/* End of CTREXC */ + +} /* ctrexc_ */ + +/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + complex q__1; + + /* Local variables */ + static integer j; + static complex ajj; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CTRTI2 computes the inverse of a complex upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. 
+ = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + j * a_dim1; + q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = j + j * a_dim1; + q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. 
*/ + + i__1 = *n - j; + ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of CTRTI2 */ + +} /* ctrti2_ */ + +/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + complex q__1; + char ch__1[2]; + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), ctrsm_(char *, char *, + char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, + integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CTRTRI computes the inverse of a complex upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. 
*/ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0.f && a[i__2].i == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + ctrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b57, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b57, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of CTRTRI */ + +} /* ctrtri_ */ + +/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNG2R generates an m by n complex matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. 
+ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of CUNG2R */ + +} /* cung2r_ */ + +/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, + complex *a, integer *lda, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + cungqr_(integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNGBR generates one of the complex unitary matrices Q or P**H + determined by CGEBRD when reducing a complex matrix A to bidiagonal + form: A = Q * B * P**H. Q and P**H are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + is of order N: + if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m + rows of P**H, where n >= m >= k; + if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**H is + required, as defined in the transformation applied by CGEBRD: + = 'Q': generate Q; + = 'P': generate P**H. + + M (input) INTEGER + The number of rows of the matrix Q or P**H to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**H to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by CGEBRD. + K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEBRD. + On exit, the M-by-N matrix Q or P**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= M. + + TAU (input) COMPLEX array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**H, as + returned by CGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to CGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L10: */ + } +/* L20: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + cungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to CGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ - 1 + j * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L50: */ + } + i__2 = j * a_dim1 + 1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + cunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNGBR */ + +} /* cungbr_ */ + +/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK 
routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNGHR generates a complex unitary matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + CGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of CGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEHRD. + On exit, the N-by-N unitary matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEHRD. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! 
lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j - 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L50: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L70: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNGHR */ + +} /* cunghr_ */ + +/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. 
LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + WORK (workspace) COMPLEX array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i)' to A(i:m,i:n) from the right */ + + if (i__ < *n) { + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + r_cnjg(&q__1, &tau[i__]); + clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + r_cnjg(&q__2, &tau[i__]); + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* Set A(i,1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of CUNGL2 */ + +} /* cungl2_ */ + +/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cungl2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit; + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. 
+*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", + &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ + ib + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CUNGLQ */ + +} /* cunglq_ */ + +/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cung2r_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. 
+ + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. 
*/ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + clarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CUNGQR */ + +} /* cungqr_ */ + +/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNM2L overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. 
+ + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1]); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2L */ + +} /* cunm2l_ */ + +/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNM2R overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2R */ + +} /* cunm2r_ */ + +/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'C': P**H * C C * P**H + + Here Q and P**H are the unitary matrices determined by CGEBRD when + reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + and P**H are defined as products of elementary reflectors H(i) and + G(i) respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the unitary matrix Q or P**H that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**H; + = 'P': apply P or P**H. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**H, P or P**H from the Left; + = 'R': apply Q, Q**H, P or P**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'C': Conjugate transpose, apply Q**H or P**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by CGEBRD. + K >= 0. + + A (input) COMPLEX array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by CGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) COMPLEX array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by CGEBRD in the array argument TAUQ or TAUP. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q + or P*C or P**H*C or C*P or C*P**H. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M); + if N = 0 or M = 0, LWORK >= 1. + For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', + and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the + optimal blocksize. (NB = 0 if M = 0 or N = 0.) + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (*m == 0 || *n == 0) { + nw = 0; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (nw > 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } + } +/* Computing MAX */ + i__1 = 1, i__2 = nw * nb; + lwkopt = max(i__1,i__2); + } else { + lwkopt = 1; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to CGEBRD with nq >= k */ + + cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to CGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } 
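+
+/*
+          The reflectors held in A were produced by CGEBRD in the same rowwise
+          layout that CGELQF uses, and the product they define is P**H, which
+          is what a no-transpose call to CUNMLQ applies.  Applying P
+          (TRANS = 'N') therefore requires the conjugate-transpose option of
+          CUNMLQ, and applying P**H requires its no-transpose option, so the
+          flag is inverted into TRANST for the CUNMLQ calls below.
+*/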
+ if (nq > *k) { + +/* P was determined by a call to CGEBRD with nq > k */ + + cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to CGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMBR */ + +} /* cunmbr_ */ + +/* Subroutine */ int cunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMHR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by CGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q**H (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of CGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by CGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEHRD. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. 
+ On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("CUNMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + cunmqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMHR */ + +} /* cunmhr_ */ + +/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNML2 overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } else { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of CUNML2 */ + +} /* cunml2_ */ + +/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMLQ overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. 
+ + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMLQ */ + +} /* cunmlq_ */ + +/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMQL overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). 
+ + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < nw && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQL */ + +} /* cunmql_ */ + +/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMQR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. 
+ + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQR */ + +} /* cunmqr_ */ + +/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunmqr_(char *, + char *, integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMTR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by CHETRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from CHETRD; + = 'L': Lower triangle of A contains elementary reflectors + from CHETRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by CHETRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CHETRD. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >=M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("CUNMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to CHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + cunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to CHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + cunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMTR */ + +} /* cunmtr_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch new file mode 100644 index 0000000000..bcf7507baa --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -13163,5 +13163,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -13175,5 +13176,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -13223,5 +13225,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -13239,5 +13242,6 @@ + ! Skip any leading zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c new file mode 100644 index 0000000000..2fe608227f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c @@ -0,0 +1,2068 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b32 = 0.; +static real c_b66 = 0.f; + +doublereal dlamch_(char *cmach) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static doublereal t; + static integer it; + static doublereal rnd, eps, base; + static integer beta; + static doublereal emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static doublereal rmin, rmax, rmach; + extern logical lsame_(char *, char *); + static doublereal small, sfmin; + extern /* Subroutine */ int dlamc2_(integer *, integer *, logical *, + doublereal *, integer *, doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMCH determines double precision machine parameters. + + Arguments + ========= + + CMACH (input) CHARACTER*1 + Specifies the value to be returned by DLAMCH: + = 'E' or 'e', DLAMCH := eps + = 'S' or 's , DLAMCH := sfmin + = 'B' or 'b', DLAMCH := base + = 'P' or 'p', DLAMCH := eps*base + = 'N' or 'n', DLAMCH := t + = 'R' or 'r', DLAMCH := rnd + = 'M' or 'm', DLAMCH := emin + = 'U' or 'u', DLAMCH := rmin + = 'L' or 'l', DLAMCH := emax + = 'O' or 'o', DLAMCH := rmax + + where + + eps = relative machine precision + sfmin = safe minimum, such that 1/sfmin does not overflow + base = base of the machine + prec = eps*base + t = number of (base) digits in the mantissa + rnd = 1.0 when rounding occurs in addition, 0.0 otherwise + emin = minimum exponent before (gradual) underflow + rmin = underflow threshold - base**(emin-1) + emax = largest exponent before overflow + rmax = overflow threshold - (base**emax)*(1-eps) + + ===================================================================== +*/ + + + if (first) { + dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (doublereal) beta; + t = (doublereal) it; + if (lrnd) { + rnd = 1.; + i__1 = 1 - it; + eps = pow_di(&base, &i__1) / 2; + } else { + rnd = 0.; + i__1 = 1 - it; + eps = pow_di(&base, &i__1); + } + prec = eps * base; + emin = (doublereal) imin; + emax = (doublereal) imax; + sfmin = rmin; + small = 1. 
/ rmax; + if (small >= sfmin) { + +/* + Use SMALL plus a bit, to avoid the possibility of rounding + causing overflow when computing 1/sfmin. +*/ + + sfmin = small * (eps + 1.); + } + } + + if (lsame_(cmach, "E")) { + rmach = eps; + } else if (lsame_(cmach, "S")) { + rmach = sfmin; + } else if (lsame_(cmach, "B")) { + rmach = base; + } else if (lsame_(cmach, "P")) { + rmach = prec; + } else if (lsame_(cmach, "N")) { + rmach = t; + } else if (lsame_(cmach, "R")) { + rmach = rnd; + } else if (lsame_(cmach, "M")) { + rmach = emin; + } else if (lsame_(cmach, "U")) { + rmach = rmin; + } else if (lsame_(cmach, "L")) { + rmach = emax; + } else if (lsame_(cmach, "O")) { + rmach = rmax; + } + + ret_val = rmach; + first = FALSE_; + return ret_val; + +/* End of DLAMCH */ + +} /* dlamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + doublereal d__1, d__2; + + /* Local variables */ + static doublereal a, b, c__, f, t1, t2; + static integer lt; + static doublereal one, qtr; + static logical lrnd; + static integer lbeta; + static doublereal savec; + extern doublereal dlamc3_(doublereal *, doublereal *); + static logical lieee1; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMC1 determines the machine parameters given by BETA, T, RND, and + IEEE1. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + IEEE1 (output) LOGICAL + Specifies whether rounding appears to be done in the IEEE + 'round to nearest' style. + + Further Details + =============== + + The routine is based on the routine ENVRON by Malcolm and + incorporates suggestions by Gentleman and Marovich. See + + Malcolm M. A. (1972) Algorithms to reveal properties of + floating-point arithmetic. Comms. of the ACM, 15, 949-951. + + Gentleman W. M. and Marovich S. B. (1974) More on algorithms + that reveal properties of floating point arithmetic units. + Comms. of the ACM, 17, 276-277. + + ===================================================================== +*/ + + + if (first) { + one = 1.; + +/* + LBETA, LIEEE1, LT and LRND are the local values of BETA, + IEEE1, T and RND. + + Throughout this routine we use the function DLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + Compute a = 2.0**m with the smallest positive integer m such + that + + fl( a + 1.0 ) = a. +*/ + + a = 1.; + c__ = 1.; + +/* + WHILE( C.EQ.ONE )LOOP */ +L10: + if (c__ == one) { + a *= 2; + c__ = dlamc3_(&a, &one); + d__1 = -a; + c__ = dlamc3_(&c__, &d__1); + goto L10; + } +/* + + END WHILE + + Now compute b = 2.0**m with the smallest positive integer m + such that + + fl( a + b ) .gt. a. +*/ + + b = 1.; + c__ = dlamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ +L20: + if (c__ == a) { + b *= 2; + c__ = dlamc3_(&a, &b); + goto L20; + } +/* + + END WHILE + + Now compute the base. 
a and c are neighbouring floating point + numbers in the interval ( beta**t, beta**( t + 1 ) ) and so + their difference is beta. Adding 0.25 to c is to ensure that it + is truncated to beta and not ( beta - 1 ). +*/ + + qtr = one / 4; + savec = c__; + d__1 = -a; + c__ = dlamc3_(&c__, &d__1); + lbeta = (integer) (c__ + qtr); + +/* + Now determine whether rounding or chopping occurs, by adding a + bit less than beta/2 and a bit more than beta/2 to a. +*/ + + b = (doublereal) lbeta; + d__1 = b / 2; + d__2 = -b / 100; + f = dlamc3_(&d__1, &d__2); + c__ = dlamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } else { + lrnd = FALSE_; + } + d__1 = b / 2; + d__2 = b / 100; + f = dlamc3_(&d__1, &d__2); + c__ = dlamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* + Try and decide whether rounding is done in the IEEE 'round to + nearest' style. B/2 is half a unit in the last place of the two + numbers A and SAVEC. Furthermore, A is even, i.e. has last bit + zero, and SAVEC is odd. Thus adding B/2 to A should not change + A, but adding B/2 to SAVEC should change SAVEC. +*/ + + d__1 = b / 2; + t1 = dlamc3_(&d__1, &a); + d__1 = b / 2; + t2 = dlamc3_(&d__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* + Now find the mantissa, t. It should be the integer part of + log to the base beta of a, however it is safer to determine t + by powering. So we find t as the smallest positive integer for + which + + fl( beta**t + 1.0 ) = 1.0. +*/ + + lt = 0; + a = 1.; + c__ = 1.; + +/* + WHILE( C.EQ.ONE )LOOP */ +L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = dlamc3_(&a, &one); + d__1 = -a; + c__ = dlamc3_(&c__, &d__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + first = FALSE_; + return 0; + +/* End of DLAMC1 */ + +} /* dlamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd, + doublereal *eps, integer *emin, doublereal *rmin, integer *emax, + doublereal *rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 DLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Local variables */ + static doublereal a, b, c__; + static integer i__, lt; + static doublereal one, two; + static logical ieee; + static doublereal half; + static logical lrnd; + static doublereal leps, zero; + static integer lbeta; + static doublereal rbase; + static integer lemin, lemax, gnmin; + static doublereal small; + static integer gpmin; + static doublereal third, lrmin, lrmax, sixth; + extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *, + logical *); + extern doublereal dlamc3_(doublereal *, doublereal *); + static logical lieee1; + extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *), + dlamc5_(integer *, integer *, integer *, logical *, integer *, + doublereal *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks */ + static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. 
of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMC2 determines the machine parameters specified in its argument + list. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + EPS (output) DOUBLE PRECISION + The smallest positive number such that + + fl( 1.0 - EPS ) .LT. 1.0, + + where fl denotes the computed value. + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow occurs. + + RMIN (output) DOUBLE PRECISION + The smallest normalized number for the machine, given by + BASE**( EMIN - 1 ), where BASE is the floating point value + of BETA. + + EMAX (output) INTEGER + The maximum exponent before overflow occurs. + + RMAX (output) DOUBLE PRECISION + The largest positive number for the machine, given by + BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + value of BETA. + + Further Details + =============== + + The computation of EPS is based on a routine PARANOIA by + W. Kahan of the University of California at Berkeley. + + ===================================================================== +*/ + + + if (first) { + zero = 0.; + one = 1.; + two = 2.; + +/* + LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of + BETA, T, RND, EPS, EMIN and RMIN. + + Throughout this routine we use the function DLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +*/ + + dlamc1_(&lbeta, <, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (doublereal) lbeta; + i__1 = -lt; + a = pow_di(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + d__1 = -half; + sixth = dlamc3_(&b, &d__1); + third = dlamc3_(&sixth, &sixth); + d__1 = -half; + b = dlamc3_(&third, &d__1); + b = dlamc3_(&b, &sixth); + b = abs(b); + if (b < leps) { + b = leps; + } + + leps = 1.; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ +L10: + if (leps > b && b > zero) { + leps = b; + d__1 = half * leps; +/* Computing 5th power */ + d__3 = two, d__4 = d__3, d__3 *= d__3; +/* Computing 2nd power */ + d__5 = leps; + d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5); + c__ = dlamc3_(&d__1, &d__2); + d__1 = -c__; + c__ = dlamc3_(&half, &d__1); + b = dlamc3_(&half, &c__); + d__1 = -b; + c__ = dlamc3_(&half, &d__1); + b = dlamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* + Computation of EPS complete. + + Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). + Keep dividing A by BETA until (gradual) underflow occurs. This + is detected when we cannot recover the previous A. 
+*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + d__1 = small * rbase; + small = dlamc3_(&d__1, &zero); +/* L20: */ + } + a = dlamc3_(&one, &small); + dlamc4_(&ngpmin, &one, &lbeta); + d__1 = -one; + dlamc4_(&ngnmin, &d__1, &lbeta); + dlamc4_(&gpmin, &a, &lbeta); + d__1 = -a; + dlamc4_(&gnmin, &d__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* + ( Non twos-complement machines, no gradual underflow; + e.g., VAX ) +*/ + } else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* + ( Non twos-complement machines, with gradual underflow; + e.g., IEEE standard followers ) +*/ + } else { + lemin = min(ngpmin,gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin,ngnmin); +/* + ( Twos-complement machines, no gradual underflow; + e.g., CYBER 205 ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) + { + if (gpmin - min(ngpmin,ngnmin) == 3) { + lemin = max(ngpmin,ngnmin) - 1 + lt; +/* + ( Twos-complement machines with gradual underflow; + no known machine ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else { +/* Computing MIN */ + i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); + lemin = min(i__1,gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + first = FALSE_; +/* + ** + Comment out this if block if EMIN is ok +*/ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___58); + do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); + e_wsfe(); + } +/* + ** + + Assume IEEE arithmetic if we found denormalised numbers above, + or if arithmetic seems to round in the IEEE style, determined + in routine DLAMC1. A true IEEE machine should have both things + true; however, faulty machines may have one or the other. +*/ + + ieee = ieee || lieee1; + +/* + Compute RMIN by successive division by BETA. We could compute + RMIN as BASE**( EMIN - 1 ), but some machines underflow during + this computation. +*/ + + lrmin = 1.; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = lrmin * rbase; + lrmin = dlamc3_(&d__1, &zero); +/* L30: */ + } + +/* Finally, call DLAMC5 to compute EMAX and RMAX. */ + + dlamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of DLAMC2 */ + +} /* dlamc2_ */ + + +/* *********************************************************************** */ + +doublereal dlamc3_(doublereal *a, doublereal *b) +{ + /* System generated locals */ + doublereal ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMC3 is intended to force A and B to be stored prior to doing + the addition of A and B , for use in situations where optimizers + might hold one of these in a register. + + Arguments + ========= + + A (input) DOUBLE PRECISION + B (input) DOUBLE PRECISION + The values A and B. 
+ + ===================================================================== +*/ + + + ret_val = *a + *b; + + return ret_val; + +/* End of DLAMC3 */ + +} /* dlamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + static doublereal a; + static integer i__; + static doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal dlamc3_(doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMC4 is a service routine for DLAMC2. + + Arguments + ========= + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow, computed by + setting A = START and dividing by BASE until the previous A + can not be recovered. + + START (input) DOUBLE PRECISION + The starting point for determining EMIN. + + BASE (input) INTEGER + The base of the machine. + + ===================================================================== +*/ + + + a = *start; + one = 1.; + rbase = one / *base; + zero = 0.; + *emin = 1; + d__1 = a * rbase; + b1 = dlamc3_(&d__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. + $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP +*/ +L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + d__1 = a / *base; + b1 = dlamc3_(&d__1, &zero); + d__1 = b1 * *base; + c1 = dlamc3_(&d__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + d__1 = a * rbase; + b2 = dlamc3_(&d__1, &zero); + d__1 = b2 / rbase; + c2 = dlamc3_(&d__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of DLAMC4 */ + +} /* dlamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, doublereal *rmax) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + static integer i__; + static doublereal y, z__; + static integer try__, lexp; + static doublereal oldy; + static integer uexp, nbits; + extern doublereal dlamc3_(doublereal *, doublereal *); + static doublereal recbas; + static integer exbits, expsum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAMC5 attempts to compute RMAX, the largest machine floating-point + number, without overflow. It assumes that EMAX + abs(EMIN) sum + approximately to a power of 2. It will fail on machines where this + assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + EMAX = 28718). It will also fail if the value supplied for EMIN is + too large (i.e. too close to zero), probably with overflow. + + Arguments + ========= + + BETA (input) INTEGER + The base of floating-point arithmetic. + + P (input) INTEGER + The number of base BETA digits in the mantissa of a + floating-point value. + + EMIN (input) INTEGER + The minimum exponent before (gradual) underflow. + + IEEE (input) LOGICAL + A logical flag specifying whether or not the arithmetic + system is thought to comply with the IEEE standard. 
+ + EMAX (output) INTEGER + The largest exponent before overflow + + RMAX (output) DOUBLE PRECISION + The largest machine floating-point number. + + ===================================================================== + + + First compute LEXP and UEXP, two powers of 2 that bound + abs(EMIN). We then assume that EMAX + abs(EMIN) will sum + approximately to the bound that is closest to abs(EMIN). + (EMAX is the exponent of the required number RMAX). +*/ + + lexp = 1; + exbits = 1; +L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } else { + uexp = try__; + ++exbits; + } + +/* + Now -LEXP is less than or equal to EMIN, and -UEXP is greater + than or equal to EMIN. EXBITS is the number of bits needed to + store the exponent. +*/ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } else { + expsum = uexp << 1; + } + +/* + EXPSUM is the exponent range, approximately equal to + EMAX - EMIN + 1 . +*/ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* + NBITS is the total number of bits needed to store a + floating-point number. +*/ + + if (nbits % 2 == 1 && *beta == 2) { + +/* + Either there are an odd number of bits used to store a + floating-point number, which is unlikely, or some bits are + not used in the representation of numbers, which is possible, + (e.g. Cray machines) or the mantissa has an implicit bit, + (e.g. IEEE machines, Dec Vax machines), which is perhaps the + most likely. We have to assume the last alternative. + If this is true, then we need to reduce EMAX by one because + there must be some way of representing zero in an implicit-bit + system. On machines like Cray, we are reducing EMAX by one + unnecessarily. +*/ + + --(*emax); + } + + if (*ieee) { + +/* + Assume we are on an IEEE machine which reserves one exponent + for infinity and NaN. +*/ + + --(*emax); + } + +/* + Now create RMAX, the largest machine number, which should + be equal to (1.0 - BETA**(-P)) * BETA**EMAX . + + First compute 1.0 - BETA**(-P), being careful that the + result is less than 1.0 . +*/ + + recbas = 1. / *beta; + z__ = *beta - 1.; + y = 0.; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.) { + oldy = y; + } + y = dlamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = y * *beta; + y = dlamc3_(&d__1, &c_b32); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of DLAMC5 */ + +} /* dlamc5_ */ + +logical lsame_(char *ca, char *cb) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + static integer inta, intb, zcode; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + LSAME returns .TRUE. if CA is the same letter as CB regardless of + case. + + Arguments + ========= + + CA (input) CHARACTER*1 + CB (input) CHARACTER*1 + CA and CB specify the single characters to be compared. + + ===================================================================== + + + Test if the characters are equal +*/ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. 
*/ + + zcode = 'Z'; + +/* + Use 'Z' rather than 'A' so that ASCII can be detected on Prime + machines, on which ICHAR returns a value with bit 8 set. + ICHAR('A') on Prime machines returns 193 which is the same as + ICHAR('A') on an EBCDIC machine. +*/ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* + ASCII is assumed - ZCODE is the ASCII code of either lower or + upper case 'Z'. +*/ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. +*/ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. +*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* + RETURN + + End of LSAME +*/ + + return ret_val; +} /* lsame_ */ + +doublereal slamch_(char *cmach) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static real t; + static integer it; + static real rnd, eps, base; + static integer beta; + static real emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static real rmin, rmax, rmach; + extern logical lsame_(char *, char *); + static real small, sfmin; + extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real + *, integer *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAMCH determines single precision machine parameters. 
+ + Arguments + ========= + + CMACH (input) CHARACTER*1 + Specifies the value to be returned by SLAMCH: + = 'E' or 'e', SLAMCH := eps + = 'S' or 's , SLAMCH := sfmin + = 'B' or 'b', SLAMCH := base + = 'P' or 'p', SLAMCH := eps*base + = 'N' or 'n', SLAMCH := t + = 'R' or 'r', SLAMCH := rnd + = 'M' or 'm', SLAMCH := emin + = 'U' or 'u', SLAMCH := rmin + = 'L' or 'l', SLAMCH := emax + = 'O' or 'o', SLAMCH := rmax + + where + + eps = relative machine precision + sfmin = safe minimum, such that 1/sfmin does not overflow + base = base of the machine + prec = eps*base + t = number of (base) digits in the mantissa + rnd = 1.0 when rounding occurs in addition, 0.0 otherwise + emin = minimum exponent before (gradual) underflow + rmin = underflow threshold - base**(emin-1) + emax = largest exponent before overflow + rmax = overflow threshold - (base**emax)*(1-eps) + + ===================================================================== +*/ + + + if (first) { + slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (real) beta; + t = (real) it; + if (lrnd) { + rnd = 1.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1) / 2; + } else { + rnd = 0.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1); + } + prec = eps * base; + emin = (real) imin; + emax = (real) imax; + sfmin = rmin; + small = 1.f / rmax; + if (small >= sfmin) { + +/* + Use SMALL plus a bit, to avoid the possibility of rounding + causing overflow when computing 1/sfmin. +*/ + + sfmin = small * (eps + 1.f); + } + } + + if (lsame_(cmach, "E")) { + rmach = eps; + } else if (lsame_(cmach, "S")) { + rmach = sfmin; + } else if (lsame_(cmach, "B")) { + rmach = base; + } else if (lsame_(cmach, "P")) { + rmach = prec; + } else if (lsame_(cmach, "N")) { + rmach = t; + } else if (lsame_(cmach, "R")) { + rmach = rnd; + } else if (lsame_(cmach, "M")) { + rmach = emin; + } else if (lsame_(cmach, "U")) { + rmach = rmin; + } else if (lsame_(cmach, "L")) { + rmach = emax; + } else if (lsame_(cmach, "O")) { + rmach = rmax; + } + + ret_val = rmach; + first = FALSE_; + return ret_val; + +/* End of SLAMCH */ + +} /* slamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real a, b, c__, f, t1, t2; + static integer lt; + static real one, qtr; + static logical lrnd; + static integer lbeta; + static real savec; + static logical lieee1; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAMC1 determines the machine parameters given by BETA, T, RND, and + IEEE1. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + IEEE1 (output) LOGICAL + Specifies whether rounding appears to be done in the IEEE + 'round to nearest' style. + + Further Details + =============== + + The routine is based on the routine ENVRON by Malcolm and + incorporates suggestions by Gentleman and Marovich. See + + Malcolm M. A. 
(1972) Algorithms to reveal properties of + floating-point arithmetic. Comms. of the ACM, 15, 949-951. + + Gentleman W. M. and Marovich S. B. (1974) More on algorithms + that reveal properties of floating point arithmetic units. + Comms. of the ACM, 17, 276-277. + + ===================================================================== +*/ + + + if (first) { + one = 1.f; + +/* + LBETA, LIEEE1, LT and LRND are the local values of BETA, + IEEE1, T and RND. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + Compute a = 2.0**m with the smallest positive integer m such + that + + fl( a + 1.0 ) = a. +*/ + + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L10: + if (c__ == one) { + a *= 2; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L10; + } +/* + + END WHILE + + Now compute b = 2.0**m with the smallest positive integer m + such that + + fl( a + b ) .gt. a. +*/ + + b = 1.f; + c__ = slamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ +L20: + if (c__ == a) { + b *= 2; + c__ = slamc3_(&a, &b); + goto L20; + } +/* + + END WHILE + + Now compute the base. a and c are neighbouring floating point + numbers in the interval ( beta**t, beta**( t + 1 ) ) and so + their difference is beta. Adding 0.25 to c is to ensure that it + is truncated to beta and not ( beta - 1 ). +*/ + + qtr = one / 4; + savec = c__; + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + lbeta = c__ + qtr; + +/* + Now determine whether rounding or chopping occurs, by adding a + bit less than beta/2 and a bit more than beta/2 to a. +*/ + + b = (real) lbeta; + r__1 = b / 2; + r__2 = -b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } else { + lrnd = FALSE_; + } + r__1 = b / 2; + r__2 = b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* + Try and decide whether rounding is done in the IEEE 'round to + nearest' style. B/2 is half a unit in the last place of the two + numbers A and SAVEC. Furthermore, A is even, i.e. has last bit + zero, and SAVEC is odd. Thus adding B/2 to A should not change + A, but adding B/2 to SAVEC should change SAVEC. +*/ + + r__1 = b / 2; + t1 = slamc3_(&r__1, &a); + r__1 = b / 2; + t2 = slamc3_(&r__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* + Now find the mantissa, t. It should be the integer part of + log to the base beta of a, however it is safer to determine t + by powering. So we find t as the smallest positive integer for + which + + fl( beta**t + 1.0 ) = 1.0. +*/ + + lt = 0; + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + first = FALSE_; + return 0; + +/* End of SLAMC1 */ + +} /* slamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * + eps, integer *emin, real *rmin, integer *emax, real *rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = "(//\002 WARNING. 
The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 SLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5; + + /* Local variables */ + static real a, b, c__; + static integer i__, lt; + static real one, two; + static logical ieee; + static real half; + static logical lrnd; + static real leps, zero; + static integer lbeta; + static real rbase; + static integer lemin, lemax, gnmin; + static real small; + static integer gpmin; + static real third, lrmin, lrmax, sixth; + static logical lieee1; + extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, + logical *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slamc4_(integer *, real *, integer *), + slamc5_(integer *, integer *, integer *, logical *, integer *, + real *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks */ + static cilist io___144 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAMC2 determines the machine parameters specified in its argument + list. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + EPS (output) REAL + The smallest positive number such that + + fl( 1.0 - EPS ) .LT. 1.0, + + where fl denotes the computed value. + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow occurs. + + RMIN (output) REAL + The smallest normalized number for the machine, given by + BASE**( EMIN - 1 ), where BASE is the floating point value + of BETA. + + EMAX (output) INTEGER + The maximum exponent before overflow occurs. + + RMAX (output) REAL + The largest positive number for the machine, given by + BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + value of BETA. + + Further Details + =============== + + The computation of EPS is based on a routine PARANOIA by + W. Kahan of the University of California at Berkeley. + + ===================================================================== +*/ + + + if (first) { + zero = 0.f; + one = 1.f; + two = 2.f; + +/* + LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of + BETA, T, RND, EPS, EMIN and RMIN. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +*/ + + slamc1_(&lbeta, <, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (real) lbeta; + i__1 = -lt; + a = pow_ri(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. 
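+
+   Editor's illustrative note, not part of the generated f2c source: the
+   textbook version of this search, assuming binary IEEE arithmetic with
+   round-to-nearest, halves a candidate until adding it to 1.0 no longer
+   changes the stored sum.  SLAMC2 cannot make those assumptions, which is
+   why it routes every addition through SLAMC3 and cross-checks the result
+   below.  A minimal standalone sketch:
+
+       #include <stdio.h>
+
+       int main(void)
+       {
+           volatile float one = 1.0f;
+           volatile float eps = 1.0f;
+           volatile float sum;
+
+           for (;;) {
+               sum = one + eps / 2.0f;   // the store forces rounding to float
+               if (sum <= one)
+                   break;
+               eps /= 2.0f;
+           }
+           printf("float machine epsilon: %g\n", eps);  // 2**-23 on IEEE hosts
+           return 0;
+       }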
*/ + + b = two / 3; + half = one / 2; + r__1 = -half; + sixth = slamc3_(&b, &r__1); + third = slamc3_(&sixth, &sixth); + r__1 = -half; + b = slamc3_(&third, &r__1); + b = slamc3_(&b, &sixth); + b = dabs(b); + if (b < leps) { + b = leps; + } + + leps = 1.f; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ +L10: + if (leps > b && b > zero) { + leps = b; + r__1 = half * leps; +/* Computing 5th power */ + r__3 = two, r__4 = r__3, r__3 *= r__3; +/* Computing 2nd power */ + r__5 = leps; + r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); + c__ = slamc3_(&r__1, &r__2); + r__1 = -c__; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + r__1 = -b; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* + Computation of EPS complete. + + Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). + Keep dividing A by BETA until (gradual) underflow occurs. This + is detected when we cannot recover the previous A. +*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + r__1 = small * rbase; + small = slamc3_(&r__1, &zero); +/* L20: */ + } + a = slamc3_(&one, &small); + slamc4_(&ngpmin, &one, &lbeta); + r__1 = -one; + slamc4_(&ngnmin, &r__1, &lbeta); + slamc4_(&gpmin, &a, &lbeta); + r__1 = -a; + slamc4_(&gnmin, &r__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* + ( Non twos-complement machines, no gradual underflow; + e.g., VAX ) +*/ + } else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* + ( Non twos-complement machines, with gradual underflow; + e.g., IEEE standard followers ) +*/ + } else { + lemin = min(ngpmin,gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin,ngnmin); +/* + ( Twos-complement machines, no gradual underflow; + e.g., CYBER 205 ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) + { + if (gpmin - min(ngpmin,ngnmin) == 3) { + lemin = max(ngpmin,ngnmin) - 1 + lt; +/* + ( Twos-complement machines with gradual underflow; + no known machine ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else { +/* Computing MIN */ + i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); + lemin = min(i__1,gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + first = FALSE_; +/* + ** + Comment out this if block if EMIN is ok +*/ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___144); + do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); + e_wsfe(); + } +/* + ** + + Assume IEEE arithmetic if we found denormalised numbers above, + or if arithmetic seems to round in the IEEE style, determined + in routine SLAMC1. A true IEEE machine should have both things + true; however, faulty machines may have one or the other. +*/ + + ieee = ieee || lieee1; + +/* + Compute RMIN by successive division by BETA. We could compute + RMIN as BASE**( EMIN - 1 ), but some machines underflow during + this computation. +*/ + + lrmin = 1.f; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = lrmin * rbase; + lrmin = slamc3_(&r__1, &zero); +/* L30: */ + } + +/* Finally, call SLAMC5 to compute EMAX and RMAX. 
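+
+   Editor's illustrative note, not part of the generated f2c source: the
+   RMIN loop above builds BASE**(EMIN-1) by repeated division precisely
+   because forming the power directly can underflow on some machines.  A
+   minimal standalone sketch of the same idea, assuming a binary IEEE
+   'float':
+
+       #include <stdio.h>
+       #include <float.h>
+
+       int main(void)
+       {
+           volatile float rmin = 1.0f;
+           int i;
+
+           // 1 - FLT_MIN_EXP divisions by the radix give radix**(emin - 1).
+           for (i = 1; i <= 1 - FLT_MIN_EXP; ++i)
+               rmin = rmin / (float) FLT_RADIX;
+
+           printf("rmin = %g, FLT_MIN = %g\n", rmin, FLT_MIN);
+           return 0;
+       }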
*/ + + slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of SLAMC2 */ + +} /* slamc2_ */ + + +/* *********************************************************************** */ + +doublereal slamc3_(real *a, real *b) +{ + /* System generated locals */ + real ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAMC3 is intended to force A and B to be stored prior to doing + the addition of A and B , for use in situations where optimizers + might hold one of these in a register. + + Arguments + ========= + + A (input) REAL + B (input) REAL + The values A and B. + + ===================================================================== +*/ + + + ret_val = *a + *b; + + return ret_val; + +/* End of SLAMC3 */ + +} /* slamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a; + static integer i__; + static real b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAMC4 is a service routine for SLAMC2. + + Arguments + ========= + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow, computed by + setting A = START and dividing by BASE until the previous A + can not be recovered. + + START (input) REAL + The starting point for determining EMIN. + + BASE (input) INTEGER + The base of the machine. + + ===================================================================== +*/ + + + a = *start; + one = 1.f; + rbase = one / *base; + zero = 0.f; + *emin = 1; + r__1 = a * rbase; + b1 = slamc3_(&r__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. + $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP +*/ +L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + r__1 = a / *base; + b1 = slamc3_(&r__1, &zero); + r__1 = b1 * *base; + c1 = slamc3_(&r__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + r__1 = a * rbase; + b2 = slamc3_(&r__1, &zero); + r__1 = b2 / rbase; + c2 = slamc3_(&r__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of SLAMC4 */ + +} /* slamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, real *rmax) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static integer i__; + static real y, z__; + static integer try__, lexp; + static real oldy; + static integer uexp, nbits; + extern doublereal slamc3_(real *, real *); + static real recbas; + static integer exbits, expsum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+ November 2006 + + + Purpose + ======= + + SLAMC5 attempts to compute RMAX, the largest machine floating-point + number, without overflow. It assumes that EMAX + abs(EMIN) sum + approximately to a power of 2. It will fail on machines where this + assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + EMAX = 28718). It will also fail if the value supplied for EMIN is + too large (i.e. too close to zero), probably with overflow. + + Arguments + ========= + + BETA (input) INTEGER + The base of floating-point arithmetic. + + P (input) INTEGER + The number of base BETA digits in the mantissa of a + floating-point value. + + EMIN (input) INTEGER + The minimum exponent before (gradual) underflow. + + IEEE (input) LOGICAL + A logical flag specifying whether or not the arithmetic + system is thought to comply with the IEEE standard. + + EMAX (output) INTEGER + The largest exponent before overflow + + RMAX (output) REAL + The largest machine floating-point number. + + ===================================================================== + + + First compute LEXP and UEXP, two powers of 2 that bound + abs(EMIN). We then assume that EMAX + abs(EMIN) will sum + approximately to the bound that is closest to abs(EMIN). + (EMAX is the exponent of the required number RMAX). +*/ + + lexp = 1; + exbits = 1; +L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } else { + uexp = try__; + ++exbits; + } + +/* + Now -LEXP is less than or equal to EMIN, and -UEXP is greater + than or equal to EMIN. EXBITS is the number of bits needed to + store the exponent. +*/ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } else { + expsum = uexp << 1; + } + +/* + EXPSUM is the exponent range, approximately equal to + EMAX - EMIN + 1 . +*/ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* + NBITS is the total number of bits needed to store a + floating-point number. +*/ + + if (nbits % 2 == 1 && *beta == 2) { + +/* + Either there are an odd number of bits used to store a + floating-point number, which is unlikely, or some bits are + not used in the representation of numbers, which is possible, + (e.g. Cray machines) or the mantissa has an implicit bit, + (e.g. IEEE machines, Dec Vax machines), which is perhaps the + most likely. We have to assume the last alternative. + If this is true, then we need to reduce EMAX by one because + there must be some way of representing zero in an implicit-bit + system. On machines like Cray, we are reducing EMAX by one + unnecessarily. +*/ + + --(*emax); + } + + if (*ieee) { + +/* + Assume we are on an IEEE machine which reserves one exponent + for infinity and NaN. +*/ + + --(*emax); + } + +/* + Now create RMAX, the largest machine number, which should + be equal to (1.0 - BETA**(-P)) * BETA**EMAX . + + First compute 1.0 - BETA**(-P), being careful that the + result is less than 1.0 . +*/ + + recbas = 1.f / *beta; + z__ = *beta - 1.f; + y = 0.f; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.f) { + oldy = y; + } + y = slamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.f) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. 
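+
+   Editor's illustrative note, not part of the generated f2c source: the
+   multiplication is done one factor of BETA at a time because BETA**EMAX
+   on its own would overflow, while (1 - BETA**(-P)) * BETA**k stays
+   representable at every step.  A minimal standalone sketch for a binary
+   IEEE 'float', where the result is exactly FLT_MAX:
+
+       #include <stdio.h>
+       #include <float.h>
+
+       int main(void)
+       {
+           volatile float y = 1.0f - FLT_EPSILON / 2.0f;  // 1 - beta**(-p)
+           int i;
+
+           for (i = 1; i <= FLT_MAX_EXP; ++i)
+               y = y * (float) FLT_RADIX;   // never exceeds FLT_MAX
+
+           printf("rmax = %g, FLT_MAX = %g\n", y, FLT_MAX);
+           return 0;
+       }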
*/ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = y * *beta; + y = slamc3_(&r__1, &c_b66); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of SLAMC5 */ + +} /* slamc5_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c.patch new file mode 100644 index 0000000000..4c43f8aa2a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_config.c.patch @@ -0,0 +1,18 @@ +@@ -696,7 +696,7 @@ + doublereal dlamc3_(doublereal *a, doublereal *b) + { + /* System generated locals */ +- doublereal ret_val; ++ volatile doublereal ret_val; + + + /* +@@ -1773,7 +1773,7 @@ + doublereal slamc3_(real *a, real *b) + { + /* System generated locals */ +- real ret_val; ++ volatile real ret_val; + + + /* diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.c new file mode 100644 index 0000000000..1a6675ef11 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.c @@ -0,0 +1,41864 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. 
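+
+   Editor's illustrative note, not part of the generated f2c source: the
+   small f2c_config.c.patch above adds a 'volatile' qualifier to ret_val in
+   dlamc3_ and slamc3_.  That store-forcing idiom is what lets the *LAMC3
+   helpers defeat optimizers that would otherwise keep a + b in a wider
+   register (e.g. an 80-bit x87 register).  A minimal standalone sketch of
+   the idiom, with hypothetical names:
+
+       #include <stdio.h>
+
+       // Force a + b to be rounded to storage precision before returning.
+       static double lamc3_sketch(double a, double b)
+       {
+           volatile double ret_val = a + b;
+           return ret_val;
+       }
+
+       int main(void)
+       {
+           double one = 1.0, eps = 1.0;
+           // Halve eps until fl(1 + eps/2) == 1; the survivor is DBL_EPSILON.
+           // Without the forced store the comparison could see extra register
+           // precision and keep halving too long.
+           while (lamc3_sketch(one, eps / 2.0) > one)
+               eps /= 2.0;
+           printf("double machine epsilon: %g\n", eps);  // 2**-52 on IEEE hosts
+           return 0;
+       }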
+*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; +static integer c__1 = 1; +static doublereal c_b29 = 0.; +static doublereal c_b94 = -.125; +static doublereal c_b151 = -1.; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static integer c__6 = 6; +static integer c__12 = 12; +static integer c__49 = 49; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_true = TRUE_; +static integer c__10 = 10; +static integer c__11 = 11; +static doublereal c_b3192 = 2.; + +/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, k; + static doublereal p, r__; + static integer z__, ic, ii, kk; + static doublereal cs; + static integer is, iu; + static doublereal sn; + static integer nm1; + static doublereal eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * + , doublereal *, integer *), dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + static integer poles, iuplo, nsize, start; + extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + static integer icompq; + static doublereal orgnrm; + static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. DBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLASD3 for details. + + The code currently calls DLASDQ if singular values only are desired. + However, it can be slightly modified to compute singular values + using the divide and conquer method. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. + + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). + + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) DOUBLE PRECISION array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the DOUBLE PRECISION data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. 
+ In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + Changed dimension statement in comment describing E from (N) to + (N-1). Sven, 17 Feb 05. + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ + + if (icompq == 0) { + dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. 
+*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = EPSILON; + + mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. 
+*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 2 && icompq == 2) { + dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of DBDSDC */ + +} /* dbdsdc_ */ + +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static doublereal f, g, h__; + static integer i__, j, m; + static doublereal r__, cs; + static integer ll; + static doublereal sn, mu; + static integer nm1, nm12, nm13, lll; + static doublereal eps, sll, tol, abse; + static integer idir; + static doublereal abss; + static integer oldm; + static doublereal cosl; + static integer isub, iter; + static doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, 
doublereal *, doublereal *, doublereal *, integer *); + static integer oldll; + static doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer maxit; + static doublereal sminl, sigmx; + static logical lower; + extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *); + static doublereal sminoa, thresh; + static logical rotate; + static doublereal tolmul; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + January 2007 + + + Purpose + ======= + + DBDSQR computes the singular values and, optionally, the right and/or + left singular vectors from the singular value decomposition (SVD) of + a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + zero-shift QR algorithm. The SVD of B has the form + + B = Q * S * P**T + + where S is the diagonal matrix of singular values, Q is an orthogonal + matrix of left singular vectors, and P is an orthogonal matrix of + right singular vectors. If left singular vectors are requested, this + subroutine actually returns U*Q instead of Q, and, if right singular + vectors are requested, this subroutine returns P**T*VT instead of + P**T, for given real input matrices U and VT. When U and VT are the + orthogonal matrices that reduce a general matrix A to bidiagonal + form: A = U*B*VT, as computed by DGEBRD, then + + A = (U*Q) * S * (P**T*VT) + + is the SVD of A. Optionally, the subroutine may also compute Q**T*C + for a given real input matrix C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the N-1 offdiagonal elements of the bidiagonal + matrix B. + On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P**T * VT. + Not referenced if NCVT = 0. 
+ + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + Not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q**T * C. + Not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: + if NCVT = NRU = NCC = 0, + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + else NCVT = NRU = NCC = 0, + the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. + + Internal Parameters + =================== + + TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! 
rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = EPSILON; + unfl = SAFEMINIMUM; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__3 = 100., d__4 = pow_dd(&eps, &c_b94); + d__1 = 10., d__2 = min(d__3,d__4); + tolmul = max(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } + +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. 
&& abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. 
&& *n * tol * (sminl / smax) <= max(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) { +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr 
* d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[m] + ) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) 
{ + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + dscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* dbdsqr_ */ + +/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static integer ii; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by DGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to DGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by DGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) DOUBLE PRECISION array, dimension (N) + Details of the permutation and scaling factors, as returned + by DGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) DOUBLE PRECISION array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by DHSEIN or DTREVC. 
+ On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of DGEBAK */ + +} /* dgebak_ */ + +/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__, f, g; + static integer i__, j, k, l, m; + static doublereal r__, s, ca, ra; + static integer ica, ira, iexc; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DGEBAL balances a general real matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) DOUBLE PRECISION array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! 
lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +L150: + ; + } + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("DGEBAL", &i__2); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + +L170: + g = c__ / 2.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. 
/ f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of DGEBAL */ + +} /* dgebal_ */ + +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . 
G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to 
A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of DGEBD2 */ + +} /* dgebd2_ */ + +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nx; + static doublereal ws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlabrd_(integer *, integer *, integer * + , doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. 
+ On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). 
+ + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of DGEBRD */ + +} /* dgebrd_ */ + +/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, k; + static doublereal r__, cs, sn; + static integer ihi; + static doublereal scl; + static integer ilo; + static doublereal dum[1], eps; + static integer ibal; + static char side[1]; + static doublereal anrm; + static integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + static logical scalea; + + static doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer 
idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *); + static logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static doublereal smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) DOUBLE PRECISION array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) DOUBLE PRECISION array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. 
+ If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by DHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) 
+*/ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else { + minwrk = *n * 3; + dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from DHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. 
/ dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEV */ + +} /* dgeev_ */ + +/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . 
+ + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of DGEHD2 */ + +} /* dgehd2_ */ + +/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + static doublereal t[4160] /* was [65][64] */; + static integer ib; + static doublereal ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), daxpy_( + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), dgehd2_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlahr2_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + DGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code) + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set + to 1 +*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b15, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b151, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) iws; + + return 0; + +/* End of DGEHRD */ + +} /* dgehrd_ */ + +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). 
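+
+     Editorial illustration, not part of the original LAPACK text: a minimal
+     calling sketch, assuming the f2c typedefs integer and doublereal used by
+     this translation and column-major storage; the data are made up.
+
+         integer m = 2, n = 3, lda = 2, info;
+         doublereal a[6] = {1., 4., 2., 5., 3., 6.};
+         doublereal tau[2], work[2];
+         dgelq2_(&m, &n, a, &lda, tau, work, &info);
+
+     On return info is 0, the elements on and below the diagonal of a hold L,
+     and the elements above the diagonal together with tau encode Q.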
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* dgelq2_ */ + +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. 
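+   (At this point the leading I__-1 rows are already reduced; the remaining
+   (M-I__+1)-by-(N-I__+1) trailing block, or all of A when the blocked path
+   was not taken, is factored by the unblocked DGELQ2 below.)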
*/ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* dgelqf_ */ + +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer ie, il, mm; + static doublereal eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static doublereal sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + static integer wlalsd; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer liwork, minwrk, maxwrk; + static doublereal smlnum; + static logical lquery; + static integer smlsiz; + + +/* + -- LAPACK driver routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. 
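+
+    Editorial illustration, not part of the original LAPACK text: the
+    workspace-query convention documented under LWORK and IWORK below is
+    normally used in two passes.  A sketch, assuming the f2c typedefs of
+    this translation; names and data are made up:
+
+        integer m = 4, n = 2, nrhs = 1, lda = 4, ldb = 4, rank, info;
+        integer lwork = -1, iwkopt;
+        doublereal a[8] = {1., 1., 1., 1., 1., 2., 3., 4.};
+        doublereal b[4] = {6., 5., 7., 10.};
+        doublereal s[2], rcond = -1., wkopt;
+        dgelsd_(&m, &n, &nrhs, a, &lda, b, &ldb, s, &rcond, &rank,
+                &wkopt, &lwork, &iwkopt, &info);
+
+    The first call only returns the optimal LWORK in wkopt and the minimum
+    LIWORK in iwkopt; a second call with WORK and IWORK arrays of those sizes
+    performs the solve, after which b(1:n) holds the minimum-norm solution.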
+ + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder transformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of A. M >= 0. + + N (input) INTEGER + The number of columns of A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution + matrix X. If m >= n and RANK = n, the residual + sum-of-squares for the solution in the i-th column is given + by the sum of squares of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,max(M,N)). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) DOUBLE PRECISION + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, + if M is greater than or equal to N or + 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN), + where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + minwrk = 1; + liwork = 1; + minmn = max(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = max(i__1,0); + + if (*info == 0) { + maxwrk = 0; + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX +*/ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" + , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", + "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. 
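+             (This path first forms an LQ factorization A = L*Q and then runs
+             the SVD-based solve on the square M-by-M factor L; the workspace
+             terms gathered below cover DGELQF, the bidiagonalization of L,
+             the DORMBR and DORMLQ applications, and DLALSD.)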
+*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", + "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. The workspace + calculation should use queries for all routines eventually. + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4); + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" + , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSD", &i__1); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = PRECISION; + sfmin = SAFEMINIMUM; + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b29, &c_b29, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. 
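+   (Same safeguard as for A above; both scalings are undone at the end so the
+   returned solution and singular values refer to the original, unscaled
+   problem.)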
*/ + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (Workspace: need N+NRHS, prefer N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A. + (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. 
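+   (An explicit M-by-M copy of L is bidiagonalized in workspace so that A can
+   keep the Householder vectors of Q from the LQ factorization above; they are
+   needed later when DORMLQ applies Q**T to B.)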
*/ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL). + (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (Workspace: need M+NRHS, prefer M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A. + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. 
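+   (Rescale the solution now stored in B and the singular values in S so that
+   they correspond to the original data rather than to the copies scaled into
+   [SMLNUM,BIGNUM] above.)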
*/ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + return 0; + +/* End of DGELSD */ + +} /* dgelsd_ */ + +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
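+
+     Editorial illustration, not part of the original LAPACK text: a worked
+     2-by-2 instance of such a reflector.  For x = (3, 4)', the DLARFG-style
+     choices v = (1, 0.5)' and tau = 1.6 give
+
+         H = I - tau * v * v'  =  ( -0.6  -0.8 )
+                                  ( -0.8   0.6 )
+
+     which is symmetric and orthogonal (tau = 2 / (v'*v)) and maps x to
+     (-5, 0)', annihilating the subdiagonal entry and replacing the leading
+     entry by -norm(x).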
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* dgeqr2_ */ + +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. 
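+   (At this point the leading I__-1 columns are already factored; the
+   remaining (M-I__+1)-by-(N-I__+1) trailing block, or all of A when the
+   blocked path was not taken, is factored by the unblocked DGEQR2 below.)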
*/ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* dgeqrf_ */ + +/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static doublereal dum[1], eps; + static integer ivt, iscl; + static doublereal anrm; + static integer idum[1], ierr, itau; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + static integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static doublereal smlnum; + static logical wntqas, lquery; + + +/* + -- LAPACK driver routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + March 2009 + + + Purpose + ======= + + DGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. 
If singular vectors are desired, it uses a + divide-and-conquer algorithm. + + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array A; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). 
+ + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)). + If JOBZ = 'O', + LWORK >= 3*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK = -1 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: DBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSDC */ + + mnthr = (integer) (minmn * 11. 
/ 6.); + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, 
"DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSDC */ + + mnthr = (integer) (minmn * 11. 
/ 6.); + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * 
ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. 
If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + dgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b29, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A +
(Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + dgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b29, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + 
dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b29, & + work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. 
If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ?
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b29, &work[il], & + ldwrkl); + dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + dgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b29, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; 
+ nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + dgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b29, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + dlaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 
= *lwork - nwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & + work[il], m); + dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESDD */ + +} /* dgesdd_ */ + +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) 
-- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. 
*/ + + dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of DGESV */ + +} /* dgesv_ */ + +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer i__, j, jp; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *); + static doublereal sfmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = SAFEMINIMUM; + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. 
*/ + + if (j < *m) { + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + dger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* dgetf2_ */ + +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) + * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* dgetrf_ */ + +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by DGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by DGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from DGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. 
*/ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* dgetrs_ */ + +/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublereal hl[2401] /* was [49][49] */; + static integer kbot, nmin; + extern logical lsame_(char *, char *); + static logical initz; + static doublereal workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int dlaqr0_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlahqr_(logical *, logical *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical lquery; + + +/* + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + DHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL, and then passed to DGEHRD + when the matrix output by DGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. 
+ On exit, if INFO = 0 and JOB = 'S', then H contains the + upper quasi-triangular matrix T from the Schur decomposition + (the Schur form); 2-by-2 diagonal blocks (corresponding to + complex conjugate pairs of eigenvalues) are returned in + standard form, with H(i,i) = H(i+1,i+1) and + H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the + contents of H are unspecified on exit. (The output value of + H when INFO.GT.0 is given under the description of INFO + below.) + + Unlike earlier versions of DHSEQR, this subroutine may + explicitly set H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and + WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in + the same order as on the diagonal of the Schur form returned + in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the orthogonal matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the orthogonal matrix generated by DORGHR + after the call to DGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwise, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then DHSEQR does a workspace query. + In this case, DHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, DHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT.
0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The DLAHQR vs DLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + DLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== NL allocates some local workspace to help small matrices + . through a rare DLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== + + ==== Decode and check the input parameters. 
==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + work[1] = (doublereal) max(1,*n); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -13; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + xerbla_("DHSEQR", &i__1); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== + Computing MAX +*/ + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); + return 0; + + } else { + +/* ==== copy eigenvalues isolated by DGEBAL ==== */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L20: */ + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + dlaset_("A", n, n, &c_b29, &c_b15, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* + ==== DLAHQR/DLAQR0 crossover point ==== + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "DHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); + +/* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== */ + + if (*n > nmin) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); + } else { + +/* ==== Small matrix ==== */ + + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* + ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds + . when DLAHQR fails. ==== +*/ + + kbot = *info; + + if (*n >= 49) { + +/* + ==== Larger matrices have enough subdiagonal scratch + . space to call DLAQR0 directly. ==== +*/ + + dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { + +/* + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from DLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling DLAQR0. 
==== +*/ + + dlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.; + i__1 = 49 - *n; + dlaset_("A", &c__49, &i__1, &c_b29, &c_b29, &hl[(*n + 1) * + 49 - 49], &c__49); + dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + dlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + dlaset_("L", &i__1, &i__3, &c_b29, &c_b29, &h__[h_dim1 + 3], ldh); + } + +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== + + Computing MAX +*/ + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); + } + +/* ==== End of DHSEQR ==== */ + + return 0; +} /* dhseqr_ */ + +logical disnan_(doublereal *din) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + extern logical dlaisnan_(doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DISNAN returns .TRUE. if its argument is NaN, and .FALSE. + otherwise. To be replaced by the Fortran 2003 intrinsic in the + future. + + Arguments + ========= + + DIN (input) DOUBLE PRECISION + Input to test for NaN. + + ===================================================================== +*/ + + ret_val = dlaisnan_(din, din); + return ret_val; +} /* disnan_ */ + +/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) +{ + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLABAD takes as input the values computed by DLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by DLAMCH. This subroutine is needed because + DLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) DOUBLE PRECISION + On entry, the underflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) DOUBLE PRECISION + On entry, the overflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. 
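+
+   (Concrete illustration, assuming IEEE arithmetic: LOG10 of the IEEE
+   double precision overflow threshold is roughly 308, so the test below
+   never fires and SMALL and LARGE are returned unchanged; on a Cray,
+   whose exponent range extends to roughly 10**2466, the test fires and
+   both values are replaced by their square roots.)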
+*/ + + if (d_lg10(large) > 2e3) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* dlabad_ */ + +/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by DGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) DOUBLE PRECISION array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) DOUBLE PRECISION array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. 
+ + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * 
y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b29, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], 
&c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + dgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of DLABRD */ + +} /* dlabrd_ */ + +/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) DOUBLE PRECISION array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of DLACPY */ + +} /* dlacpy_ */ + +/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) +{ + static doublereal e, f; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLADIV performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + + The algorithm is due to Robert L. Smith and can be found + in D. Knuth, The art of Computer Programming, Vol.2, p.195 + + Arguments + ========= + + A (input) DOUBLE PRECISION + B (input) DOUBLE PRECISION + C (input) DOUBLE PRECISION + D (input) DOUBLE PRECISION + The scalars a, b, c, and d in the above expression. + + P (output) DOUBLE PRECISION + Q (output) DOUBLE PRECISION + The scalars p and q in the above expression. + + ===================================================================== +*/ + + + if (abs(*d__) < abs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of DLADIV */ + +} /* dladiv_ */ + +/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, and RT2 + is the eigenvalue of smaller absolute value. + + Arguments + ========= + + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + B (input) DOUBLE PRECISION + The (1,2) and (2,1) elements of the 2-by-2 matrix. + + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. 
+ + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + +/* End of DLAE2 */ + +} /* dlae2_ */ + +/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static doublereal temp; + static integer curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + static integer iqptr; + extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + static integer tlvls; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + 
doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. + On exit, its eigenvalues. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) DOUBLE PRECISION array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
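+
+   (As a concrete instance of the workspace bounds above: with ICOMPQ = 2
+   and N = 1000, WORK needs at least 4*N + N**2 = 1004000 entries and
+   IWORK at least 3 + 5*N = 5003.)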
+*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. 
+ + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + DLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + DLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. +*/ + + if (*icompq == 2) { + dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of DLAED0 */ + +} /* dlaed0_ */ + +/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxp; + extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK 
is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. 
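+
+   (For example, with N = 4 and CUTPNT = 2 the two subproblems occupy
+   rows/columns 1:2 and 3:4, and the z-vector assembled below consists of
+   row 2 of the leading eigenvector block followed by row 3 of the
+   trailing eigenvector block; DLAED2 then rescales it to unit norm.)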
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in DLAED2 and DLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of DLAED1 */ + +} /* dlaed1_ */ + +/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static doublereal eps, tau, tol; + static integer psm[4], imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4]; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. + + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to DLAED3. + + Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + DLAED3 in a matrix multiply (DGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. 
+ + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. + On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1. / sqrt(2.); + dscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; +/* Computing MAX */ + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. 
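+
+   In outline, each candidate Z(NJ) is paired below with the previously
+   retained entry Z(PJ): with TAU = sqrt( Z(PJ)**2 + Z(NJ)**2 ), a plane
+   rotation zeroes Z(PJ) and sets Z(NJ) = TAU; the pair is deflated only
+   when the off-diagonal element the rotation would introduce,
+   ABS( ( D(NJ) - D(PJ) )*C*S ), does not exceed TOL, in which case the
+   same rotation is also applied to the corresponding columns of Q.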
+*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + d__1 = c__; +/* Computing 2nd power */ + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); +/* Computing 2nd power */ + d__1 = s; +/* Computing 2nd power */ + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. 
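+
+   (Packing note: type-1 and type-2 columns contribute only their leading
+   N1 entries to the first block of Q2, type-2 and type-3 columns
+   contribute their trailing N2 = N - N1 entries to the second block, and
+   the deflated type-4 columns are stored in full; this is why the second
+   write pointer starts at ( CTOT(1) + CTOT(2) )*N1 + 1.)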
+*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in DLAED3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of DLAED2 */ + +} /* dlaed2_ */ + +/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to DLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. 
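+
+   In the usual notation, the secular equation solved here via DLAED4 is
+
+      1 + RHO * SUM( W(i)**2 / ( DLAMDA(i) - LAMBDA ) , i = 1..K ) = 0,
+
+   which for RHO > 0 has exactly one root between each pair of adjacent
+   poles DLAMDA(i) < DLAMDA(i+1) and one root to the right of the largest
+   pole.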
+ + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) DOUBLE PRECISION array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. + + Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see DLAED2). + The rows of the eigenvectors found by DLAED4 must be likewise + permuted before the matrix multiply can take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. The fourth column type is any + column which has been deflated. + + W (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 
+ The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = dnrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. 
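+
+    [Editor's note -- not part of the upstream LAPACK comment] Because DLAED2
+    grouped the non-deflated columns of Q2 by where they have nonzero entries
+    (CTOT(1) columns in the first block only, CTOT(2) in both blocks, CTOT(3)
+    in the second block only), the back-multiplication below splits into two
+    smaller GEMMs: rows 1..N1 of the updated Q need only the first
+    N12 = CTOT(1)+CTOT(2) columns, while rows N1+1..N need only the last
+    N23 = CTOT(2)+CTOT(3) columns.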
*/ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + dgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & + c_b29, &q[*n1 + 1 + q_dim1], ldq); + } else { + dlaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); + } + + dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + dgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, + &q[q_offset], ldq); + } else { + dlaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of DLAED3 */ + +} /* dlaed3_ */ + +/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + static logical swtch3; + + static logical orgati; + static doublereal erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension (N) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (N) + If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 + for detail. The vector DELTA contains the information necessary + to construct the eigenvectors by DLAED3 and DLAED9. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) 
is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.; + return 0; + } + if (*n == 2) { + dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) 
{ + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. 
+ erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + dltlb = 0.; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + dltlb = -midpt; + dltub = 0.; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! 
swtch3) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + prew = w; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (d__1 * d__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! 
swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of DLAED4 */ + +} /* dlaed4_ */ + +/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + static doublereal b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension (2) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. 
/ (-b + sqrt(b * b + c__ * 4.)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF DLAED5 */ + +} /* dlaed5_ */ + +/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static doublereal a, b, c__, f; + static integer i__; + static doublereal fc, df, ddf, lbd, eta, ubd, eps, base; + static integer iter; + static doublereal temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static doublereal small1, small2, sminv1, sminv2; + + static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + February 2007 + + + Purpose + ======= + + DLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by DLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to DLAED4 for its significance. + + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + DLAED4 for further details. + + RHO (input) DOUBLE PRECISION + Refer to the equation f(x) above. + + D (input) DOUBLE PRECISION array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) DOUBLE PRECISION array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) DOUBLE PRECISION + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) DOUBLE PRECISION + The root of the equation f(x). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + 30/06/99: Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + 10/02/03: This version has a few statements commented out for thread + safety (machine parameters are computed on each entry). SJH. + + 05/10/06: Modified from a new version of Ren-Cang Li, use + Gragg-Thornton-Warner cubic convergent scheme for better stability. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + *info = 0; + + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.) 
{ + lbd = 0.; + } else { + ubd = 0.; + } + + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) { + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) + )); + } + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { + *tau = 0.; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } + } + } + +/* + get machine parameters for possible scaling to avoid overflow + + modified by Sven: parameters SMALL1, SMINV1, SMALL2, + SMINV2, EPS are not SAVEd anymore between one call to the + others but recomputed at each call +*/ + + eps = EPSILON; + base = BASE; + i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (abs(f) <= 0.) { + goto L60; + } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } + +/* + Iteration begins -- Use Gragg-Thornton-Warner cubic convergent + scheme + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. 
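+
+    [Editor's note -- not part of the upstream LAPACK comment] Each pass of
+    the loop below builds A, B, C from F, DF, DDF and the distances to the
+    two poles nearest the wanted root, and takes the correction ETA as a root
+    of the quadratic
+
+        C*ETA**2 - A*ETA + B = 0,
+
+    evaluated in whichever algebraically equivalent form avoids cancellation.
+    If roundoff yields F*ETA >= 0, a plain Newton step ETA = -F/DF is used
+    instead, and TAU is reset to the midpoint of [LBD, UBD] whenever the
+    update would leave that bracketing interval.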
+*/ + + iter = niter + 1; + + for (niter = iter; niter <= 40; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) { + eta = -f / df; + } + + *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; + if (abs(f) <= eps * erretm) { + goto L60; + } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of DLAED6 */ + +} /* dlaed6_ */ + +/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer indxc, indxp; + extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. 
This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. DLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. 
The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED8 and DLAED9. +*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. 
*/ + + dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b29, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of DLAED7 */ + +} /* dlaed7_ */ + +/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dscal_( + integer *, doublereal *, doublereal *, integer *), dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. 
+ + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to DLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
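+
+    [Editor's note -- not part of the upstream LAPACK comment] Concretely, the
+    code below (after Z has been normalized and RHO rescaled) applies two
+    deflation tests with TOL = 8*EPS*max|D|: an eigenvalue J is deflated
+    outright when RHO*|Z(J)| <= TOL, and a close pair (JLAM, J) is deflated
+    when the Givens rotation with TAU = sqrt(Z(JLAM)**2 + Z(J)**2),
+    C = Z(J)/TAU, S = -Z(JLAM)/TAU leaves a coupling
+    |(D(J) - D(JLAM))*C*S| <= TOL; in that case Z(JLAM) is zeroed, Z(J) is
+    set to TAU, and the rotation is recorded in GIVCOL and GIVNUM.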
+ + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED8", &i__1); + return 0; + } + +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). +*/ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. 
*/ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of DLAED8 */ + +} /* dlaed8_ */ + +/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to DLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. + + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. + + Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + W (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) DOUBLE PRECISION array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. 
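+
+   (For each root D(j) returned by DLAED4, the workspace column Q(:,j)
+   holds the differences DLAMDA(i) - D(j); dividing W(i) by these gives
+   the unnormalized eigenvector of the rank-one modified diagonal
+   problem, and each column is then normalized to unit length into S.)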
*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of DLAED9 */ + +} /* dlaed9_ */ + +/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k, mid, ptr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) DOUBLE PRECISION array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. 
+ + Z (output) DOUBLE PRECISION array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + + ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } + +/* + Loop through remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. 
We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + dgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + dgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of DLAEDA */ + +} /* dlaeda_ */ + +/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static doublereal acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + B (input) DOUBLE PRECISION + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. + + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. + + CS1 (output) DOUBLE PRECISION + SN1 (output) DOUBLE PRECISION + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) 
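+/*
+       At this point SM = A + C and RT = SQRT( (A-C)**2 + 4*B**2 ), so the
+       two eigenvalues are (SM+RT)/2 and (SM-RT)/2.  The branches below
+       assign the root of larger magnitude to RT1 and recover RT2 from the
+       determinant, RT2 = (A*C - B*B)/RT1, for accuracy of the smaller
+       eigenvalue.
+*/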
{ + *rt1 = (sm - rt) * .5; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of DLAEV2 */ + +} /* dlaev2_ */ + +/* Subroutine */ int dlaexc_(logical *wantq, integer *n, doublereal *t, + integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, + integer *n2, doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static doublereal d__[16] /* was [4][4] */; + static integer k; + static doublereal u[3], x[4] /* was [2][2] */; + static integer j2, j3, j4; + static doublereal u1[3], u2[3]; + static integer nd; + static doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, + tau1, tau2; + static integer ierr; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static doublereal scale, dnorm, xnorm; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlasy2_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dlarfx_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *); + static doublereal thresh, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + an upper quasi-triangular matrix T by an orthogonal similarity + transformation. + + T must be in Schur canonical form, that is, block upper triangular + with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + has its diagonal elemnts equal and its off-diagonal elements of + opposite sign. + + Arguments + ========= + + WANTQ (input) LOGICAL + = .TRUE. 
: accumulate the transformation in the matrix Q; + = .FALSE.: do not accumulate the transformation. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) DOUBLE PRECISION array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + canonical form. + On exit, the updated matrix T, again in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, if WANTQ is .TRUE., the orthogonal matrix Q. + On exit, if WANTQ is .TRUE., the updated matrix Q. + If WANTQ is .FALSE., Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. + LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. + + J1 (input) INTEGER + The index of the first row of the first block T11. + + N1 (input) INTEGER + The order of the first block T11. N1 = 0, 1 or 2. + + N2 (input) INTEGER + The order of the second block T22. N2 = 0, 1 or 2. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + = 1: the transformed matrix T would be too far from Schur + form; the blocks are not swapped and T and Q are + unchanged. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + + if (*n1 == 1 && *n2 == 1) { + +/* Swap two 1-by-1 blocks. */ + + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + +/* Determine the transformation to perform the interchange. */ + + d__1 = t22 - t11; + dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], + ldt, &cs, &sn); + } + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, + &cs, &sn); + + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, + &cs, &sn); + } + + } else { + +/* + Swapping involves at least one 2-by-2 block. + + Copy the diagonal block of order N1+N2 to the local array D + and compute its norm. +*/ + + nd = *n1 + *n2; + dlacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); + dnorm = dlange_("Max", &nd, &nd, d__, &c__4, &work[1]); + +/* + Compute machine-dependent threshold for test for accepting + swap. +*/ + + eps = PRECISION; + smlnum = SAFEMINIMUM / eps; +/* Computing MAX */ + d__1 = eps * 10. * dnorm; + thresh = max(d__1,smlnum); + +/* Solve T11*X - X*T22 = scale*T12 for X. */ + + dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & + scale, x, &c__2, &xnorm, &ierr); + +/* Swap the adjacent diagonal blocks. 
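+
+       (The value K = N1 + N1 + N2 - 3 computed below selects the case:
+       K = 1 for N1 = 1, N2 = 2;  K = 2 for N1 = 2, N2 = 1;  K = 3 for
+       N1 = 2, N2 = 2.  The 1-by-1 / 1-by-1 case was handled above.)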
*/ + + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + } + +L10: + +/* + N1 = 1, N2 = 2: generate elementary reflector H so that: + + ( scale, X11, X12 ) H = ( 0, 0, * ) +*/ + + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L20: + +/* + N1 = 2, N2 = 1: generate elementary reflector H so that: + + H ( -X11 ) = ( * ) + ( -X21 ) = ( 0 ) + ( scale ) = ( 0 ) +*/ + + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + dlarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + i__1 = *n - *j1; + dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ + 1]); + + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L30: + +/* + N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so + that: + + H(2) H(1) ( -X11 -X12 ) = ( * * ) + ( -X21 -X22 ) ( 0 * ) + ( scale 0 ) ( 0 0 ) + ( 0 scale ) ( 0 0 ) +*/ + + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) + ; + dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) + ; + dlarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); + dlarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1,d__2), d__2 = + abs(d__[3]), d__1 = max(d__1,d__2), d__2 = abs(d__[7]); + if (max(d__1,d__2) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. 
*/ + + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ + 1]); + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] + ); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & + work[1]); + dlarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ + 1]); + } + +L40: + + if (*n2 == 2) { + +/* Standardize new 2-by-2 block T11 */ + + dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * + j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & + wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) + * t_dim1], ldt, &cs, &sn); + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + if (*n1 == 2) { + +/* Standardize new 2-by-2 block T22 */ + + j3 = *j1 + *n2; + j4 = j3 + 1; + dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * + t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & + cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) + * t_dim1], ldt, &cs, &sn); + } + i__1 = j3 - 1; + drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + } + return 0; + +/* Exit with INFO = 1 if swap was rejected. */ + +L50: + *info = 1; + return 0; + +/* End of DLAEXC */ + +} /* dlaexc_ */ + +/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, j, k, l, m; + static doublereal s, v[3]; + static integer i1, i2; + static doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, + cs; + static integer nh; + static doublereal sn; + static integer nr; + static doublereal tr; + static integer nz; + static doublereal det, h21s; + static integer its; + static doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + static doublereal safmin, safmax, rtdisc, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Purpose + ======= + + DLAHQR is an auxiliary routine called by DHSEQR to update the + eigenvalues and Schur decomposition already computed by DHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). DLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO is zero and if WANTT is .TRUE., H is upper + quasi-triangular in rows and columns ILO:IHI, with any + 2-by-2 diagonal blocks in standard form. If INFO is zero + and WANTT is .FALSE., the contents of H are unspecified on + exit. The output state of H if INFO is nonzero is given + below under the description of INFO. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by DHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: If INFO = i, DLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of WR and WI + contain those eigenvalues which have been + successfully computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix rows + and columns ILO thorugh INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 
0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of DLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). + + ========================================================= +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* Set machine-dependent constants for the stopping criterion. */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + for (its = 0; its <= 30; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) { + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } + } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some cases. 
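+   .           Concretely, the test below treats H(K,K-1) as negligible
+   .           when |H(K,K-1)| * |H(K-1,K)| is at most
+   .           ULP * |H(K,K)| * |H(K-1,K-1) - H(K,K)|
+   .           (up to the common scaling by S and the SMLNUM safeguard).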
==== +*/ + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { +/* Computing MAX */ + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + aa = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + bb = min(d__3,d__4); + s = aa + ab; +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1,d__2)) { + goto L40; + } + } +/* L30: */ + } +L40: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L150; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10) { + +/* Exceptional shift. */ + + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + (d__2 = h__[l + + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { + +/* Exceptional shift. */ + + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) { + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) { + +/* ==== complex conjugate shifts ==== */ + + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + +/* ==== real shifts (use only one of them) ==== */ + + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs( + d__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.; + rt2i = 0.; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__1 = l; + for (m = i__ - 2; m >= i__1; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. (The following uses scaling to avoid + overflows and most underflows.) 
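+
+             (V is a scaled multiple of the first column of
+             (H - RT1*I)*(H - RT2*I) taken from rows M to M+2 of the
+             active submatrix; this is the vector that would start the
+             implicit double-shift step at row M.)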
+*/ + + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - + rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i + / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] + - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + + abs(v[2])) <= ulp * abs(v[0]) * ((d__2 = h__[m - 1 + (m - + 1) * h_dim1], abs(d__2)) + (d__3 = h__[m + m * h_dim1], + abs(d__3)) + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs( + d__4)))) { + goto L60; + } +/* L50: */ + } +L60: + +/* Double-shift QR step */ + + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__2 = 3, i__3 = i__ - k + 1; + nr = min(i__2,i__3); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { +/* + ==== Use the following instead of + . H( K, K-1 ) = -H( K, K-1 ) to + . avoid a bug when v(2) and v(3) + . underflow. ==== +*/ + h__[k + (k - 1) * h_dim1] *= 1. - t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L70: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__3 = k + 3; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L80: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L90: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L100: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). 
+*/ + + i__2 = i__; + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L110: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L120: */ + } + } + } +/* L130: */ + } + +/* L140: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L150: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* return to start of the main loop with new value of I. */ + + i__ = l - 1; + goto L20; + +L160: + return 0; + +/* End of DLAHQR */ + +} /* dlahqr_ */ + +/* Subroutine */ int dlahr2_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; + + /* Local variables */ + static integer i__; + static doublereal ei; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dgemv_( + char *, integer *, integer *, doublereal *, doublereal *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *, doublereal *, + integer *), dtrmm_(char *, char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, + doublereal *, integer *), dlarfg_( + integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + DLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by DGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + K < N. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) DOUBLE PRECISION array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(K+1:N,I) + + Update I-th column of A - Y * V' +*/ + + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b15, &a[*k + 1 + + i__ * a_dim1], &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + dtrmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + dtrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + +/* Compute Y(K+1:N,I) */ + + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b15, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[* + k + 1 + i__ * y_dim1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b15, &y[*k + 1 + i__ * + y_dim1], &c__1); + i__2 = *n - *k; + dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + +/* Compute T(1:I,I) */ + + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + +/* Compute Y(1:K,1:NB) */ + + dlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + 
dtrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b15, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + dgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b15, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b15, &y[y_offset], ldy); + } + dtrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b15, &t[ + t_offset], ldt, &y[y_offset], ldy); + + return 0; + +/* End of DLAHR2 */ + +} /* dlahr2_ */ + +logical dlaisnan_(doublereal *din1, doublereal *din2) +{ + /* System generated locals */ + logical ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + This routine is not for general use. It exists solely to avoid + over-optimization in DISNAN. + + DLAISNAN checks for NaNs by comparing its two arguments for + inequality. NaN is the only floating-point value where NaN != NaN + returns .TRUE. To check for NaNs, pass the same variable as both + arguments. + + A compiler must assume that the two arguments are + not the same variable, and the test will not be optimized away. + Interprocedural or whole-program optimization may delete this + test. The ISNAN functions will be replaced by the correct + Fortran 03 intrinsic once the intrinsic is widely available. + + Arguments + ========= + + DIN1 (input) DOUBLE PRECISION + + DIN2 (input) DOUBLE PRECISION + Two numbers to compare for inequality. + + ===================================================================== +*/ + + ret_val = *din1 != *din2; + return ret_val; +} /* dlaisnan_ */ + +/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, + cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static doublereal bnorm, cnorm, smini; + + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) 
+ + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by DLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) DOUBLE PRECISION + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) DOUBLE PRECISION + The coefficient c, which A is multiplied by. + + A (input) DOUBLE PRECISION array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) DOUBLE PRECISION + The 1,1 element in the diagonal matrix D. + + D2 (input) DOUBLE PRECISION + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) DOUBLE PRECISION array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) DOUBLE PRECISION + The real part of the scalar "w". + + WI (input) DOUBLE PRECISION + The imaginary part of the scalar "w". Not used if NW=1. + + X (output) DOUBLE PRECISION array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by DLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. + + SCALE (output) DOUBLE PRECISION + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) DOUBLE PRECISION + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. 
It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2. * SAFEMINIMUM; + bignum = 1. / smlnum; + smini = max(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << + 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << + 1) + 1], abs(d__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ + b_dim1 + 2], abs(d__2)); + bnorm = max(d__3,d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. 
*/ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2,d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( + d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] + , abs(d__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 + << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], + abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5,d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1. 
/ ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) + ), d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1,d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of DLALN2 */ + +} /* dlaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, 
doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. 
+ + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( K ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. + + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. 
*/ + + dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. +*/ + + if (*k == 1) { + dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + dscal_(nrhs, &c_b151, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = dnrm2_(k, &work[1], &c__1); + dgemv_("T", k, nrhs, &c_b15, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b29, &b[j + b_dim1], ldb); + dlascl_("G", &c__0, &c__0, &temp, &c_b15, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. +*/ + + if (*k == 1) { + dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + dgemv_("T", k, nrhs, &c_b15, &b[b_offset], ldb, &work[1], & + c__1, &c_b29, &bx[j + bx_dim1], ldbx); +/* L80: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. 
+*/ + + if (*sqre == 1) { + dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. */ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } + + return 0; + +/* End of DLALS0 */ + +} /* dlals0_ */ + +/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer inode, ndiml, ndimr; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlasdt_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, DLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by DLALSA. 
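+
+   [Editorial aside -- schematic outline only, not part of the original
+   LAPACK comment.  For ICOMPQ = 0 the routine first handles the leaf
+   subproblems (whose singular vectors are stored explicitly) and then
+   walks the divide-and-conquer tree bottom-up, applying one DLALS0
+   back-transformation per node to the corresponding rows of B, roughly:
+
+       for (lvl = nlvl; lvl >= 1; --lvl)            levels, bottom-up
+           for (node = first node of lvl; node <= last node of lvl; ++node)
+               dlals0_(icompq, ..., &b[row of node], ..., info);
+
+   For ICOMPQ = 1 the same tree is traversed top-down instead, and the
+   leaf subproblems are applied last.  The exact index bookkeeping is done
+   with the IWORK array in the code below.]
+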
+ + Arguments + ========= + + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). + + BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. + + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) DOUBLE PRECISION array, dimension ( N ). 
+ On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array. + The dimension must be at least N. + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 50. +*/ + + if (*icompq == 1) { + goto L50; + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nl, nrhs, &nl, &c_b15, &u[nlf + u_dim1], ldu, &b[ + nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nr, nrhs, &nr, &c_b15, &u[nrf + u_dim1], ldu, &b[ + nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. 
+*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. +*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. 
+*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b15, &vt[nlf + vt_dim1], ldu, + &b[nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b15, &vt[nrf + vt_dim1], ldu, + &b[nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L80: */ + } + +L90: + + return 0; + +/* End of DLALSA */ + +} /* dlalsa_ */ + +/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr; + static doublereal rcnd; + static integer perm, nsub; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer nlvl, sqre, bxst; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static doublereal orgnrm; + static integer givnum, givptr, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- 
LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), + where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
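+
+   [Editorial aside -- illustrative only, not part of the original LAPACK
+   comment.  The RCOND-based truncation performed below amounts to
+
+       rcnd = RCOND               if 0 < RCOND < 1, machine precision otherwise
+       tol  = rcnd * max_i |D(i)|
+
+       for each computed singular value D(i):
+           if |D(i)| <= tol : zero out the corresponding row of the
+                              transformed right-hand side
+           else             : divide that row by D(i) and increment RANK
+
+   i.e. the usual truncated-SVD, minimum-norm least squares solve.]
+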
+*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSD", &i__1); + return 0; + } + + eps = EPSILON; + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + } else { + *rank = 1; + dlascl_("G", &c__0, &c__0, &d__[1], &c_b15, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + dlaset_("A", n, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + return 0; + } + + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + dlaset_("A", n, n, &c_b29, &c_b15, &work[1], n); + dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[i__ + b_dim1], + ldb); + } else { + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b15, &work[1], n, &b[b_offset], ldb, & + c_b29, &work[nwork], n); + dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. 
*/ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. */ + + dlaset_("A", &nsize, &nsize, &c_b29, &c_b15, &work[vt + st1], + n); + dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. 
+*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &work[bx + i__ - 1], n); + } else { + ++(*rank); + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &work[ + bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b15, &work[vt + st1], n, + &work[bxst], n, &c_b29, &b[st + b_dim1], ldb); + } else { + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of DLALSD */ + +} /* dlalsd_ */ + +/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. + + A (input) DOUBLE PRECISION array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + DTRD1 (input) INTEGER + DTRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. They indicate whether a + subset of A is sorted in ascending (DTRDx = 1) or descending + (DTRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. 
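+
+   [Editorial aside -- worked example, not part of the original LAPACK
+   comment.  With N1 = 3, N2 = 2, DTRD1 = DTRD2 = 1 and
+
+       A = ( 1, 4, 7, 2, 6 )      first 3 and last 2 entries each ascending
+
+   the routine produces INDEX = ( 1, 4, 2, 5, 3 ), so that
+
+       B(I) = A(INDEX(I)) = ( 1, 2, 4, 6, 7 ),  I = 1, ..., 5
+
+   is the merged list in ascending order.]
+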
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*dtrd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*dtrd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; +/* L30: */ + } + } + + return 0; + +/* End of DLAMRG */ + +} /* dlamrg_ */ + +doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + DLANGE returns the value + + DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + DLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + DLANGE is set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* dlange_ */ + +doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Local variables */ + static integer i__; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. + + Description + =========== + + DLANST returns the value + + DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANST is + set to zero. + + D (input) DOUBLE PRECISION array, dimension (N) + The diagonal elements of A. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( + d__1)) + (d__2 = d__[*n], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + dlassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + dlassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANST */ + +} /* dlanst_ */ + +doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + static integer i__, j; + static doublereal sum, absa, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. + + Description + =========== + + DLANSY returns the value + + DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANSY is + set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + dlassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSY */ + +} /* dlansy_ */ + +/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Local variables */ + static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, + temp, scale, bcmax, bcmis, sigma; + + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) DOUBLE PRECISION + B (input/output) DOUBLE PRECISION + C (input/output) DOUBLE PRECISION + D (input/output) DOUBLE PRECISION + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. + + RT1R (output) DOUBLE PRECISION + RT1I (output) DOUBLE PRECISION + RT2R (output) DOUBLE PRECISION + RT2I (output) DOUBLE PRECISION + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) DOUBLE PRECISION + SN (output) DOUBLE PRECISION + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). + + ===================================================================== +*/ + + + eps = PRECISION; + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + goto L10; + + } else if (*b == 0.) { + +/* Swap rows and columns */ + + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + goto L10; + } else if (*a - *d__ == 0. && d_sign(&c_b15, b) != d_sign(&c_b15, c__)) { + *cs = 1.; + *sn = 0.; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5; +/* Computing MAX */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1,d__2) * d_sign(&c_b15, b) * d_sign(&c_b15, c__); +/* Computing MAX */ + d__1 = abs(p); + scale = max(d__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.) { + +/* Real eigenvalues. Compute A and D. */ + + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. +*/ + + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_sign(&c_b15, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + + if (*c__ != 0.) { + if (*b != 0.) 
{ + if (d_sign(&c_b15, b) == d_sign(&c_b15, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) { + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of DLANV2 */ + +} /* dlanv2_ */ + +doublereal dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Local variables */ + static doublereal w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + overflow. + + Arguments + ========= + + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION + X and Y specify the values x and y. + + ===================================================================== +*/ + + + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0.) { + ret_val = w; + } else { +/* Computing 2nd power */ + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); + } + return ret_val; + +/* End of DLAPY2 */ + +} /* dlapy2_ */ + +doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + static doublereal w, xabs, yabs, zabs; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + unnecessary overflow. + + Arguments + ========= + + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION + Z (input) DOUBLE PRECISION + X, Y and Z specify the values x, y and z. + + ===================================================================== +*/ + + + xabs = abs(*x); + yabs = abs(*y); + zabs = abs(*z__); +/* Computing MAX */ + d__1 = max(xabs,yabs); + w = max(d__1,zabs); + if (w == 0.) { +/* + W can be zero for max(0,nan,0) + adding all three entries together will make sure + NaN will not disappear. 
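The point of the scaling in DLAPY2 above, and of the three-way variant here in DLAPY3, is that forming x**2 + y**2 directly can overflow even when sqrt(x**2+y**2) is perfectly representable; dividing the smaller magnitude by the larger keeps the squared term in [0,1]. A plain-C restatement of that idea, as a sketch only (the name hypot2_scaled is hypothetical; C99's hypot() provides the same service with additional care):

    #include <math.h>

    static double hypot2_scaled(double x, double y)
    {
        double xabs = fabs(x), yabs = fabs(y);
        double w = xabs > yabs ? xabs : yabs;    /* larger magnitude  */
        double z = xabs > yabs ? yabs : xabs;    /* smaller magnitude */
        if (z == 0.0)
            return w;                            /* also avoids 0/0   */
        return w * sqrt(1.0 + (z / w) * (z / w));
    }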
+*/ + ret_val = xabs + yabs + zabs; + } else { +/* Computing 2nd power */ + d__1 = xabs / w; +/* Computing 2nd power */ + d__2 = yabs / w; +/* Computing 2nd power */ + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + } + return ret_val; + +/* End of DLAPY3 */ + +} /* dlapy3_ */ + +/* Subroutine */ int dlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, k; + static doublereal aa, bb, cc, dd; + static integer ld; + static doublereal cs; + static integer nh, it, ks, kt; + static doublereal sn; + static integer ku, kv, ls, ns; + static doublereal ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static doublereal swap; + static integer ktop; + static doublereal zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr3_( + logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), dlaqr5_(logical *, logical *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *); + static integer nibble; + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. 
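Each 2-by-2 diagonal block of the real Schur form produced by this family of routines is put into the standard form described for DLANV2 above. A minimal usage sketch for one block (hypothetical driver code; it assumes the f2c doublereal typedef and the dlanv2_ defined earlier in this file):

    static void eig2x2_example(void)
    {
        /* [ 4  3 ]
           [-2  1 ]   has eigenvalues 2.5 +/- i*sqrt(15)/2            */
        doublereal a = 4., b = 3., c = -2., d = 1.;
        doublereal rt1r, rt1i, rt2r, rt2i, cs, sn;

        dlanv2_(&a, &b, &c, &d, &rt1r, &rt1i, &rt2r, &rt2i, &cs, &sn);

        /* On return rt1r == rt2r == 2.5 and rt1i == -rt2i == sqrt(15)/2;
           a, b, c, d hold the standardized block (here a == d and
           b*c < 0), and (cs, sn) is the rotation that produced it.   */
    }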
+ + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to DGEBAL, and then passed to DGEHRD when the + matrix output by DGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (IHI) + WI (output) DOUBLE PRECISION array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then DLAQR0 does a workspace query. 
+ In this case, DLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, DLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. 
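The LWORK = -1 convention documented above follows the usual LAPACK workspace-query protocol: call once with LWORK = -1, read the estimate from WORK(1), allocate, then call again for real. A hedged sketch of that two-step pattern for DLAQR0 (the wrapper name and structure are mine; integer, doublereal, logical and TRUE_ are the f2c definitions already used by this file):

    /* Ask DLAQR0 how much workspace it wants for an n-by-n problem.
       H and Z are not read during the query, but valid pointers and
       leading dimensions must still be supplied.                     */
    static integer dlaqr0_lwork_query(integer n,
                                      doublereal *h, integer ldh,
                                      doublereal *z, integer ldz,
                                      doublereal *wr, doublereal *wi)
    {
        logical wantt = TRUE_, wantz = TRUE_;
        integer ilo = 1, ihi = n, iloz = 1, ihiz = n, info = 0;
        integer lwork = -1;          /* -1 == workspace query            */
        doublereal query = 1.;       /* receives the estimate, WORK(1)   */

        dlaqr0_(&wantt, &wantz, &n, &ilo, &ihi, h, &ldh, wr, wi,
                &iloz, &ihiz, z, &ldz, &query, &lwork, &info);
        return (integer) query;      /* allocate this much, then re-call */
    }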
==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to DLAQR3 ==== +*/ + + i__1 = nwr + 1; + dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) 
{ + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if DLAQR3 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . DLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. 
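The guard on the QR sweep a few lines above is easy to misread because of C operator precedence: the expensive multishift sweep is performed only when nothing deflated this round, or when the deflation count stayed at or below NIBBLE percent of the window while the active block is still larger than MIN(NMIN,NWMAX). A small predicate restating the complementary "skip the sweep" test (hypothetical helper; integer and the min macro are the f2c definitions used throughout this file):

    static int skip_qr_sweep(integer ld, integer nw, integer nibble,
                             integer active, integer nmin, integer nwmax)
    {
        if (ld == 0)
            return 0;                        /* nothing deflated: must sweep      */
        if (ld * 100 > nw * nibble)
            return 1;                        /* over NIBBLE percent deflated: skip */
        return active <= min(nmin, nwmax);   /* tiny active block: skip too        */
    }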
==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use DLAQR4 or + . DLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. ==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. 
==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR0 ==== */ + + return 0; +} /* dlaqr0_ */ + +/* Subroutine */ int dlaqr1_(integer *n, doublereal *h__, integer *ldh, + doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, + doublereal *v) +{ + /* System generated locals */ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static doublereal s, h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + + scaling to avoid overflows and most underflows. It + is assumed that either + + 1) sr1 = sr2 and si1 = -si2 + or + 2) si1 = si2 = 0. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) DOUBLE PRECISION array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + SR1 (input) DOUBLE PRECISION + SI1 The shifts in (*). + SR2 + SI2 + + V (output) DOUBLE PRECISION array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)); + if (s == 0.) 
{ + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * + ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2); + } + } else { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs( + d__3)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) + - *si1 * (*si2 / s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ + h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2) + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * + sr2) + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} /* dlaqr1_ */ + +/* Subroutine */ int dlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + static integer i__, j, k; + static doublereal s, aa, bb, cc, dd, cs, sn; + static integer jw; + static doublereal evi, evk, foo; + static integer kln; + static doublereal tau, ulp; + static integer lwk1, lwk2; + static doublereal beta; + static integer kend, kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dgemm_(char *, char *, integer *, integer * + , integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static logical bulge; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( + doublereal *, doublereal *); + + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmax; + extern /* Subroutine */ int dtrexc_(char *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *), dormhr_(char *, char *, integer + *, integer *, integer *, integer *, doublereal *, integer *, + 
doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *); + static logical sorted; + static doublereal smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- June 2010 -- + + + This subroutine is identical to DLAQR3 except that it avoids + recursion by calling DLAHQR instead of DLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. 
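It may help to state the deflation test this routine applies once the window has been reduced: with the trailing JW-by-JW window brought to Schur form T = V**T * H(KWTOP:KBOT,KWTOP:KBOT) * V and spike value s = H(KWTOP,KWTOP-1), a real eigenvalue sitting in position NS of the window is accepted as converged when its spike entry is negligible,

        abs( s * V(1,NS) ) <= max( smlnum, ulp * abs( T(NS,NS) ) ),

with the corresponding two-column test for a 2-by-2 (complex conjugate) block; smlnum and ulp are the machine constants set up in the body below. Converged eigenvalues stay at the bottom of T, while undeflatable ones are moved up out of the way with DTREXC.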
+ + SR (output) DOUBLE PRECISION array, dimension (KBOT) + SI (output) DOUBLE PRECISION array, dimension (KBOT) + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; DLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + dlaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], + &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (DTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, DTREXC does the right thing with + . ILST in case of a rare exchange failure. 
==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. ==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + +/* ==== Reflect spike back into lower triangle ==== */ + + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . 
H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + dgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
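The update loops above all follow one pattern: the vertical slabs of H and Z are right-multiplied by the orthogonal window transformation V, the horizontal slab of H is left-multiplied by V**T, and in each case DGEMM writes the product into scratch storage which DLACPY then copies back so the next slab can reuse the same workspace. A minimal sketch of one vertical slab step (hypothetical helper; f2c typedefs assumed, and local one/zero constants stand in for this file's c_b15/c_b29):

    /* Overwrite an m-by-jw column-major slab of H with Hslab * V,
       using WV (ldwv >= m) as scratch.                              */
    static void update_slab(integer m, integer jw,
                            doublereal *hslab, integer ldh,
                            doublereal *v, integer ldv,
                            doublereal *wv, integer ldwv)
    {
        doublereal one = 1., zero = 0.;

        dgemm_("N", "N", &m, &jw, &jw, &one, hslab, &ldh,
               v, &ldv, &zero, wv, &ldwv);
        dlacpy_("A", &m, &jw, wv, &ldwv, hslab, &ldh);
    }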
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR2 ==== */ + + return 0; +} /* dlaqr2_ */ + +/* Subroutine */ int dlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + static integer i__, j, k; + static doublereal s, aa, bb, cc, dd, cs, sn; + static integer jw; + static doublereal evi, evk, foo; + static integer kln; + static doublereal tau, ulp; + static integer lwk1, lwk2, lwk3; + static doublereal beta; + static integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dgemm_(char *, char *, integer *, integer * + , integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static logical bulge; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr4_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), + dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal safmax; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dormhr_(char *, char *, integer *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + static logical sorted; + static doublereal smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ -- June 2010 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) DOUBLE PRECISION array, dimension (KBOT) + SI (output) DOUBLE PRECISION array, dimension (KBOT) + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) + An NW-by-NW work array. 
+ + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; DLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Workspace query call to DLAQR4 ==== */ + + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], + &si[1], &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, & + infqr); + lwk3 = (integer) work[1]; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + dlaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "DLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], + lwork, &infqr); + } else { + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (DTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, DTREXC does the right thing with + . ILST in case of a rare exchange failure. 
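+   .    (Editorial note, not part of the upstream LAPACK text: the
+   .    deflation tests above compare the spike entries S*V(1,NS)
+   .    and, for a conjugate pair, S*V(1,NS-1) against
+   .    MAX( SMLNUM, ULP*FOO ), where S = H(KWTOP,KWTOP-1) is the
+   .    subdiagonal spike and FOO approximates the magnitude of the
+   .    candidate eigenvalue.  An eigenvalue is deflated only when
+   .    its contribution to the spike is negligible at that scale.)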
==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. ==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + +/* ==== Reflect spike back into lower triangle ==== */ + + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . 
H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + dgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
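+   .    (Editorial note, not part of the upstream LAPACK text: DLAQR3
+   .    follows the usual LAPACK workspace-query convention.  Passing
+   .    LWORK = -1 makes the routine store only the optimal workspace
+   .    size in WORK(1) and return immediately, so a caller can size
+   .    WORK before the real call.  A schematic two-pass call
+   .    sequence, with every argument other than WORK and LWORK left
+   .    unchanged between the two calls, looks like
+   .
+   .        lwork = -1;
+   .        dlaqr3_( ..., &wkopt, &lwork );    first pass: wkopt gets LWKOPT
+   .        lwork = (integer) wkopt;
+   .        dlaqr3_( ..., work,   &lwork );    second pass: real computation
+   .
+   .    where "..." abbreviates the argument list documented above and
+   .    wkopt is an illustrative single doublereal, not a name used by
+   .    this source.)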
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR3 ==== */ + + return 0; +} /* dlaqr3_ */ + +/* Subroutine */ int dlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, k; + static doublereal aa, bb, cc, dd; + static integer ld; + static doublereal cs; + static integer nh, it, ks, kt; + static doublereal sn; + static integer ku, kv, ls, ns; + static doublereal ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static doublereal swap; + static integer ktop; + static doublereal zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int dlaqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr5_( + logical *, logical *, integer *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *); + static integer nibble; + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for DLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by DLAQR0 and, for large enough + deflation window size, it may be called by DLAQR3. This + subroutine is identical to DLAQR0 except that it calls DLAQR2 + instead of DLAQR3. + + Purpose + ======= + + DLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. 
: the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to DGEBAL, and then passed to DGEHRD when the + matrix output by DGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (IHI) + WI (output) DOUBLE PRECISION array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then DLAQR4 does a workspace query. + In this case, DLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). 
No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, DLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . 
(In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to DLAQR2 ==== +*/ + + i__1 = nwr + 1; + dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(DLAQR5, DLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. 
At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if DLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . DLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use DLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) 
==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & + c__1, zdum, &c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. ==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). 
==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR4 ==== */ + + return 0; +} /* dlaqr4_ */ + +/* Subroutine */ int dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal + *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, + integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * + ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Local variables */ + static integer i__, j, k, m, i2, j2, i4, j4, k1; + static doublereal h11, h12, h21, h22; + static integer m22, ns, nu; + static doublereal vt[3], scl; + static integer kdu, kms; + static doublereal ulp; + static integer knz, kzs; + static doublereal tst1, tst2, beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot; + static doublereal swap; + static integer jtop, jrow, mtop; + static doublereal alpha; + static logical accum; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dlaqr1_( + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlabad_(doublereal *, + doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmax, refsum; + static integer mstart; + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + This auxiliary subroutine called by DLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the quasi-triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the orthogonal Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: DLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: DLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. 
+ = 2: DLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. + + SR (input/output) DOUBLE PRECISION array of size (NSHFTS) + SI (input/output) DOUBLE PRECISION array of size (NSHFTS) + SR contains the real parts and SI contains the imaginary + parts of the NSHFTS shifts of origin that define the + multi-shift QR sweep. On output SR and SI may be + reordered. + + H (input/output) DOUBLE PRECISION array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep orthogonal + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDZ is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) DOUBLE PRECISION array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) DOUBLE PRECISION array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV available for workspace. + NV.GE.1. + + WV (workspace) DOUBLE PRECISION array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. 
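+   .    (Editorial note, not part of the upstream LAPACK text: as in
+   .    the other f2c-translated routines in this file, the parameter
+   .    adjustments below shift each array pointer so that the
+   .    Fortran element H(i,j) is addressed as h__[i + j * h_dim1]
+   .    with 1-based i and j, and the 1-based vectors SR, SI and the
+   .    workspaces are addressed as sr[i], si[i] and so on.)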
==== +*/ + + /* Parameter adjustments */ + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== Shuffle shifts into pairs of real shifts and pairs + . of complex conjugate shifts assuming complex + . conjugate shifts are already adjacent to one + . another. ==== +*/ + + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } +/* L10: */ + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. The shuffle above + . ensures that the dropped shift is real and that + . the remaining shifts are paired. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + dlaset_("ALL", &kdu, &kdu, &c_b29, &c_b15, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. 
==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) ==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m + << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * + 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * + h_dim1] != 0. || h__[k + 3 + (k + 2) * h_dim1] == + 0.) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * + 2], &si[m * 2], vt); + alpha = vt[0]; + dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * + h__[k + 2 + k * h_dim1]); + + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], + abs(d__1)) + (d__2 = refsum * vt[2], abs(d__2) + ) > ulp * ((d__3 = h__[k + k * h_dim1], abs( + d__3)) + (d__4 = h__[k + 1 + (k + 1) * h_dim1] + , abs(d__4)) + (d__5 = h__[k + 2 + (k + 2) * + h_dim1], abs(d__5)))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. ==== +*/ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } +/* L20: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. 
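+   .    (Editorial note, not part of the upstream LAPACK text: the
+   .    2-by-2 reflector below handles the small trailing bulge,
+   .    flagged by BMP22 above, that arises when the chain reaches
+   .    the bottom of the block, i.e. when KRCOL + 3*(M22-1) equals
+   .    KBOT-2 and there is no room for a full 3-by-3 reflector.)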
==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( + m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], + &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ + m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * + v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L30: */ + } +/* L40: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L50: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * + h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) + * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + + 3]; +/* L60: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) ==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * + v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L70: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. 
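+   .    (Editorial note, not part of the upstream LAPACK text: each
+   .    reflector has the form H = I - tau*v*v' with v = (1, V(2,M),
+   .    V(3,M))' and tau stored in V(1,M).  It is applied by forming
+   .    REFSUM, which is tau times the inner product of v with the
+   .    affected row or column, and then subtracting REFSUM-scaled
+   .    multiples of the entries of v, as in the loops above and
+   .    below.)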
==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ + j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * + v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L80: */ + } + } + } +/* L90: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22 && v[m22 * v_dim1 + 1] != 0.) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) + ; + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L100: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L110: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L120: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + (d__2 = + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs( + d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], + abs(d__1)); + } + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max( + d__2,d__3)) { +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h12 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h21 = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3,d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) + { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } +/* L130: */ + } + +/* + ==== Fill in the last row of each bulge. ==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( + k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L140: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L150: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? 
jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + dgemm_("C", "N", &nu, &jlen, &nu, &c_b15, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b29, &wh[wh_offset], ldwh); + dlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L160: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + dgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b29, &wv[wv_offset], ldwv); + dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L170: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + dgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b29, &wv[wv_offset], ldwv); + dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L180: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + dlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + dlaset_("ALL", &kzs, &jlen, &c_b29, &c_b29, &wh[wh_offset] + , ldwh); + dtrmm_("L", "U", "C", "N", &knz, &jlen, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + dgemm_("C", "N", &i2, &jlen, &j2, &c_b15, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b15, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + dlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + dtrmm_("L", "L", "C", "N", &j2, &jlen, &c_b15, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("C", "N", &i__5, &jlen, &i__7, &c_b15, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b15, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + dlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L190: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . columns get multiplied by zero) ==== +*/ + + dlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + dlaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[wv_offset] + , ldwv); + dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + dgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b15, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + dlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b15, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L200: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + dlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + dlaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[ + wv_offset], ldwv); + dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + dgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b15, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + dlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b15, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L210: */ + } + } + } + } +/* L220: */ + } + +/* ==== End of DLAQR5 ==== */ + + return 0; +} /* dlaqr5_ */ + +/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + static integer i__; + static logical applyleft; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static integer lastc, lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) DOUBLE PRECISION + The value tau in the representation of H. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. 
+ + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + dgemv_("Transpose", &lastv, &lastc, &c_b15, &c__[c_offset], ldc, & + v[1], incv, &c_b29, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ + + d__1 = -(*tau); + dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + dgemv_("No transpose", &lastc, &lastv, &c_b15, &c__[c_offset], + ldc, &v[1], incv, &c_b29, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ + + d__1 = -(*tau); + dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* dlarf_ */ + +/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static integer lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. 
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) DOUBLE PRECISION array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) DOUBLE PRECISION array, dimension (LDT,K) + The triangular k by k matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* dlarfb_ */ + +/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + 
+ /* Local variables */ + static integer j, knt; + static doublereal beta; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal xnorm; + + static doublereal safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) DOUBLE PRECISION + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) DOUBLE PRECISION array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) DOUBLE PRECISION + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = SAFEMINIMUM / EPSILON; + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. 
/ (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFG */ + +} /* dlarfg_ */ + +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer i__, j, prevlastv; + static doublereal vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static integer lastv; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) DOUBLE PRECISION array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) DOUBLE PRECISION array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. 
+ + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.; + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ + i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + i__2 = i__ - 1; + i__3 = j - i__ + 1; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b29, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) +*/ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) 
{ + goto L36; + } + } +L36: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of DLARFT */ + +} /* dlarft_ */ + +/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + + /* Local variables */ + static integer j; + static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, + v6, v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) DOUBLE PRECISION + The value tau in the representation of H. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* Code for general M */ + + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = 
*tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* Code for general N */ + + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = 
v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of DLARFX */ + +} /* dlarfx_ */ + +/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__; + static doublereal f1, g1, eps, scale; + static integer count; + static doublereal safmn2, safmx2; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLARTG generate a plane rotation so that + + [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a slower, more accurate version of the BLAS1 routine DROTG, + with the following other differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any + floating point operations (saves work in DBDSQR when + there are zeros on the diagonal). + + If F exceeds G in magnitude, CS will be positive. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The first component of vector to be rotated. + + G (input) DOUBLE PRECISION + The second component of vector to be rotated. + + CS (output) DOUBLE PRECISION + The cosine of the rotation. + + SN (output) DOUBLE PRECISION + The sine of the rotation. + + R (output) DOUBLE PRECISION + The nonzero component of the rotated vector. + + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + + ===================================================================== + + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / + + IF( FIRST ) THEN +*/ + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* + FIRST = .FALSE. + END IF +*/ + if (*g == 0.) { + *cs = 1.; + *sn = 0.; + *r__ = *f; + } else if (*f == 0.) { + *cs = 0.; + *sn = 1.; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (abs(*f) > abs(*g) && *cs < 0.) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of DLARTG */ + +} /* dlartg_ */ + +/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAS2 computes the singular values of the 2-by-2 matrix + [ F G ] + [ 0 H ]. 
+ On return, SSMIN is the smaller singular value and SSMAX is the + larger singular value. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + The smaller singular value. + + SSMAX (output) DOUBLE PRECISION + The larger singular value. + + Further Details + =============== + + Barring over/underflow, all output quantities are correct to within + a few units in the last place (ulps), even in the absence of a guard + digit in addition/subtraction. + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows, or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ==================================================================== +*/ + + + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.) { + +/* + Avoid possible harmful underflow if exponent range + asymmetric (true SSMIN may not underflow even if + AU underflows) +*/ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = as * au; +/* Computing 2nd power */ + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of DLAS2 */ + +} /* dlas2_ */ + +/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static doublereal mul, cto1; + static logical done; + static doublereal ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static doublereal cfrom1; + + static doublereal cfromc; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASCL multiplies the M by N real matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. 
TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || disnan_(cfrom)) { + *info = -4; + } else if (disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. 
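+
+       (CFROM1 == CFROMC can only hold here when CFROMC is infinite:
+       CFROM = 0 and NaN inputs were rejected above, and multiplying any
+       finite nonzero value by SMLNUM changes it.)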
+*/ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. +*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! 
done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* dlascl_ */ + +/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, + iwk, lvl, ndb1, nlp1, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *), dlasdq_(char *, integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), xerbla_( + char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + Using a divide and conquer approach, DLASD0 computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M + matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + The algorithm computes orthogonal matrices U and VT such that + B = U * S * VT. The singular values S are overwritten on D. + + A related subroutine, DLASDA, computes only the singular values, + and optionally, the singular vectors in compact form. + + Arguments + ========= + + N (input) INTEGER + On entry, the row dimension of the upper bidiagonal matrix. + This is also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N+1; + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. + On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (M-1) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) + On exit, U contains the left singular vectors. + + LDU (input) INTEGER + On entry, leading dimension of U. + + VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) + On exit, VT' contains the right singular vectors. + + LDVT (input) INTEGER + On entry, leading dimension of VT. + + SMLSIZ (input) INTEGER + On entry, maximum size of the subproblems at the + bottom of the computation tree. + + IWORK (workspace) INTEGER work array. + Dimension must be at least (8 * N) + + WORK (workspace) DOUBLE PRECISION work array. + Dimension must be at least (3 * M**2 + 2 * M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + For the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* + Find the first node LF and last node LL on the + current level LVL. 
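+
+       At level LVL the nodes are numbered LF = 2**( LVL-1 ) through
+       LL = 2*LF - 1 (for example, LVL = 3 gives LF = 4 and LL = 7);
+       level 1 consists of the single root node.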
+*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASD0 */ + +} /* dlasd0_ */ + +/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, + idxc, idxp, ldvt2; + extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlasd3_( + integer *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + static integer coltyp; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + + A related subroutine DLASD7 handles the case in which the singular + values (and the singular vectors in factored form) are desired. + + DLASD1 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The left singular vectors of the original matrix are stored in U, and + the transpose of the right singular vectors are stored in VT, and the + singular values are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or when there are zeros in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD2. 
+ + The second stage consists of calculating the updated + singular values. This is done by finding the square roots of the + roots of the secular equation via the routine DLASD4 (as called + by DLASD3). This routine also calculates the singular vectors of + the current problem. + + The final stage consists of computing the updated singular vectors + directly using the updated singular values. The singular vectors + for the current problem are multiplied with the singular vectors + from the overall problem. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, + dimension (N = NL+NR+1). + On entry D(1:NL,1:NL) contains the singular values of the + upper block; and D(NL+2:N) contains the singular values of + the lower block. On exit D(1:N) contains the singular values + of the modified matrix. + + ALPHA (input/output) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input/output) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U(1:NL, 1:NL) contains the left singular vectors of + the upper block; U(NL+2:N, NL+2:N) contains the left singular + vectors of the lower block. On exit U contains the left + singular vectors of the bidiagonal matrix. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max( 1, N ). + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + where M = N + SQRE. + On entry VT(1:NL+1, 1:NL+1)' contains the right singular + vectors of the upper block; VT(NL+2:M, NL+2:M)' contains + the right singular vectors of the lower block. On exit + VT' contains the right singular vectors of the + bidiagonal matrix. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= max( 1, M ). + + IDXQ (output) INTEGER array, dimension(N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension( 4 * N ) + + WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* + The following values are for bookkeeping purposes only. 
They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD2 and DLASD3. +*/ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* + Scale. + + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. */ + + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD1 */ + +} /* dlasd1_ */ + +/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__; + static integer i__, j, m, n; + static doublereal s; + static integer k2; + static doublereal z1; + static integer ct, jp; + static doublereal eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4], idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASD2 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. 
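+
+   Each deflated value drops out of the secular equation that DLASD3 and
+   DLASD4 solve afterwards, so the order K of that equation (see the
+   argument list below) can be much smaller than N.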
+ There are two ways in which deflation can occur: when two or more + singular values are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + DLASD2 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension(N) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) DOUBLE PRECISION array, dimension(N) + On exit Z contains the updating row vector in the secular + equation. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U contains the left singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL, NL), and (NL+2, NL+2), (N,N). + On exit U contains the trailing (N-K) updated left singular + vectors (those which were deflated) in its last N-K columns. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. + + DSIGMA (output) DOUBLE PRECISION array, dimension (N) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) + Contains a copy of the first K-1 left singular vectors which + will be used by DLASD3 in a matrix multiply (DGEMM) to solve + for the new left singular vectors. U2 is arranged into four + blocks. The first block contains a column with 1 at NL+1 and + zero everywhere else; the second block contains non-zero + entries only at and above NL; the third contains non-zero + entries only below NL+1; and the fourth is dense. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) + VT2' contains a copy of the first K right singular vectors + which will be used by DLASD3 in a matrix multiply (DGEMM) to + solve for the new right singular vectors. VT2 is arranged into + three blocks. The first block contains a row that corresponds + to the special 0 diagonal element in SIGMA; the second block + contains non-zeros only at and before NL +1; the third block + contains non-zeros only at and after NL +2. 
+ + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= M. + + IDXP (workspace) INTEGER array dimension(N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDX (workspace) INTEGER array dimension(N) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXC (output) INTEGER array dimension(N) + This will contain the permutation used to arrange the columns + of the deflated U matrix into three groups: the first group + contains non-zero entries only at and above NL, the second + contains non-zero entries only below NL+2, and the third is + dense. + + IDXQ (input/output) INTEGER array dimension(N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + COLTYP (workspace/output) INTEGER array dimension(N) + As workspace, this will contain a label which will indicate + which of the following types a column in the U2 matrix or a + row in the VT2 matrix is: + 1 : non-zero in the upper half only + 2 : non-zero in the lower half only + 3 : dense + 4 : deflated + + On exit, it is an array of dimension 4, with COLTYP(I) being + the dimension of the I-th type columns. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* + Generate the first part of the vector Z; and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. 
*/ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* + DSIGMA, IDXC, IDXC, and the first column of U2 + are used as storage space. +*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + +/* + Apply back the Givens rotation to the left and right + singular vector matrices. +*/ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. 
*/ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four groups of uniform structure (although one or more of these + groups may be empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* + Fill out the IDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's, starting from the + second column. This applies similarly to the rows of VT. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* + Sort the singular values and corresponding singular vectors into + DSIGMA, U2, and VT2 respectively. The singular values/vectors + which were not deflated go into the first K slots of DSIGMA, U2, + and VT2 respectively, while those which were deflated go into the + last N - K slots, except that the first column/row will be treated + separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. */ + + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* + Determine the first column of U2, the first row of VT2 and the + last row of VT. +*/ + + dlaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* + The deflated singular values and their corresponding vectors go + into the back of D, U, and V respectively. +*/ + + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in DLASD3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of DLASD2 */ + +} /* dlasd2_ */ + +/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, m, n, jc; + static doublereal rho; + static integer nlp1, nlp2, nrp1; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer ctemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLASD3 finds all the square roots of the roots of the secular + equation, as defined by the values in D and Z. It makes the + appropriate calls to DLASD4 and then updates the singular + vectors by matrix multiplication. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + DLASD3 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (input) INTEGER + The size of the secular equation, 1 =< K = < N. + + D (output) DOUBLE PRECISION array, dimension(K) + On exit the square roots of the roots of the secular equation, + in ascending order. + + Q (workspace) DOUBLE PRECISION array, + dimension at least (LDQ,K). + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= K. + + DSIGMA (input) DOUBLE PRECISION array, dimension(K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. 
+ + U (output) DOUBLE PRECISION array, dimension (LDU, N) + The last N - K columns of this matrix contain the deflated + left singular vectors. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + U2 (input/output) DOUBLE PRECISION array, dimension (LDU2, N) + The first K columns of this matrix contain the non-deflated + left singular vectors for the split problem. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (output) DOUBLE PRECISION array, dimension (LDVT, M) + The last M - K columns of VT' contain the deflated + right singular vectors. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= N. + + VT2 (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) + The first K columns of VT2' contain the non-deflated + right singular vectors for the split problem. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= N. + + IDXC (input) INTEGER array, dimension ( N ) + The permutation used to arrange the columns of U (and rows of + VT) into three groups: the first group contains non-zero + entries only at and above (or before) NL +1; the second + contains non-zero entries only at and below (or after) NL+2; + and the third is dense. The first column of U and the row of + VT are treated separately, however. + + The rows of the singular vectors found by DLASD4 + must be likewise permuted before the matrix multiplies can + take place. + + CTOT (input) INTEGER array, dimension ( 4 ) + A count of the total number of the various types of columns + in U (or rows in VT), as described in IDXC. The fourth column + type is any column which has been deflated. + + Z (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) 
{ + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DSIGMA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* + Compute left singular vectors of the modified diagonal matrix, + and store related information for the right singular vectors. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. 
*/ + + if (*k == 2) { + dgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b29, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + dgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] + , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], + ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b29, &u[u_dim1 + 1], ldu); + } else { + dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. */ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. */ + + if (*k == 2) { + dgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] + , ldvt2, &c_b29, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], + ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + + 1], ldvt); + + return 0; + +/* End of DLASD3 */ + +} /* dlasd3_ */ + +/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w, dd[3]; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, + dtiip; + static integer niter; + static doublereal dtisq; + static logical swtch; + static doublereal dtnsq; + extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal delsq2, dtnsq1; + static logical swtch3; + + 
static logical orgati; + static doublereal erretm, dtipsq, rhoinv; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the square root of the I-th updated + eigenvalue of a positive symmetric rank-one modification to + a positive diagonal matrix whose entries are given as the squares + of the corresponding entries in the array d, and that + + 0 <= D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) * diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension ( N ) + The original eigenvalues. It is assumed that they are in + order, 0 <= D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension ( N ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + (singular) eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + SIGMA (output) DOUBLE PRECISION + The computed sigma_I, the I-th updated eigenvalue. + + WORK (workspace) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th + component. If N = 1, then WORK( 1 ) = 1. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.; + work[1] = 1.; + return 0; + } + if (*n == 2) { + dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. 
/ *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +*/ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +*/ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. 
This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.) { + eta /= 2.; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.) 
{ + +/* + d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + sg2lb = 0.; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( I )^2. The + following, however, is the corresponding estimation of + SIGMA - D( I ). +*/ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* + (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The + following, however, is the corresponding estimation of + SIGMA - D( IP1 ). +*/ + + eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, + abs(d__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) 
{ + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) 
{ + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of DLASD4 */ + +} /* dlasd4_ */ + +/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + static doublereal b, c__, w, del, tau, delsq; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the square root of the I-th eigenvalue + of a positive symmetric rank-one modification of a 2-by-2 diagonal + matrix + + diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + + The diagonal entries in the array D are assumed to satisfy + + 0 <= D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension ( 2 ) + The original eigenvalues. We assume 0 <= D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension ( 2 ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) + Contains (D(j) - sigma_I) in its j-th component. + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DSIGMA (output) DOUBLE PRECISION + The computed sigma_I, the I-th updated eigenvalue. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) + WORK contains (D(j) + sigma_I) in its j-th component. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. 
+ d__[2])) / del + 1.; + if (w > 0.) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* + B > ZERO, always + + The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +*/ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2. + tau; + work[2] = d__[1] + tau + d__[2]; +/* + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) +*/ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU +*/ + } +/* + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. 
+ tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } + return 0; + +/* End of DLASD5 */ + +} /* dlasd5_ */ + +/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlasd7_(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLASD6 computes the SVD of an updated upper bidiagonal matrix B + obtained by merging two smaller ones by appending a row. This + routine is used only for the problem which requires all singular + values and optionally singular vector matrices in factored form. + B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + A related subroutine, DLASD1, handles the case in which all singular + values and singular vectors of the bidiagonal matrix are desired. + + DLASD6 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The singular values of B can be computed using D1, D2, the first + components of all the right singular vectors of the lower block, and + the last components of all the right singular vectors of the upper + block. These components are stored and updated in VF and VL, + respectively, in DLASD6. Hence U and VT are not explicitly + referenced. + + The singular values are stored in D. 
The algorithm consists of two + stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or if there is a zero + in the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD7. + + The second stage consists of calculating the updated + singular values. This is done by finding the roots of the + secular equation via the routine DLASD4 (as called by DLASD8). + This routine also updates VF and VL and computes the distances + between the updated singular values and the old singular + values. + + DLASD6 is called from DLASDA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). + On entry D(1:NL,1:NL) contains the singular values of the + upper block, and D(NL+2:N) contains the singular values + of the lower block. On exit D(1:N) contains the singular + values of the modified matrix. + + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors of + the lower block. On exit, VL contains the last components of + all right singular vectors of the bidiagonal matrix. + + ALPHA (input/output) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input/output) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + IDXQ (output) INTEGER array, dimension ( N ) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM and POLES, must be at least N. 
+ + POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On exit, POLES(1,*) is an array containing the new singular + values obtained from solving the secular equation, and + POLES(2,*) is an array containing the poles in the secular + equation. Not referenced if ICOMPQ = 0. + + DIFL (output) DOUBLE PRECISION array, dimension ( N ) + On exit, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + On exit, DIFR(I, 1) is the distance between I-th updated + (undeflated) singular value and the I+1-th (undeflated) old + singular value. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + See DLASD8 for details on DIFL and DIFR. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + The first elements of this array contain the components + of the deflation-adjusted updating row vector. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) + + IWORK (workspace) INTEGER array, dimension ( 3 * N ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD6", &i__1); + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD7 and DLASD8. +*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* + Scale. 
+ + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD6 */ + +} /* dlasd6_ */ + +/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, m, n, k2; + static doublereal z1; + static integer jp; + static doublereal eps, tau, tol; + static integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASD7 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. There + are two ways in which deflation can occur: when two or more singular + values are close together or if there is a tiny entry in the Z + vector. For each such occurrence the order of the related + secular equation problem is reduced by one. + + DLASD7 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows: + = 0: Compute singular values only. + = 1: Compute singular vectors of upper + bidiagonal matrix in compact form. 
+ + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, this is + the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + On exit Z contains the updating row vector in the secular + equation. + + ZW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for Z. + + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VFW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VF. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors + of the lower block. On exit, VL contains the last components + of all right singular vectors of the bidiagonal matrix. + + VLW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VL. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + IDX (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXP (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDXQ (input) INTEGER array, dimension ( N ) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first half of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each singular block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. 
+ + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM, must be at least N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* + Generate the first part of the vector Z and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. 
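+
+   (Illustration with hypothetical numbers, not taken from the routine:
+   if TOL comes out as 1.0E-14 and ABS( Z( J ) ) = 1.0E-20, the loop
+   below merely decrements K2 and records IDXP( K2 ) = J; that deflated
+   value is later copied into the trailing N - K entries of D.)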
+ + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* + Sort the singular values into DSIGMA. The singular values which + were not deflated go into the first K slots of DSIGMA, except + that DSIGMA(1) is treated separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* + The deflated singular values go back into the last N - K slots of + D. +*/ + + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* + Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and + VL(M). +*/ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.; + *s = 0.; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. 
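+   The copies below bring entries 2 .. K of Z and entries 2 .. N of VF
+   and VL back from the workspace arrays ZW, VFW, and VLW.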
*/ + + i__1 = *k - 1; + dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of DLASD7 */ + +} /* dlasd7_ */ + +/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j; + static doublereal dj, rho; + static integer iwk1, iwk2, iwk3; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static integer iwk2i, iwk3i; + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLASD8 finds the square roots of the roots of the secular equation, + as defined by the values in DSIGMA and Z. It makes the appropriate + calls to DLASD4, and stores, for each element in D, the distance + to its two nearest poles (elements in DSIGMA). It also updates + the arrays VF and VL, the first and last components of all the + right singular vectors of the original bidiagonal matrix. + + DLASD8 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form in the calling routine: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + K (input) INTEGER + The number of terms in the rational function to be solved + by DLASD4. K >= 1. + + D (output) DOUBLE PRECISION array, dimension ( K ) + On output, D contains the updated singular values. + + Z (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, the first K elements of this array contain the + components of the deflation-adjusted updating row vector. + On exit, Z is updated. + + VF (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VF contains information passed through DBEDE8. + On exit, VF contains the first K components of the first + components of all right singular vectors of the bidiagonal + matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VL contains information passed through DBEDE8. + On exit, VL contains the first K components of the last + components of all right singular vectors of the bidiagonal + matrix. + + DIFL (output) DOUBLE PRECISION array, dimension ( K ) + On exit, DIFL(I) = D(I) - DSIGMA(I). 
+ + DIFR (output) DOUBLE PRECISION array, + dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and + dimension ( K ) if ICOMPQ = 0. + On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not + defined and will not be referenced. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + LDDIFR (input) INTEGER + The leading dimension of DIFR, must be at least K. + + DSIGMA (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, the first K elements of this array contain the old + roots of the deflated updating problem. These are the poles + of the secular equation. + On exit, the elements of DSIGMA may be very slightly altered + in value. + + WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + dlaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); + +/* + Compute the updated singular values, the arrays DIFL, DIFR, + and the updated Z. 
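+   For each J the call to DLASD4 below returns the updated value in
+   D( J ), the differences DSIGMA( I ) - D( J ) starting at WORK( IWK1 ),
+   and the sums DSIGMA( I ) + D( J ) starting at WORK( IWK2 ); DIFL( J )
+   and DIFR( J, 1 ) are the negated J-th and (J+1)-st of those
+   differences.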
+*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of DLASD8 */ + +} /* dlasd8_ */ + +/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, + nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer sqrei; + extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + static integer nwork1, nwork2; + extern /* Subroutine */ int dlasdq_(char *, integer 
*, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + static integer smlszp; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + Using a divide and conquer approach, DLASDA computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + B with diagonal D and offdiagonal E, where M = N + SQRE. The + algorithm computes the singular values in the SVD B = U * S * VT. + The orthogonal matrices U and VT are optionally computed in + compact form. + + A related subroutine, DLASD0, computes the singular values and + the singular vectors in explicit form. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows + = 0: Compute singular values only. + = 1: Compute singular vectors of upper bidiagonal + matrix in compact form. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row dimension of the upper bidiagonal matrix. This is + also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N + 1. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension ( M-1 ) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left + singular vector matrices of all subproblems at the bottom + level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, POLES, + GIVNUM, and Z. + + VT (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right + singular vector matrices of all subproblems at the bottom + level. + + K (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th + secular equation on the computation tree. + + DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), + where NLVL = floor(log_2 (N/SMLSIZ))). + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) + record distances between singular values on the I-th + level and singular values on the (I -1)-th level, and + DIFR(1:N, 2 * I ) contains the normalizing factors for + the right singular vector matrix. See DLASD8 for details. 
+ + Z (output) DOUBLE PRECISION array, + dimension ( LDU, NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + The first K elements of Z(1, I) contain the components of + the deflation-adjusted updating row vector for subproblems + on the I-th level. + + POLES (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and + POLES(1, 2*I) contain the new and old singular values + involved in the secular equations on the I-th level. + + GIVPTR (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1, and not referenced if + ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records + the number of Givens rotations performed on the I-th + problem on the computation tree. + + GIVCOL (output) INTEGER array, + dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations + of Givens rotations performed on the I-th level on the + computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (output) INTEGER array, + dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records + permutations done on the I-th level of the computation tree. + + GIVNUM (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- + values of Givens rotations performed on the I-th level on + the computation tree. + + C (output) DOUBLE PRECISION array, + dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1 and the I-th subproblem is not square, on exit, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (output) DOUBLE PRECISION array, dimension ( N ) if + ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 + and the I-th subproblem is not square, on exit, S( I ) + contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array, dimension + (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). + + IWORK (workspace) INTEGER array. + Dimension must be at least (7 * N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + for the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. 
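+
+   Each bottom-level node I (I = NDB1, ..., ND) describes a left and a right
+   leaf block around its center row IC = INODE(I); row IC itself supplies
+   ALPHA = D(IC) and BETA = E(IC) when the two halves are merged by DLASD6
+   in the conquer phase.  A small plain-C sketch of the row ranges
+   (hypothetical helper, 0-based arrays for brevity):
+
+       static void leaf_ranges(const int *inode, const int *ndiml,
+                               const int *ndimr, int i,
+                               int *left_lo, int *left_hi,
+                               int *right_lo, int *right_hi)
+       {
+           int ic = inode[i], nl = ndiml[i], nr = ndimr[i];
+           *left_lo  = ic - nl;  *left_hi  = ic - 1;   // NL rows, start NLF
+           *right_lo = ic + 1;   *right_hi = ic + nr;  // NR rows, start NRF
+       }
+
+   Each leaf has at most SMLSIZ rows, so DLASDQ handles it directly.
+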
+*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. 
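+
+   The tree nodes are stored in breadth-first order, so on level LVL the
+   node indices run from LF = 2**(LVL-1) to LL = 2*LF - 1 (level 1 is just
+   the root, node 1).  The same indexing as a tiny plain-C sketch
+   (hypothetical helper, mirrors the code below):
+
+       static void level_range(int lvl, int *lf, int *ll)
+       {
+           *lf = (lvl == 1) ? 1 : 1 << (lvl - 1);
+           *ll = 2 * *lf - 1;
+       }
+
+   For example, level 3 covers nodes 4 through 7.
+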
+*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASDA */ + +} /* dlasda_ */ + +/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j; + static doublereal r__, cs, sn; + static integer np1, isub; + static doublereal smin; + static integer sqre1; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); + static integer iuplo; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *), dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static logical rotate; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASDQ computes the singular value decomposition (SVD) of a real + (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + E, accumulating the transformations if desired. Letting B denote + the input bidiagonal matrix, the algorithm computes orthogonal + matrices Q and P such that B = Q * S * P' (P' denotes the transpose + of P). The singular values S are overwritten on D. + + The input matrix U is changed to U * Q if desired. + The input matrix VT is changed to P' * VT if desired. + The input matrix C is changed to Q' * C if desired. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. 
Kahan, + LAPACK Working Note #3, for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + On entry, UPLO specifies whether the input bidiagonal matrix + is upper or lower bidiagonal, and wether it is square are + not. + UPLO = 'U' or 'u' B is upper bidiagonal. + UPLO = 'L' or 'l' B is lower bidiagonal. + + SQRE (input) INTEGER + = 0: then the input matrix is N-by-N. + = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and + (N+1)-by-N if UPLU = 'L'. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + N (input) INTEGER + On entry, N specifies the number of rows and columns + in the matrix. N must be at least 0. + + NCVT (input) INTEGER + On entry, NCVT specifies the number of columns of + the matrix VT. NCVT must be at least 0. + + NRU (input) INTEGER + On entry, NRU specifies the number of rows of + the matrix U. NRU must be at least 0. + + NCC (input) INTEGER + On entry, NCC specifies the number of columns of + the matrix C. NCC must be at least 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal entries of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in ascending order. + + E (input/output) DOUBLE PRECISION array. + dimension is (N-1) if SQRE = 0 and N if SQRE = 1. + On entry, the entries of E contain the offdiagonal entries + of the bidiagonal matrix whose SVD is desired. On normal + exit, E will contain 0. If the algorithm does not converge, + D and E will contain the diagonal and superdiagonal entries + of a bidiagonal matrix orthogonally equivalent to the one + given as input. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, contains a matrix which on exit has been + premultiplied by P', dimension N-by-NCVT if SQRE = 0 + and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). + + LDVT (input) INTEGER + On entry, LDVT specifies the leading dimension of VT as + declared in the calling (sub) program. LDVT must be at + least 1. If NCVT is nonzero LDVT must also be at least N. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, contains a matrix which on exit has been + postmultiplied by Q, dimension NRU-by-N if SQRE = 0 + and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). + + LDU (input) INTEGER + On entry, LDU specifies the leading dimension of U as + declared in the calling (sub) program. LDU must be at + least max( 1, NRU ) . + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, contains an N-by-NCC matrix which on exit + has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 + and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). + + LDC (input) INTEGER + On entry, LDC specifies the leading dimension of C as + declared in the calling (sub) program. LDC must be at + least 1. If NCC is nonzero, LDC must also be at least N. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + Workspace. Only referenced if one of NCVT, NRU, or NCC is + nonzero, and if N is at least 2. + + INFO (output) INTEGER + On exit, a value of 0 indicates a successful exit. + If INFO < 0, argument number -INFO is illegal. + If INFO > 0, the algorithm did not converge, and INFO + specifies how many superdiagonals did not converge. 
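+
+   As a usage illustration (a minimal sketch, assuming the f2c typedefs
+   integer and doublereal from f2c.h and the dlasdq_ entry point defined
+   below; the numbers are arbitrary): computing only the singular values of
+   a 4-by-4 upper bidiagonal matrix, with no vectors accumulated,
+
+       integer n = 4, sqre = 0, zero = 0, info;
+       integer ldvt = 1, ldu = 1, ldc = 1;
+       doublereal d[4] = {4., 3., 2., 1.};    // diagonal
+       doublereal e[3] = {.1, .2, .3};        // superdiagonal
+       doublereal dummy = 0., work[16];       // WORK of size 4*N
+
+       dlasdq_("U", &sqre, &n, &zero, &zero, &zero, d, e,
+               &dummy, &ldvt, &dummy, &ldu, &dummy, &ldc, work, &info);
+       // on success INFO == 0 and d[] holds the singular values in
+       // ascending order
+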
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* + If matrix non-square upper bidiagonal, rotate to be lower + bidiagonal. The rotations are on the right. +*/ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left. +*/ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* + If matrix (N+1)-by-N lower bidiagonal, one additional + rotation is needed. +*/ + + if (sqre1 == 1) { + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* + Call DBDSQR to compute the SVD of the reduced real + N-by-N upper bidiagonal matrix. 
+*/ + + dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* + Sort the singular values into ascending order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. */ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of DLASDQ */ + +} /* dlasdq_ */ + +/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, il, ir, maxn; + static doublereal temp; + static integer nlvl, llst, ncrnt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLASDT creates a tree of subproblems for bidiagonal divide and + conquer. + + Arguments + ========= + + N (input) INTEGER + On entry, the number of diagonal elements of the + bidiagonal matrix. + + LVL (output) INTEGER + On exit, the number of levels on the computation tree. + + ND (output) INTEGER + On exit, the number of nodes on the tree. + + INODE (output) INTEGER array, dimension ( N ) + On exit, centers of subproblems. + + NDIML (output) INTEGER array, dimension ( N ) + On exit, row dimensions of left children. + + NDIMR (output) INTEGER array, dimension ( N ) + On exit, row dimensions of right children. + + MSUB (input) INTEGER + On entry, the maximum row dimension each subproblem at the + bottom of the tree can be of. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Find the number of levels on the tree. +*/ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* + Constructing the tree at (NLVL+1)-st level. The number of + nodes created on this level is LLST * 2. 
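+
+   A small worked example of the construction (illustrative numbers only):
+   N = 100 with MSUB = 25 gives LVL = 2 and ND = 3.  The root is centered at
+   INODE(1) = 51 with NDIML(1) = 50 and NDIMR(1) = 49; its two children are
+   centered at INODE(2) = 26 and INODE(3) = 76 with (left, right) dimensions
+   (25, 24) and (24, 24), so every leaf block is at most MSUB rows.
+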
+*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of DLASDT */ + +} /* dlasdt_ */ + +/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASET initializes an m-by-n matrix A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set; the strictly lower + triangular part of A is not changed. + = 'L': Lower triangular part is set; the strictly upper + triangular part of A is not changed. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + ALPHA (input) DOUBLE PRECISION + The constant to which the offdiagonal elements are to be set. + + BETA (input) DOUBLE PRECISION + The constant to which the diagonal elements are to be set. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On exit, the leading m-by-n submatrix of A is set as follows: + + if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, + if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, + otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, + + and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the strictly upper triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the strictly lower triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. 
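+
+   As a usage illustration (a minimal sketch, assuming the f2c typedefs
+   integer and doublereal from f2c.h): producing a 3-by-3 identity matrix in
+   column-major storage,
+
+       integer n = 3, lda = 3;
+       doublereal zero = 0., one = 1.;
+       doublereal a[9];
+
+       dlaset_("A", &n, &n, &zero, &one, a, &lda);
+       // off-diagonal entries = ALPHA = 0, diagonal entries = BETA = 1
+
+   The DLASDA code above prepares its U and VT blocks with DLASET in the
+   same spirit before DLASDQ accumulates singular vectors into them.
+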
*/ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* dlaset_ */ + +/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static integer i__; + static doublereal eps; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal scale; + static integer iinfo; + static doublereal sigmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sigmx; + extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( + char *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ1 computes the singular values of a real N-by-N bidiagonal + matrix with diagonal D and off-diagonal E. The singular values + are computed to high relative accuracy, in the absence of + denormalization, underflow and overflow. The algorithm was first + presented in + + "Accurate singular values and differential qd algorithms" by K. V. + Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + 1994, + + and the present implementation is described in "An implementation of + the dqds Algorithm (Positive Case)", LAPACK Working Note. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal elements of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in decreasing order. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, elements E(1:N-1) contain the off-diagonal elements + of the bidiagonal matrix whose SVD is desired. + On exit, E is overwritten. 
+ + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + i__1 = -(*info); + xerbla_("DLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = abs(d__[1]); + return 0; + } else if (*n == 2) { + dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. */ + + sigmx = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* Computing MAX */ + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); +/* L10: */ + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.) { + dlasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); +/* L20: */ + } + +/* + Copy D and E into WORK (in the Z format) and scale (squaring the + input data makes scaling by a power of the radix pointless). +*/ + + eps = PRECISION; + safmin = SAFEMINIMUM; + scale = sqrt(eps / safmin); + dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. 
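+
+   Concretely, WORK now holds ( d(1), e(1), d(2), e(2), ..., d(n) ) scaled
+   by SCALE/SIGMX, and the loop below squares every entry so that DLASQ2
+   sees the qd array ( q(1), e(1)^2, q(2), e(2)^2, ... ) with q(i) = d(i)^2.
+   The same packing as a plain-C sketch (0-based, scaling omitted):
+
+       for (int i = 0; i < n; ++i) {
+           work[2 * i] = d[i] * d[i];              // q(i+1)
+           if (i < n - 1)
+               work[2 * i + 1] = e[i] * e[i];      // e(i+1)^2
+       }
+       work[2 * n - 1] = 0.;                       // trailing e entry
+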
*/ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = work[i__]; + work[i__] = d__1 * d__1; +/* L30: */ + } + work[*n * 2] = 0.; + + dlasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of DLASQ1 */ + +} /* dlasq1_ */ + +/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__, e, g; + static integer k; + static doublereal s, t; + static integer i0, i4, n0; + static doublereal dn; + static integer pp; + static doublereal dn1, dn2, dee, eps, tau, tol; + static integer ipn4; + static doublereal tol2; + static logical ieee; + static integer nbig; + static doublereal dmin__, emin, emax; + static integer kmin, ndiv, iter; + static doublereal qmin, temp, qmax, zmax; + static integer splt; + static doublereal dmin1, dmin2; + static integer nfail; + static doublereal desig, trace, sigma; + static integer iinfo, ttype; + extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + static doublereal deemin; + static integer iwhila, iwhilb; + static doublereal oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ2 computes all the eigenvalues of the symmetric positive + definite tridiagonal matrix associated with the qd array Z to high + relative accuracy are computed to high relative accuracy, in the + absence of denormalization, underflow and overflow. + + To see the relation of Z to the tridiagonal matrix, let L be a + unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + let U be an upper bidiagonal matrix with 1's above and diagonal + Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + symmetric tridiagonal to which it is similar. + + Note : DLASQ2 defines a logical variable, IEEE, which is true + on machines which follow ieee-754 floating-point standard in their + handling of infinities and NaNs, and false otherwise. This variable + is passed to DLASQ3. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) + On entry Z holds the qd array. On exit, entries 1 to N hold + the eigenvalues in decreasing order, Z( 2*N+1 ) holds the + trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. 
If + N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) + holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of + shifts that failed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if the i-th argument is a scalar and had an illegal + value, then INFO = -i, if the i-th argument is an + array and the j-entry had an illegal value, then + INFO = -(i*100+j) + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + Further Details + =============== + Local Variables: I0:N0 defines a current unreduced segment of Z. + The shifts are accumulated in SIGMA. Iteration count is in ITER. + Ping-pong is controlled by PP (alternates between 0 and 1). + + ===================================================================== + + + Test the input arguments. + (in case DLASQ2 is not called by DLASQ1) +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + xerbla_("DLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + xerbla_("DLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0. || z__[3] < 0.) { + *info = -2; + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); +/* Computing MAX */ + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + dlasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + +/* Check whether the machine is IEEE conformable. 
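+
+   ILAENV is queried below with ISPEC = 10 (NaN arithmetic can be trusted
+   not to trap) and ISPEC = 11 (infinity arithmetic can be trusted not to
+   trap); IEEE is set only when both return 1, and it selects the
+   branch-free inner loop in DLASQ5.  That loop relies on exactly this
+   property, e.g. (a minimal illustration, not library code):
+
+       // with IEEE-754 semantics a vanishing pivot yields Inf/NaN instead
+       // of trapping, so the update can divide first and let the caller
+       // test the result (DLASQ3 checks DMIN with DISNAN afterwards)
+       static double unguarded_step(double d, double qnext, double pivot,
+                                    double tau)
+       {
+           return d * (qnext / pivot) - tau;
+       }
+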
*/ + + ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, + &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + +/* Initialise variables to pass to DLASQ3. */ + + ttype = 0; + dmin1 = 0.; + dmin2 = 0.; + dn = 0.; + dn1 = 0.; + dn2 = 0.; + g = 0.; + tau = 0.; + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L170; + } + +/* + While array unfinished do + + E(N0) holds the value of SIGMA when submatrix in I0:N0 + splits from the rest of the array, but is negated. +*/ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + +/* + Find last unreduced submatrix's top index I0, find QMAX and + EMIN. Find Gershgorin-type bound if Q's much greater than E's. +*/ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) 
{ +/* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + pp = 0; + + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } + +/* + Put -(initial shift) into DMIN. + + Computing MAX +*/ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); + +/* + Now I0:N0 is unreduced. + PP = 0 for ping, PP = 1 for pong. + PP = 2 indicates that flipping was applied to the Z array and + and that the tests for deflation upon entry in DLASQ3 + should not be performed. +*/ + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L150; + } + +/* While submatrix unfinished take a good dqds step. */ + + dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); +/* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); + } +/* L130: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L140: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L150: + +/* L160: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L170: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L180: */ + } + +/* Sort and compute sum of eigenvalues. */ + + dlasrt_("D", n, &z__[1], &iinfo); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L190: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. 
*/ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; + return 0; + +/* End of DLASQ2 */ + +} /* dlasq2_ */ + +/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal s, t; + static integer j4, nn; + static doublereal eps, tol; + static integer n0in, ipn4; + static doublereal tol2, temp; + extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *), dlasq5_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), dlasq6_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + + extern logical disnan_(doublereal *); + + +/* + -- LAPACK routine (version 3.2.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + In case of failure it changes shifts, and tries again until output + is positive. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input/output) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. + + PP (input/output) INTEGER + PP=0 for ping, PP=1 for pong. + PP=2 indicates that flipping was applied to the Z array + and that the initial tests for deflation should not be + performed. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + SIGMA (output) DOUBLE PRECISION + Sum of shifts used in current segment. + + DESIG (input/output) DOUBLE PRECISION + Lower order part of SIGMA + + QMAX (input) DOUBLE PRECISION + Maximum value of q. + + NFAIL (output) INTEGER + Number of times shift was too big. + + ITER (output) INTEGER + Number of iterations. + + NDIV (output) INTEGER + Number of divisions. + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). + + TTYPE (input/output) INTEGER + Shift type. + + DMIN1 (input/output) DOUBLE PRECISION + + DMIN2 (input/output) DOUBLE PRECISION + + DN (input/output) DOUBLE PRECISION + + DN1 (input/output) DOUBLE PRECISION + + DN2 (input/output) DOUBLE PRECISION + + G (input/output) DOUBLE PRECISION + + TAU (input/output) DOUBLE PRECISION + + These are passed as arguments in order to save their values + between calls to DLASQ3. 
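+
+   For reading the index arithmetic in the body (1-based indices as in the
+   Fortran original): after DLASQ2's reordering the qd array is stored as
+   Z = ( q1, qq1, e1, ee1, q2, qq2, e2, ee2, ... ), and PP selects the live
+   copy.  A tiny plain-C sketch of the positions (hypothetical helpers):
+
+       static int q_pos(int i, int pp) { return 4 * i - 3 + pp; }
+       static int e_pos(int i, int pp) { return 4 * i - 1 + pp; }
+
+   so, for example, Z( NN - 3 ) with NN = 4*N0 + PP is q( N0 ) of the
+   current copy.
+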
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + n0in = *n0; + eps = PRECISION; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + if (*pp == 2) { + *pp = 0; + } + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); +/* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; + } + } + +/* Choose a shift. */ + + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); + +/* Call dqds until DMIN > 0. */ + +L70: + + dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0. && *dmin1 > 0.) { + +/* Success. */ + + goto L90; + + } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol + * (*sigma + *dn1) && abs(*dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. 
*/ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L90; + } else if (*dmin__ < 0.) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (*ttype < -22) { + +/* Failed twice. Play it safe. */ + + *tau = 0.; + } else if (*dmin1 > 0.) { + +/* Late failure. Gives excellent shift. */ + + *tau = (*tau + *dmin__) * (1. - eps * 2.); + *ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + *tau *= .25; + *ttype += -12; + } + goto L70; + } else if (disnan_(dmin__)) { + +/* NaN. */ + + if (*tau == 0.) { + goto L80; + } else { + *tau = 0.; + goto L70; + } + } else { + +/* Possible underflow. Play it safe. */ + + goto L80; + } + +/* Risk of underflow. */ + +L80: + dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + *tau = 0.; + +L90: + if (*tau < *sigma) { + *desig += *tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of DLASQ3 */ + +} /* dlasq3_ */ + +/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype, doublereal *g) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal s, a2, b1, b2; + static integer i4, nn, np; + static doublereal gam, gap1, gap2; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ4 computes an approximation TAU to the smallest eigenvalue + using values of d from the previous transform. + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + NOIN (input) INTEGER + The value of N0 at start of EIGTEST. + + DMIN (input) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (input) DOUBLE PRECISION + d(N) + + DN1 (input) DOUBLE PRECISION + d(N-1) + + DN2 (input) DOUBLE PRECISION + d(N-2) + + TAU (output) DOUBLE PRECISION + This is the shift. + + TTYPE (output) INTEGER + Shift type. + + G (input/output) REAL + G is passed as an argument in order to save its value between + calls to DLASQ4. + + Further Details + =============== + CNST1 = 9/16 + + ===================================================================== + + + A negative DMIN forces the shift to take that absolute value + TTYPE records the type of shift. +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. 
*/ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. && gap1 > b1) { +/* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + *g += (1. - *g) * .333; + } else if (*ttype == -18) { + *g = .083250000000000005; + } else { + *g = .25; + } + s = *g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (max(b1,a2) * 100. < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. 
- a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* + Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + + Cases 10 and 11. +*/ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of DLASQ4 */ + +} /* dlasq4_ */ + +/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ5 computes one dqds transform in ping-pong form, one + version for IEEE machines another for non IEEE machines. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + TAU (input) DOUBLE PRECISION + This is the shift. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic. 
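+
+   The underlying update is the shifted dqds step of Fernando and Parlett.
+   A minimal sketch in plain C (separate q/e input and qhat/ehat output
+   arrays for readability; the routine below performs the same update in
+   place, in ping-pong form, while also tracking DMIN and EMIN):
+
+       // q, e, qhat, ehat use indices 1..n (element 0 unused)
+       static void dqds_step(int n, const double *q, const double *e,
+                             double *qhat, double *ehat, double tau)
+       {
+           double d = q[1] - tau;
+           for (int i = 1; i <= n - 1; ++i) {
+               qhat[i] = d + e[i];
+               double t = q[i + 1] / qhat[i];
+               ehat[i] = e[i] * t;
+               d = d * t - tau;
+           }
+           qhat[n] = d;               // last diagonal entry of the new array
+       }
+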
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) 
{ + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ5 */ + +} /* dlasq5_ */ + +/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + static doublereal safmin; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + DLASQ6 computes one dqd (shift equal to zero) transform in + ping-pong form, with protection against underflow and overflow. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = SAFEMINIMUM; + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L20: */ + } + } + +/* Unroll last two steps. 
*/ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ6 */ + +} /* dlasq6_ */ + +/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASR applies a sequence of plane rotations to a real matrix A, + from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form + + A := A*P**T + + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. + + When DIRECT = 'F' (Forward sequence), then + + P = P(z-1) * ... * P(2) * P(1) + + and when DIRECT = 'B' (Backward sequence), then + + P = P(1) * P(2) * ... * P(z-1) + + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). + + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. + + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form + + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. 
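+
+    As a concrete illustration (a sketch of the SIDE = 'L', PIVOT = 'V'
+    case handled below), rotation j updates rows j and j+1 of A, one
+    column i at a time:
+
+       temp        = A( j+1, i )
+       A( j+1, i ) = c( j )*temp - s( j )*A( j, i )
+       A( j, i )   = s( j )*temp + c( j )*A( j, i )
+
+    Rotations with c( j ) = 1 and s( j ) = 0 are skipped.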
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P**T + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of DLASR */ + +} /* dlasr_ */ + +/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, j; + static doublereal d1, d2, d3; + static integer dir; + static doublereal tmp; + static integer endd; + extern logical lsame_(char *, char *); + static integer stack[64] /* was [2][32] */; + static doublereal dmnmx; + static integer start; + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer stkpnt; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + Sort the numbers in D in increasing order (if ID = 'I') or + in decreasing order (if ID = 'D' ). + + Use Quick Sort, reverting to Insertion sort on arrays of + size <= 20. Dimension of STACK limits N to about 2**32. + + Arguments + ========= + + ID (input) CHARACTER*1 + = 'I': sort D in increasing order; + = 'D': sort D in decreasing order. + + N (input) INTEGER + The length of the array D. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the array to be sorted. + On exit, D has been sorted into increasing order + (D(1) <= ... <= D(N) ) or into decreasing order + (D(1) >= ... >= D(N) ), depending on ID. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input paramters. 
+*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, "D")) { + dir = 0; + } else if (lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* + Partition D( START:ENDD ) and stack parts, largest one first + + Choose partition entry as median of 3 +*/ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of DLASRT */ + +} /* dlasrt_ */ + +/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer ix; + static doublereal absxi; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASSQ returns the values scl and smsq such that + + ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + assumed to be non-negative and scl returns the value + + scl = max( scale, abs( x( i ) ) ). + + scale and sumsq must be supplied in SCALE and SUMSQ and + scl and smsq are overwritten on SCALE and SUMSQ respectively. + + The routine makes only one pass through the vector x. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) DOUBLE PRECISION array, dimension (N) + The vector for which a scaled sum of squares is computed. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) DOUBLE PRECISION + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with scl , the scaling factor + for the sum of squares. + + SUMSQ (input/output) DOUBLE PRECISION + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with smsq , the basic sum of + squares from which scl has been factored out. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* dlassq_ */ + +/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) +{ + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, + clt, crt, slt, srt; + static integer pmax; + static doublereal temp; + static logical swap; + static doublereal tsign; + + static logical gasmal; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASV2 computes the singular value decomposition of a 2-by-2 + triangular matrix + [ F G ] + [ 0 H ]. + On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + right singular vectors for abs(SSMAX), giving the decomposition + + [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + abs(SSMIN) is the smaller singular value. + + SSMAX (output) DOUBLE PRECISION + abs(SSMAX) is the larger singular value. 
+ + SNL (output) DOUBLE PRECISION + CSL (output) DOUBLE PRECISION + The vector (CSL, SNL) is a unit left singular vector for the + singular value abs(SSMAX). + + SNR (output) DOUBLE PRECISION + CSR (output) DOUBLE PRECISION + The vector (CSR, SNR) is a unit right singular vector for the + singular value abs(SSMAX). + + Further Details + =============== + + Any input parameter may be aliased with any output parameter. + + Barring over/underflow and assuming a guard digit in subtraction, all + output quantities are correct to within a few units in the last + place (ulps). + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ===================================================================== +*/ + + + ft = *f; + fa = abs(ft); + ht = *h__; + ha = abs(*h__); + +/* + PMAX points to the maximum absolute element of matrix + PMAX = 1 if F largest in absolute values + PMAX = 2 if G largest in absolute values + PMAX = 3 if H largest in absolute values +*/ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = abs(gt); + if (ga == 0.) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < EPSILON) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.; + slt = ht / gt; + srt = 1.; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2. - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.) { + r__ = abs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.) { + +/* Note that M is very tiny */ + + if (l == 0.) { + t = d_sign(&c_b3192, &ft) * d_sign(&c_b15, >); + } else { + t = gt / d_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + } + l = sqrt(t * t + 4.); + crt = 2. 
/ l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = d_sign(&c_b15, csr) * d_sign(&c_b15, csl) * d_sign(&c_b15, f); + } + if (pmax == 2) { + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, csl) * d_sign(&c_b15, g); + } + if (pmax == 3) { + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, snl) * d_sign(&c_b15, + h__); + } + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b15, f) * d_sign(&c_b15, h__); + *ssmin = d_sign(ssmin, &d__1); + return 0; + +/* End of DLASV2 */ + +} /* dlasv2_ */ + +/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static doublereal temp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of DLASWP */ + +} /* dlaswp_ */ + +/* Subroutine */ int dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * + tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, + doublereal *x, integer *ldx, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static integer locu12[4] = { 3,4,1,2 }; + static integer locl21[4] = { 2,1,4,3 }; + static integer locu22[4] = { 4,3,2,1 }; + static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + + /* System generated locals */ + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, + x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + + /* Local variables */ + static integer i__, j, k; + static doublereal x2[2], l21, u11, u12; + static integer ip, jp; + static doublereal u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, + tmp[4], tau1, btmp[4], smin; + static integer ipiv; + static doublereal temp; + static integer jpiv[4]; + static doublereal xmax; + static integer ipsv, jpsv; + static logical bswap; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + static logical xswap; + + extern integer idamax_(integer *, doublereal *, integer *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + + op(TL)*X + ISGN*X*op(TR) = SCALE*B, + + where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + -1. op(T) = T or T', where T' denotes the transpose of T. + + Arguments + ========= + + LTRANL (input) LOGICAL + On entry, LTRANL specifies the op(TL): + = .FALSE., op(TL) = TL, + = .TRUE., op(TL) = TL'. + + LTRANR (input) LOGICAL + On entry, LTRANR specifies the op(TR): + = .FALSE., op(TR) = TR, + = .TRUE., op(TR) = TR'. + + ISGN (input) INTEGER + On entry, ISGN specifies the sign of the equation + as described before. ISGN may only be 1 or -1. + + N1 (input) INTEGER + On entry, N1 specifies the order of matrix TL. + N1 may only be 0, 1 or 2. + + N2 (input) INTEGER + On entry, N2 specifies the order of matrix TR. + N2 may only be 0, 1 or 2. + + TL (input) DOUBLE PRECISION array, dimension (LDTL,2) + On entry, TL contains an N1 by N1 matrix. + + LDTL (input) INTEGER + The leading dimension of the matrix TL. LDTL >= max(1,N1). + + TR (input) DOUBLE PRECISION array, dimension (LDTR,2) + On entry, TR contains an N2 by N2 matrix. + + LDTR (input) INTEGER + The leading dimension of the matrix TR. LDTR >= max(1,N2). + + B (input) DOUBLE PRECISION array, dimension (LDB,2) + On entry, the N1 by N2 matrix B contains the right-hand + side of the equation. + + LDB (input) INTEGER + The leading dimension of the matrix B. LDB >= max(1,N1). + + SCALE (output) DOUBLE PRECISION + On exit, SCALE contains the scale factor. 
SCALE is chosen + less than or equal to 1 to prevent the solution overflowing. + + X (output) DOUBLE PRECISION array, dimension (LDX,2) + On exit, X contains the N1 by N2 solution. + + LDX (input) INTEGER + The leading dimension of the matrix X. LDX >= max(1,N1). + + XNORM (output) DOUBLE PRECISION + On exit, XNORM is the infinity-norm of the solution. + + INFO (output) INTEGER + On exit, INFO is set to + 0: successful exit. + 1: TL and TR have too close eigenvalues, so TL or + TR is perturbed to get a nonsingular equation. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Do not check the input parameters for errors */ + + *info = 0; + +/* Quick return if possible */ + + if (*n1 == 0 || *n2 == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = PRECISION; + smlnum = SAFEMINIMUM / eps; + sgn = (doublereal) (*isgn); + + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + case 4: goto L50; + } + +/* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ + +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. / gam; + } + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; + +/* + 1 by 2: + TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] + [TR21 TR22] +*/ + +L20: + +/* + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1] + , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tr[(tr_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tr[ + tr_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; + +/* + 2 by 1: + op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] + [TL21 TL22] [X21] [X21] [B21] +*/ + +L30: +/* + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1] + , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tl[(tl_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tl[ + tl_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + 
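+
+/*
+   A sketch of the bookkeeping used next: TMP holds the 2 by 2 coefficient
+   matrix of the small system assembled above in column-major order, and
+   IDAMAX selects its entry of largest magnitude as the complete pivot U11.
+   The tables LOCU12, LOCL21 and LOCU22 give the positions of the remaining
+   entries after the implied row and column interchange, while BSWPIV and
+   XSWPIV record whether the right-hand side is permuted before elimination
+   and the solution permuted afterwards.
+*/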
+/* + Solve 2 by 2 system using complete pivoting. + Set pivots less than SMIN to SMIN. +*/ + + ipiv = idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. * abs(btmp[0]) > + abs(u11)) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; +/* Computing MAX */ + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2] + , abs(d__2)); + *xnorm = max(d__3,d__4); + } + return 0; + +/* + 2 by 2: + op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] + [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] + + Solve equivalent 4 by 4 system using complete pivoting. + Set pivots less than SMIN to SMIN. +*/ + +L50: +/* Computing MAX */ + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << + 1) + 1], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = tr[ + tr_dim1 + 2], abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = + tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5,d__6); +/* Computing MAX */ + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, + d__6), d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = + max(d__5,d__6), d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = + max(d__5,d__6), d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)) + ; + smin = max(d__5,d__6); +/* Computing MAX */ + d__1 = eps * smin; + smin = max(d__1,smlnum); + btmp[0] = 0.; + dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + +/* Perform elimination */ + + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], 
abs(d__1)); + ipsv = ip; + jpsv = jp; + } +/* L60: */ + } +/* L70: */ + } + if (ipsv != i__) { + dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], + &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( + k << 2) - 5]; +/* L80: */ + } +/* L90: */ + } +/* L100: */ + } + if (abs(t16[15]) < smin) { + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) + > abs(t16[5]) || smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || + smlnum * 8. * abs(btmp[3]) > abs(t16[15])) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1,d__2), d__2 + = abs(btmp[2]), d__1 = max(d__1,d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; +/* L110: */ + } +/* L120: */ + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } +/* L130: */ + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; +/* Computing MAX */ + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1,d__2); + return 0; + +/* End of DLASY2 */ + +} /* dlasy2_ */ + +/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, iw; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLATRD reduces NB rows and columns of a real symmetric matrix A to + symmetric tridiagonal form by an orthogonal similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', DLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', DLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by DSYTRD. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= (1,N). + + E (output) DOUBLE PRECISION array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) DOUBLE PRECISION array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). 
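+
+    Each new column of W is built from the current reflector (a sketch of
+    the updates performed below, using the columns of V and W computed so
+    far):
+
+       w := tau(i) * ( A - V*W' - W*V' ) * v(i)
+       w := w - ( tau(i)/2 ) * ( w'*v(i) ) * v(i)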
+ + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a symmetric rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1; + dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + dsymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) + * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 
w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + dsymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] + , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of DLATRD */ + +} /* dlatrd_ */ + +/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[ + i__ + a_dim1], lda); + } else { + dscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of DLAUU2 */ + +} /* dlauu2_ */ + +/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dlauu2_(char *, integer *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of DLAUUM */ + +} /* dlauum_ */ + +/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORG2R generates an m by n real matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. 
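+
+           A minimal calling sketch, assuming a and tau were filled by a
+           prior call to DGEQRF and work holds at least n doubles:
+
+               dorg2r_(&m, &n, &k, a, &lda, tau, work, &info);
+
+           On return the m-by-n matrix Q overwrites a.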
+ + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[j + j * a_dim1] = 1.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2R */ + +} /* dorg2r_ */ + +/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORGBR generates one of the real orthogonal matrices Q or P**T + determined by DGEBRD when reducing a real matrix A to bidiagonal + form: A = Q * B * P**T. Q and P**T are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + is of order N: + if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m + rows of P**T, where n >= m >= k; + if k >= n, P**T = G(n-1) . . . 
G(2) G(1) and DORGBR returns P**T as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**T is + required, as defined in the transformation applied by DGEBRD: + = 'Q': generate Q; + = 'P': generate P**T. + + M (input) INTEGER + The number of rows of the matrix Q or P**T to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**T to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by DGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by DGEBRD. + K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEBRD. + On exit, the M-by-N matrix Q or P**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**T, as + returned by DGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to DGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to DGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGBR */ + +} /* dorgbr_ */ + +/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORGHR generates a real orthogonal matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + DGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of DGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEHRD. + On exit, the N-by-N orthogonal matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEHRD. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! 
lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L50: */ + } + a[j + j * a_dim1] = 1.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L70: */ + } + a[j + j * a_dim1] = 1.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGHR */ + +} /* dorghr_ */ + +/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORGL2 generates an m by n real matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. 
+ + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGL2 */ + +} /* dorgl2_ */ + +/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORGLQ generates an M-by-N real matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. 
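+
+           A minimal calling sketch, assuming a and tau were filled by a
+           prior call to DGELQF and lwork >= max(1,m), or lwork = -1 for
+           a workspace query:
+
+               dorglq_(&m, &n, &k, a, &lda, tau, work, &lwork, &info);
+
+           On return the m-by-n matrix Q overwrites a.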
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? 
i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGLQ */ + +} /* dorglq_ */ + +/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORGQR generates an M-by-N real matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+ + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + dlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQR */ + +} /* dorgqr_ */ + +/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORM2L overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. 
+ + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2L */ + +} /* dorm2l_ */ + +/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORM2R overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. 
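+
+   A minimal calling sketch, assuming a, tau and k come from a prior
+   DGEQRF factorization and work holds at least n doubles; forming
+   C := Q' * C in place is
+
+       dorm2r_("Left", "Transpose", &m, &n, &k, a, &lda, tau,
+               c, &ldc, work, &info);
+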
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* dorm2r_ */ + +/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'T': P**T * C C * P**T + + Here Q and P**T are the orthogonal matrices determined by DGEBRD when + reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + P**T are defined as products of elementary reflectors H(i) and G(i) + respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the orthogonal matrix Q or P**T that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**T; + = 'P': apply P or P**T. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**T, P or P**T from the Left; + = 'R': apply Q, Q**T, P or P**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'T': Transpose, apply Q**T or P**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by DGEBRD. 
+ If VECT = 'P', the number of rows in the original + matrix reduced by DGEBRD. + K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by DGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by DGEBRD in the array argument TAUQ or TAUP. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q + or P*C or P**T*C or C*P or C*P**T. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to DGEBRD with nq >= k */ + + dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to DGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to DGEBRD with nq > k */ + + dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to DGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMBR */ + +} /* dormbr_ */ + +/* Subroutine */ int dormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * + tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ 
int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMHR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by DGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of DGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by DGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEHRD. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
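+
+           A minimal two-call sketch of the workspace query described
+           above, assuming wkopt is a doublereal and the remaining
+           arguments are already set up:
+
+               lwork = -1;
+               dormhr_(side, trans, &m, &n, &ilo, &ihi, a, &lda, tau,
+                       c, &ldc, &wkopt, &lwork, &info);
+               lwork = (integer) wkopt;
+               dormhr_(side, trans, &m, &n, &ilo, &ihi, a, &lda, tau,
+                       c, &ldc, work, &lwork, &info);
+
+           where work is allocated with lwork doubles before the second
+           call.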
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DORMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMHR */ + +} /* dormhr_ */ + +/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORML2 overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* dorml2_ */ + +/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMLQ overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). 
+ + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* dormlq_ */ + +/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMQL overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. 
+ If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < nw && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQL */ + +} /* dormql_ */ + +/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMQR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. 
NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* dormqr_ */ + +/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a 
software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMTR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by DSYTRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from DSYTRD; + = 'L': Lower triangle of A contains elementary reflectors + from DSYTRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by DSYTRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DSYTRD. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! 
lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMTR */ + +} /* dormtr_ */ + +/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer j; + static doublereal ajj; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. 
+ + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0. || disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & + a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0. || disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + d__1 = 1. 
/ ajj; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of DPOTF2 */ + +} /* dpotf2_ */ + +/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dpotf2_(char *, integer *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. 
*/ + + nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b15, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* dpotrf_ */ + +/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_( + char *, integer *, doublereal *, integer *, integer *), + dtrtri_(char *, char *, integer *, doublereal *, integer *, + integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + DPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + dlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of DPOTRI */ + +} /* dpotri_ */ + +/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by DPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. 
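+
+    Editor's note (illustrative sketch, not part of the original LAPACK
+    header): with the f2c calling convention used in this file, a typical
+    sequence for solving an N-by-N symmetric positive definite system
+    A*X = B factors A with DPOTRF and then applies this routine.  The
+    variables n, nrhs, lda, ldb, info and the column-major arrays a, b
+    below are hypothetical caller-side names:
+
+        dpotrf_("U", &n, a, &lda, &info);
+        if (info == 0) {
+            dpotrs_("U", &n, &nrhs, a, &lda, b, &ldb, &info);
+        }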
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of DPOTRS */ + +} /* dpotrs_ */ + +/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, lgn; + static doublereal eps, tiny; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer lwmin; + extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer start; + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer finish; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dlasrt_(char *, integer *, doublereal *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static doublereal orgnrm; + static logical lquery; + static integer smlsiz, storez, strtrw; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package 
provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band real symmetric matrix can also be + found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original dense symmetric + matrix also. On entry, Z contains the orthogonal + matrix used to reduce the original matrix to + tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LWORK must be at least + ( 1 + 3*N + 2*N*lg N + 3*N**2 ), + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1 then LWORK must be at least + ( 1 + 4*N + N**2 ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be max(1,2*(N-1)). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LIWORK must be at least + ( 6 + 6*N + 5*N*lg N ). + If COMPZ = 'I' and N > 1 then LIWORK must be at least + ( 3 + 5*N ). 
+ Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + goto L50; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. 
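+   (In that case the branch below calls DSTEQR on the whole matrix with the
+   caller's COMPZ; the eigenvalues-only case was already dispatched to DSTERF
+   above.)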
+*/ + + if (*n <= smlsiz) { + + dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + + } else { + +/* + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. +*/ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + goto L50; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. +*/ + + finish = start; +L20: + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } + +/* Scale back. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + if (icompz == 1) { + +/* + Since QR won't update a Z matrix which is larger than + the length of D, we must solve the sub-problem in a + workspace and then multiply back into Z. +*/ + + dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info); + dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], n, & + work[1], &m, &c_b29, &z__[start * z_dim1 + 1], + ldz); + } else if (icompz == 2) { + dsteqr_("I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info); + } else { + dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } + } + + start = finish + 1; + goto L10; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
+*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * + z_dim1 + 1], &c__1); + } +/* L40: */ + } + } + } + } + +L50: + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEDC */ + +} /* dstedc_ */ + +/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal anorm; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band symmetric matrix can also be found + if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to + tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + symmetric matrix. On entry, Z must contain the + orthogonal matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. 
+ + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is orthogonally similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) 
{ + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. 
+*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. 
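+   (For eigenvalues only, DLASRT sorts D directly; when eigenvectors are
+   wanted, the selection sort below uses at most N-1 DSWAP calls on the
+   columns of Z, keeping eigenvector movement to a minimum.)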
*/ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of DSTEQR */ + +} /* dsteqr_ */ + +/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static doublereal c__; + static integer i__, l, m; + static doublereal p, r__, s; + static integer l1; + static doublereal bb, rt1, rt2, eps, rte; + static integer lsv; + static doublereal eps2, oldc; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal gamma, alpha, sigma, anorm; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal oldgam, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal safmax; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSTERF computes all eigenvalues of a symmetric tridiagonal matrix + using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed to find all of the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("DSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. 
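+   (The Pal-Walker-Kahan variant iterates on the squares of the off-diagonal
+   elements; E is squared once per block below, so the inner loops update D
+   without forming plane rotations explicitly.)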
*/ + + nmaxit = *n * 30; + sigma = 0.; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + e[i__] = d__1 * d__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l + 1) { + rte = sqrt(e[l]); + dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. 
*/ + +L170: + dlasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of DSTERF */ + +} /* dsterf_ */ + +/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static doublereal eps; + static integer inde; + static doublereal anrm, rmin, rmax; + static integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; + extern logical lsame_(char *, char *); + static integer iinfo, lwmin, liopt; + static logical lower, wantz; + static integer indwk2, llwrk2; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + static integer indwrk, liwmin; + extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + static integer llwork; + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSYEVD computes all eigenvalues and, optionally, eigenvectors of a + real symmetric matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Because of large use of BLAS of level 3, DSYEVD needs N**2 more + workspace than DSYEVX. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA, N) + On entry, the symmetric matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. 
If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. + If JOBZ = 'V' and N > 1, LWORK must be at least + 1 + 6*N + 2*N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK and IWORK + arrays, returns these values as the first entries of the WORK + and IWORK arrays, and no error message related to LWORK or + LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK and + IWORK arrays, returns these values as the first entries of + the WORK and IWORK arrays, and no error message related to + LWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + Modified description of INFO. Sven, 16 Feb 05. + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! 
(lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + ilaenv_(&c__1, "DSYTRD", uplo, n, + &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + liopt = liwmin; + } + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = SAFEMINIMUM; + eps = PRECISION; + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + dlascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + lopt = (integer) ((*n << 1) + work[indwrk]); + +/* + For eigenvalues only, call DSTERF. For eigenvectors, first call + DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call DORMTR to multiply it by the + Householder transformations stored in A. +*/ + + if (! wantz) { + dsterf_(n, &w[1], &work[inde], info); + } else { + dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. 
/ sigma; + dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYEVD */ + +} /* dsyevd_ */ + +/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal taui; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer * + ); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + form T by an orthogonal similarity transformation: Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + dsyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.) 
{ + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + dsyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of DSYTD2 */ + +} /* dsytd2_ */ + +/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DSYTRD reduces a real symmetric matrix A to real symmetric + tridiagonal form T by an orthogonal similarity transformation: + Q**T * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. 
+ On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. 
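+   (NB comes from ILAENV; the blocked path below needs N*NB workspace for the
+   panel matrix W, and reduces NB or falls back to the unblocked DSYTD2 code
+   when LWORK is too small.)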
*/ + + nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], + lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+ib:n,i+ib:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYTRD */ + +} /* dsytrd_ */ + +/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, j, k; + static doublereal x[4] /* was [2][2] */; + static integer j1, j2, n2, ii, ki, ip, is; + static doublereal wi, wr, rec, ulp, beta, emax; + static logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static logical allv; + static integer ierr; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal vmax; + static integer jnxt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static doublereal remax; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv, bothv; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static doublereal vcrit; + static logical somev; + static doublereal xnorm; + extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * + , doublereal *, integer *, doublereal *, doublereal *, integer *), + dlabad_(doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static logical rightv; + static doublereal smlnum; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DTREVC computes some or all of the right and/or left eigenvectors of + a real upper quasi-triangular matrix T. 
+ Matrices of this type are produced by the Schur factorization of + a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, (y**H)*T = w*(y**H) + + where y**H denotes the conjugate transpose of y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal blocks of T. + + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the orthogonal factor that reduces a matrix + A to Schur form T, then Q*X and Q*Y are the matrices of right and + left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + backtransformed by the matrices in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + as indicated by the logical array SELECT. + + SELECT (input/output) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If w(j) is a real eigenvalue, the corresponding real + eigenvector is computed if SELECT(j) is .TRUE.. + If w(j) and w(j+1) are the real and imaginary parts of a + complex eigenvalue, the corresponding complex eigenvector is + computed if either SELECT(j) or SELECT(j+1) is .TRUE., and + on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to + .FALSE.. + Not referenced if HOWMNY = 'A' or 'B'. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input) DOUBLE PRECISION array, dimension (LDT,N) + The upper quasi-triangular matrix T in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part, and the second the imaginary part. + Not referenced if SIDE = 'R'. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. + + VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part and the second the imaginary part. 
+ Not referenced if SIDE = 'L'. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B', LDVR >= N. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. + If HOWMNY = 'A' or 'B', M is set to N. + Each selected real eigenvector occupies one column and each + selected complex eigenvector occupies two columns. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* + Set M to the number of columns required to store the selected + eigenvectors, standardize the array SELECT if necessary, and + test MM. +*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + work[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); +/* L20: */ + } +/* L30: */ + } + +/* + Index IP is used to specify the real or complex eigenvalue: + IP = 0, real eigenvalue, + 1, first of conjugate complex pair: (wr,wi) + -1, second of conjugate complex pair: (wr,wi) +*/ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. 
*/ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* + Solve the upper quasi-triangular system: + (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +*/ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* + Scale X(1,1) to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* + Complex right eigenvector. 
+ + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] +*/ + + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ + ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + *n] = 1.; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.; + } + work[ki + *n] = 0.; + work[ki - 1 + n2] = 0.; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +*/ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + d__1 = -x[3]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! 
over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L100: */ + } + + remax = 1. / emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L120: */ + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.) { + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. +*/ + + if (work[j] > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) 
{ + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + d__2 = (d__1 = work[j + *n], abs(d__1)); + vmax = max(d__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. + + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ + + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + 1 + *n], abs(d__2)), d__3 = max( + d__3,d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* + Complex left eigenvector. + + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ + + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = + t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.; + } else { + work[ki + *n] = 1.; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.; + work[ki + n2] = 0.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ + + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + n2], abs(d__2)), d__3 = max(d__3, + d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side elements. + + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, + d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) + , d__2 = abs(x[3]), d__1 = max(d__1,d__2); + vmax = max(d__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L220: */ + } + remax = 1. 
/ emax; + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L240: */ + } + remax = 1. / emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of DTREVC */ + +} /* dtrevc_ */ + +/* Subroutine */ int dtrexc_(char *compq, integer *n, doublereal *t, integer * + ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + + /* Local variables */ + static integer nbf, nbl, here; + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int dlaexc_(logical *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *, doublereal *, integer *), xerbla_(char *, integer *); + static integer nbnext; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DTREXC reorders the real Schur factorization of a real matrix + A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + moved to row ILST. + + The real Schur form T is reordered by an orthogonal similarity + transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + is updated by postmultiplying it with Z. + + T must be in Schur canonical form (as returned by DHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) DOUBLE PRECISION array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + Schur canonical form. + On exit, the reordered upper quasi-triangular matrix, again + in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. 
+ On exit, if COMPQ = 'V', Q has been postmultiplied by the + orthogonal transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input/output) INTEGER + ILST (input/output) INTEGER + Specify the reordering of the diagonal blocks of T. + The block with row index IFST is moved to row ILST, by a + sequence of transpositions between adjacent blocks. + On exit, if IFST pointed on entry to the second row of a + 2-by-2 block, it is changed to point to the first row; ILST + always points to the first row of the block in its final + position (which may differ from its input value by +1 or -1). + 1 <= IFST <= N; 1 <= ILST <= N. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + = 1: two adjacent blocks were too close to swap (the problem + is very ill-conditioned); T may have been partially + reordered, and ILST points to the first row of the + current position of the block being moved. + + ===================================================================== + + + Decode and test the input arguments. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! wantq && ! lsame_(compq, "N")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* + Determine the first row of specified block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + +/* + Determine the first row of the final block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap block with next one below */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & + nbf, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) 
{ + nbnext = 2; + } + } + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + ++here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + 2 + (here + 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2 by 2 Block did split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + + } else { + + here = *ifst; +L20: + +/* Swap block with next one above */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &nbf, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &nbnext, &c__1, &work[1], info); + --here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + (here - 1) * t_dim1] == 0.) 
{ + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__2, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2 by 2 Block did split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + + return 0; + +/* End of DTREXC */ + +} /* dtrexc_ */ + +/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static doublereal ajj; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DTRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of DTRTI2 */ + +} /* dtrti2_ */ + +/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dtrsm_( + char *, char *, char *, char *, integer *, integer *, doublereal * + , doublereal *, integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal + *, integer *, integer *), xerbla_(char *, integer + *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DTRTRI computes the inverse of a real upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. 
+ + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dtrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b15, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b151, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b15, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b151, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of DTRTRI */ + +} /* dtrtri_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch new file mode 100644 index 0000000000..cd750cec09 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -19075,5 +19075,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -19087,5 +19088,6 @@ + ! Skip any trailing zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -19131,5 +19133,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -19147,5 +19150,6 @@ + ! Skip any leading zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.c new file mode 100644 index 0000000000..d956ddbbb7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.c @@ -0,0 +1,1651 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b172 = 0.f; +static real c_b173 = 1.f; +static integer c__0 = 0; + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + IEEECK is called from the ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for inifinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. 
+ + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * *zero; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaclc_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILACLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* ilaclc_ */ + +integer ilaclr_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILACLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* ilaclr_ */ + +integer iladlc_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILADLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.) { + return ret_val; + } + } + } + } + return ret_val; +} /* iladlc_ */ + +integer iladlr_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILADLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). 
+ + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* iladlr_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer ic, nb, iz, nx; + static logical cname; + static integer nbmin; + static logical sname; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + extern integer iparmq_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + + -- April 2009 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + ILAENV returns an INTEGER + if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC + if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines (DEPRECATED) + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) 
+ = 7: the number of processors + = 8: the crossover point for the multishift QR method + for nonsymmetric eigenvalue problems (DEPRECATED) + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + 12 <= ISPEC <= 16: + xHSEQR or one of its subroutines, + see IPARMQ for detailed explanation + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) + + ===================================================================== +*/ + + + switch (*ispec) { + case 1: goto L10; + case 2: goto L10; + case 3: goto L10; + case 4: goto L80; + case 5: goto L90; + case 6: goto L100; + case 7: goto L110; + case 8: goto L120; + case 9: goto L130; + case 10: goto L140; + case 11: goto L150; + case 12: goto L160; + case 13: goto L160; + case 14: goto L160; + case 15: goto L160; + case 16: goto L160; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L10: + +/* Convert NAME to upper case if the first character is lower case. 
*/ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L20: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L30: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L40: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + + switch (*ispec) { + case 1: goto L50; + case 2: goto L60; + case 3: goto L70; + } + +L50: + +/* + ISPEC = 1: block size + + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. 
+*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if 
(s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L60: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( 
+ ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L70: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L80: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L90: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L100: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L110: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L120: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L130: + +/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) +*/ + + ret_val = 25; + return ret_val; + +L140: + +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b172, &c_b173); + } + return ret_val; + +L150: + +/* + ISPEC = 11: infinity arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b172, &c_b173); + } + return ret_val; + +L160: + +/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. 
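+   For these ISPEC values ILAENV does no work of its own; it simply
+   forwards all of its arguments to IPARMQ (defined later in this
+   file).  Purely as an illustration (the argument values here are
+   arbitrary), a caller asking for the xLAHQR/xLAQR0 crossover point
+   could write, in this f2c calling convention,
+
+      integer ispec = 12, n = 100, ilo = 1, ihi = 100, lwork = 100;
+      integer nmin = ilaenv_(&ispec, "SHSEQR", " ", &n, &ilo, &ihi,
+                             &lwork, (ftnlen)6, (ftnlen)1);
+
+   which with the defaults hard-coded in IPARMQ returns 75.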
*/ + + ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4, name_len, opts_len) + ; + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + +integer ilaslc_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILASLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.f) { + return ret_val; + } + } + } + } + return ret_val; +} /* ilaslc_ */ + +integer ilaslr_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILASLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.f) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* ilaslr_ */ + +integer ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILAZLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* ilazlc_ */ + +integer ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILAZLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* ilazlr_ */ + +integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork, ftnlen name_len, ftnlen opts_len) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1; + + /* Local variables */ + static integer nh, ns; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This program sets problem and machine dependent parameters + useful for xHSEQR and its subroutines. 
It is called whenever + ILAENV is called with 12 <= ISPEC <= 16 + + Arguments + ========= + + ISPEC (input) integer scalar + ISPEC specifies which tunable parameter IPARMQ should + return. + + ISPEC=12: (INMIN) Matrices of order nmin or less + are sent directly to xLAHQR, the implicit + double shift QR algorithm. NMIN must be + at least 11. + + ISPEC=13: (INWIN) Size of the deflation window. + This is best set greater than or equal to + the number of simultaneous shifts NS. + Larger matrices benefit from larger deflation + windows. + + ISPEC=14: (INIBL) Determines when to stop nibbling and + invest in an (expensive) multi-shift QR sweep. + If the aggressive early deflation subroutine + finds LD converged eigenvalues from an order + NW deflation window and LD.GT.(NW*NIBBLE)/100, + then the next QR sweep is skipped and early + deflation is applied immediately to the + remaining active diagonal block. Setting + IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a + multi-shift QR sweep whenever early deflation + finds a converged eigenvalue. Setting + IPARMQ(ISPEC=14) greater than or equal to 100 + prevents TTQRE from skipping a multi-shift + QR sweep. + + ISPEC=15: (NSHFTS) The number of simultaneous shifts in + a multi-shift QR iteration. + + ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the + following meanings. + 0: During the multi-shift QR sweep, + xLAQR5 does not accumulate reflections and + does not use matrix-matrix multiply to + update the far-from-diagonal matrix + entries. + 1: During the multi-shift QR sweep, + xLAQR5 and/or xLAQRaccumulates reflections and uses + matrix-matrix multiply to update the + far-from-diagonal matrix entries. + 2: During the multi-shift QR sweep. + xLAQR5 accumulates reflections and takes + advantage of 2-by-2 block structure during + matrix-matrix multiplies. + (If xTRMM is slower than xGEMM, then + IPARMQ(ISPEC=16)=1 may be more efficient than + IPARMQ(ISPEC=16)=2 despite the greater level of + arithmetic work implied by the latter choice.) + + NAME (input) character string + Name of the calling subroutine + + OPTS (input) character string + This is a concatenation of the string arguments to + TTQRE. + + N (input) integer scalar + N is the order of the Hessenberg matrix H. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular + in rows and columns 1:ILO-1 and IHI+1:N. + + LWORK (input) integer scalar + The amount of workspace available. + + Further Details + =============== + + Little is known about how best to choose these parameters. + It is possible to use different values of the parameters + for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. + + It is probably best to choose different parameters for + different matrices and different parameters at different + times during the iteration, but this has not been + implemented --- yet. + + + The best choices of most of the parameters depend + in an ill-understood way on the relative execution + rate of xLAQR3 and xLAQR5 and on the nature of each + particular eigenvalue problem. Experiment may be the + only practical way to determine which choices are most + effective. + + Following is a list of default values supplied by IPARMQ. + These defaults may be adjusted in order to attain better + performance in any particular computational environment. + + IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + IPARMQ(ISPEC=13) Recommended deflation window size. 
+ This depends on ILO, IHI and NS, the + number of simultaneous shifts returned + by IPARMQ(ISPEC=15). The default for + (IHI-ILO+1).LE.500 is NS. The default + for (IHI-ILO+1).GT.500 is 3*NS/2. + + IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. + + IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. + a multi-shift QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 0 30 NS = 2+ + 30 60 NS = 4+ + 60 150 NS = 10 + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default matrices of this order are + passed to the implicit double shift routine + xLAHQR. See IPARMQ(ISPEC=12) above. These + values of NS are used only in case of a rare + xLAHQR failure. + + (**) The asterisks (**) indicate an ad-hoc + function increasing from 10 to 64. + + IPARMQ(ISPEC=16) Select structured matrix multiply. + (See ISPEC=16 above for details.) + Default: 3. + + ================================================================ +*/ + if (*ispec == 15 || *ispec == 13 || *ispec == 16) { + +/* ==== Set the number simultaneous shifts ==== */ + + nh = *ihi - *ilo + 1; + ns = 2; + if (nh >= 30) { + ns = 4; + } + if (nh >= 60) { + ns = 10; + } + if (nh >= 150) { +/* Computing MAX */ + r__1 = log((real) nh) / log(2.f); + i__1 = 10, i__2 = nh / i_nint(&r__1); + ns = max(i__1,i__2); + } + if (nh >= 590) { + ns = 64; + } + if (nh >= 3000) { + ns = 128; + } + if (nh >= 6000) { + ns = 256; + } +/* Computing MAX */ + i__1 = 2, i__2 = ns - ns % 2; + ns = max(i__1,i__2); + } + + if (*ispec == 12) { + + +/* + ===== Matrices of order smaller than NMIN get sent + . to xLAHQR, the classic double shift algorithm. + . This must be at least 11. ==== +*/ + + ret_val = 75; + + } else if (*ispec == 14) { + +/* + ==== INIBL: skip a multi-shift qr iteration and + . whenever aggressive early deflation finds + . at least (NIBBLE*(window size)/100) deflations. ==== +*/ + + ret_val = 14; + + } else if (*ispec == 15) { + +/* ==== NSHFTS: The number of simultaneous shifts ===== */ + + ret_val = ns; + + } else if (*ispec == 13) { + +/* ==== NW: deflation window size. ==== */ + + if (nh <= 500) { + ret_val = ns; + } else { + ret_val = ns * 3 / 2; + } + + } else if (*ispec == 16) { + +/* + ==== IACC22: Whether to accumulate reflections + . before updating the far-from-diagonal elements + . and whether to use 2-by-2 block structure while + . doing it. A small amount of work could be saved + . by making this choice dependent also upon the + . NH=IHI-ILO+1. +*/ + + ret_val = 0; + if (ns >= 14) { + ret_val = 1; + } + if (ns >= 14) { + ret_val = 2; + } + + } else { +/* ===== invalid value of ispec ===== */ + ret_val = -1; + + } + +/* ==== End of IPARMQ ==== */ + + return ret_val; +} /* iparmq_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.f.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.f.patch new file mode 100644 index 0000000000..c743c1f627 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_lapack.f.patch @@ -0,0 +1,48 @@ +@@ -267,9 +267,10 @@ + Scan up each column tracking the last zero row seen. + ILACLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILACLR = MAX( ILACLR, I ) + END DO + END IF +@@ -395,9 +396,10 @@ + Scan up each column tracking the last zero row seen. 
+ ILADLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILADLR = MAX( ILADLR, I ) + END DO + END IF +@@ -1078,9 +1080,10 @@ + Scan up each column tracking the last zero row seen. + ILASLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILASLR = MAX( ILASLR, I ) + END DO + END IF +@@ -1206,9 +1209,10 @@ + Scan up each column tracking the last zero row seen. + ILAZLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILAZLR = MAX( ILAZLR, I ) + END DO + END IF diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.c new file mode 100644 index 0000000000..fccb1f58b5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.c @@ -0,0 +1,41691 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static real c_b15 = 1.f; +static integer c__1 = 1; +static real c_b29 = 0.f; +static doublereal c_b94 = -.125; +static real c_b151 = -1.f; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static integer c__6 = 6; +static integer c__12 = 12; +static integer c__49 = 49; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_true = TRUE_; +static real c_b3178 = 2.f; + +/* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__, + real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, + integer *iq, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, k; + static real p, r__; + static integer z__, ic, ii, kk; + static real cs; + static integer is, iu; + static real sn; + static integer nm1; + static real eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *); + static integer poles; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static integer iuplo, nsize, start; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ), slasd0_(integer *, integer *, real *, real *, real *, integer * + , real *, integer *, integer *, integer *, real *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int 
slasda_(integer *, integer *, integer *, + integer *, real *, real *, real *, integer *, real *, integer *, + real *, real *, real *, real *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer givcol; + extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer + *, integer *, integer *, real *, real *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *); + static integer icompq; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *), slartg_(real *, real *, real * + , real *, real *); + static real orgnrm; + static integer givnum; + extern doublereal slanst_(char *, integer *, real *, real *); + static integer givptr, qstart, smlsiz, wstart, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. SBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLASD3 for details. + + The code currently calls SLASDQ if singular values only are desired. + However, it can be slightly modified to compute singular values + using the divide and conquer method. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. + + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) REAL array, dimension (N-1) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) REAL array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). 
+ + VT (output) REAL array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) REAL array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the REAL data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) REAL array, dimension (MAX(1,LWORK)) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + ===================================================================== + Changed dimension statement in comment describing E from (N) to + (N-1). Sven, 17 Feb 05. + ===================================================================== + + + Test the input parameters. 
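+   As a concrete sketch of the workspace rules above (array names are
+   arbitrary): computing singular values only, COMPQ = 'N', for
+   N = 100 needs WORK of length at least 4*N = 400 and IWORK of
+   length 8*N = 800, while U, VT, Q and IQ are not referenced and
+   LDU = LDVT = 1 is legal:
+
+      integer n = 100, ldu = 1, ldvt = 1, info = 0;
+      real d[100], e[99], udum[1], vtdum[1], qdum[1], work[400];
+      integer iqdum[1], iwork[800];
+      sbdsdc_("U", "N", &n, d, e, udum, &ldu, vtdum, &ldvt,
+              qdum, iqdum, work, iwork, &info);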
+*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = r_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.f; + } else if (icompq == 2) { + u[u_dim1 + 1] = r_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.f; + } + d__[1] = dabs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + scopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use SLASDQ to compute the singular values. */ + + if (icompq == 0) { + slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + slaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + slaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + slaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + slaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + slaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + slaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. 
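+   The bidiagonal matrix is scaled so that its largest entry, ORGNRM
+   (the max-norm returned by SLANST just below), becomes one before
+   the divide-and-conquer phase; the "Unscale" step near the end of
+   the routine multiplies the computed singular values back by ORGNRM.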
*/ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = slamch_("Epsilon"); + + mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) < eps) { + d__[i__] = r_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. +*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = r_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.f; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = r_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.f; + } + d__[*n] = (r__1 = d__[*n], dabs(r__1)); + } + if (icompq == 2) { + slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 
2 && icompq == 2) { + slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of SBDSDC */ + +} /* sbdsdc_ */ + +/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * + u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + real r__1, r__2, r__3, r__4; + doublereal d__1; + + /* Local variables */ + static real f, g, h__; + static integer i__, j, m; + static real r__, cs; + static integer ll; + static real sn, mu; + static integer nm1, nm12, nm13, lll; + static real eps, sll, tol, abse; + static integer idir; + static real abss; + static integer oldm; + static real cosl; + static integer isub, iter; + static real unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), slas2_(real *, real *, real *, real *, + real *); + extern logical lsame_(char *, char *); + static real oldcs; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer oldll; + static real shift, sigmn, oldsn; + static integer maxit; + static real sminl; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static real sigmx; + static logical lower; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slasq1_(integer *, real *, real *, real *, integer *), + slasv2_(real *, real *, real *, real *, real *, real *, real *, + real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real sminoa; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real thresh; + static logical rotate; + static real tolmul; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + January 2007 + + + Purpose + ======= + + SBDSQR computes the singular values and, optionally, the right and/or + left singular vectors from the singular value decomposition (SVD) of + a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + zero-shift QR algorithm. The SVD of B has the form + + B = Q * S * P**T + + where S is the diagonal matrix of singular values, Q is an orthogonal + matrix of left singular vectors, and P is an orthogonal matrix of + right singular vectors. If left singular vectors are requested, this + subroutine actually returns U*Q instead of Q, and, if right singular + vectors are requested, this subroutine returns P**T*VT instead of + P**T, for given real input matrices U and VT. When U and VT are the + orthogonal matrices that reduce a general matrix A to bidiagonal + form: A = U*B*VT, as computed by SGEBRD, then + + A = (U*Q) * S * (P**T*VT) + + is the SVD of A. Optionally, the subroutine may also compute Q**T*C + for a given real input matrix C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. 
Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. + + E (input/output) REAL array, dimension (N-1) + On entry, the N-1 offdiagonal elements of the bidiagonal + matrix B. + On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. + + VT (input/output) REAL array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P**T * VT. + Not referenced if NCVT = 0. + + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) REAL array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + Not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) REAL array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q**T * C. + Not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) REAL array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: + if NCVT = NRU = NCC = 0, + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + else NCVT = NRU = NCC = 0, + the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. + + Internal Parameters + =================== + + TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. 
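+   As a worked example of these defaults: in IEEE single precision
+   EPS = 2**(-24), so EPS**(-1/8) = 2**3 = 8 and TOLMUL evaluates to
+   max(10, min(100, 8)) = 10, i.e. a relative tolerance of 10*EPS;
+   with MAXITR = 6 the iteration limit used below is 6*N**2.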
+ + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + slasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = slamch_("Epsilon"); + unfl = slamch_("Safe minimum"); + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__1 = (doublereal) eps; + r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b94); + r__1 = 10.f, r__2 = dmin(r__3,r__4); + tolmul = dmax(r__1,r__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L30: */ + } + sminl = 0.f; + if (tol >= 0.f) { + +/* Relative accuracy desired */ + + sminoa = dabs(d__[1]); + if (sminoa == 0.f) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - + 1], dabs(r__1)))); + sminoa = dmin(sminoa,mu); + if (sminoa == 0.f) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((real) (*n)); +/* Computing MAX */ + r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } + +/* + Prepare for main iteration loop for 
the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { + d__[m] = 0.f; + } + smax = (r__1 = d__[m], dabs(r__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (r__1 = d__[ll], dabs(r__1)); + abse = (r__1 = e[ll], dabs(r__1)); + if (tol < 0.f && abss <= thresh) { + d__[ll] = 0.f; + } + if (abse <= thresh) { + goto L80; + } + smin = dmin(smin,abss); +/* Computing MAX */ + r__1 = max(smax,abss); + smax = dmax(r__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.f; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.f; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( + r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= + thresh) { + e[m - 1] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (r__1 = d__[ll], dabs(r__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = + e[lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( + r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { + e[ll] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (r__1 = d__[m], dabs(r__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((r__1 = e[lll], 
dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ + lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + r__1 = eps, r__2 = tol * .01f; + if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.f; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (r__1 = d__[ll], dabs(r__1)); + slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (r__1 = d__[m], dabs(r__1)); + slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.f) { +/* Computing 2nd power */ + r__1 = shift / sll; + if (r__1 * r__1 < eps) { + shift = 0.f; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.f) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ + 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ - 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) 
{ + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b15, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b15, &d__[ + m]) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.f) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + sscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + 
isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of SBDSQR */ + +} /* sbdsqr_ */ + +/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static real s; + static integer ii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical leftv; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by SGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to SGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by SGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by SGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) REAL array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by SHSEIN or STREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. 
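+
+    Editorial note (not part of the original LAPACK comment): a minimal
+    usage sketch, assuming the caller has balanced the matrix with SGEBAL
+    and then computed eigenvectors of the balanced matrix; the names
+    a, lda, vr, scale, ilo, ihi, n, m, ldv and info are illustrative.
+
+        sgebal_("B", &n, a, &lda, &ilo, &ihi, scale, &info);
+        (reduce the balanced A, run the QR algorithm, store eigenvectors in vr)
+        sgebak_("B", "R", &n, &ilo, &ihi, scale, &m, vr, &ldv, &info);
+
+    The JOB passed to SGEBAK must match the JOB given to SGEBAL, and SIDE
+    must match the kind (right or left) of eigenvectors stored in V.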
+ + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of SGEBAK */ + +} /* sgebak_ */ + +/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sswap_(integer *, real *, integer *, real *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern logical sisnan_(real *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SGEBAL balances a general real matrix A. 
This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. 
*/ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 2.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1)); + r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +L150: + ; + } + ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1)); + i__2 = *n - k + 1; + ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 2.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 2.f; + c__ *= 2.f; + ca *= 2.f; + r__ /= 2.f; + g /= 2.f; + ra /= 2.f; + goto L160; + +L170: + g = c__ / 2.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + r__1 = c__ + f + ca + r__ + g + ra; + if (sisnan_(&r__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("SGEBAL", &i__2); + return 0; + } + f /= 2.f; + c__ /= 2.f; + g /= 2.f; + ca /= 2.f; + r__ *= 2.f; + ra *= 2.f; + goto L180; + +/* Now balance. 
*/ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of SGEBAL */ + +} /* sgebal_ */ + +/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) REAL array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
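+
+    Editorial note (not part of the original LAPACK comment): a minimal
+    call sketch with illustrative names; D, E, TAUQ, TAUP and WORK are
+    assumed to provide min(M,N), min(M,N)-1, min(M,N), min(M,N) and
+    max(M,N) elements respectively, as specified above.
+
+        sgebd2_(&m, &n, a, &lda, d, e, tauq, taup, work, &info);
+
+    For large matrices the blocked variant SGEBRD (below) performs the
+    same reduction using level-3 BLAS updates.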
+ + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = 
i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of SGEBD2 */ + +} /* sgebd2_ */ + +/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + static integer nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer minmn; + extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *), slabrd_( + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, real *, integer *, real *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. 
+ On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). 
+ + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of SGEBRD */ + +} /* sgebrd_ */ + +/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2; + + /* Local variables */ + static integer i__, k; + static real r__, cs, sn; + static integer ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static integer ibal; + static char side[1]; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern doublereal snrm2_(integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *); + static logical scalea; + static real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer 
*); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slartg_(real *, real *, + real *, real *, real *), sorghr_(integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *), shseqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) REAL array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) REAL array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. 
+ + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by SHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) 
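+
+    Editorial note (not part of the original LAPACK comment): callers
+    usually size WORK via the LWORK = -1 query described in the Arguments
+    section; a minimal sketch with illustrative names (malloc is shown
+    only for illustration):
+
+        real wkopt;
+        integer lwork = -1;
+        sgeev_("V", "V", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr,
+               &wkopt, &lwork, &info);
+        lwork = (integer) wkopt;
+        work = (real *) malloc(lwork * sizeof(real));
+        sgeev_("V", "V", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr,
+               work, &lwork, &info);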
+*/ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else { + minwrk = *n * 3; + shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if 
(wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from SHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + r__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.f; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + r__2 = vr[k + (i__ 
+ 1) * vr_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.f; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEEV */ + +} /* sgeev_ */ + +/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) REAL array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). 
+ + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of SGEHD2 */ + +} /* sgehd2_ */ + +/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + static real t[4160] /* was [65][64] */; + static integer ib; + static real ei; + static integer nb, nh, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), saxpy_(integer *, + real *, real *, integer *, real *, integer *), sgehd2_(integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + ), slahr2_(integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, real *, integer *), slarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, real *, + integer *, real *, integer *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + SGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) 
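+
+    Editorial note (not part of the original LAPACK comment): a minimal
+    sketch that reduces A to Hessenberg form and then forms Q explicitly
+    with SORGHR, mirroring what SGEEV does below; q, ldq and lwork are
+    illustrative, with lwork sized as described above (at least max(1,N),
+    optimally N*NB).
+
+        sgehrd_(&n, &ilo, &ihi, a, &lda, tau, work, &lwork, &info);
+        slacpy_("L", &n, &n, a, &lda, q, &ldq);
+        sorghr_(&n, &ilo, &ihi, q, &ldq, tau, work, &lwork, &info);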
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.f; + return 0; + } + +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code) + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set + to 1 +*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; + i__3 = *ihi - i__ - ib + 1; + sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b15, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + saxpy_(&i__, &c_b151, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (real) iws; + + return 0; + +/* End of SGEHRD */ + +} /* sgehrd_ */ + +/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). 
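+
+    Worked illustration (editorial, not in the reference text): for the single
+    row a = ( 3  4 ), slarfg_ returns tau and v = ( 1  v2 ) such that
+    a * (I - tau*v*v') = ( -5  0 ); the reflector annihilates the entries to
+    the right of the diagonal while preserving the row's 2-norm, since
+    sqrt(3**2 + 4**2) = 5.
+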
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGELQ2 */ + +} /* sgelq2_ */ + +/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. 
*/ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGELQF */ + +} /* sgelqf_ */ + +/* Subroutine */ int sgelsd_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer ie, il, mm; + static real eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static real sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int slabad_(real *, real *), sgebrd_(integer *, + integer *, real *, integer *, real *, real *, real *, real *, + real *, integer *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slalsd_(char *, integer + *, integer *, integer *, real *, real *, real *, integer *, real * + , integer *, real *, integer *, integer *), slascl_(char * + , integer *, integer *, real *, real *, integer *, integer *, + real *, integer *, integer *); + static integer wlalsd; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slacpy_(char *, integer + *, integer *, real *, integer *, real *, integer *), + slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static integer ldwork; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + static integer liwork, minwrk, maxwrk; + static real smlnum; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder transformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. 
+ (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of A. M >= 0. + + N (input) INTEGER + The number of columns of A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution + matrix X. If m >= n and RANK = n, the residual + sum-of-squares for the solution in the i-th column is given + by the sum of squares of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,max(M,N)). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) REAL + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, + if M is greater than or equal to N or + 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the array WORK and the + minimum size of the array IWORK, and returns these values as + the first entries of the WORK and IWORK arrays, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), + where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. 
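+
+    Editorial illustration of the RCOND/RANK semantics above: if the computed
+    singular values are S = ( 5, 1, 1e-8 ) and RCOND = 1e-6, then
+    S(3) = 1e-8 <= RCOND*S(1) = 5e-6, so the third direction is treated as
+    zero and RANK = 2 is returned; the minimum-norm solution drops that
+    near-null direction rather than amplifying noise by 1/S(3).
+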
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + liwork = 1; + if (minmn > 0) { + smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, + (ftnlen)6, (ftnlen)1); + mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); +/* Computing MAX */ + i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log( + 2.f)) + 1; + nlvl = max(i__1,0); + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* + Path 1a - overdetermined, with many more rows than + columns. +*/ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", + "LT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX +*/ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, + "SGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR" + , "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, + "SORMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * + *nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, + i__2), i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * + *nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. 
+*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * + ilaenv_(&c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ" + , "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. The workspace + calculation should use queries for all routines eventually. + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), + i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4) + ; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, + "SORMBR", "QLT", m, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORM" + "BR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen) + 3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1, + i__2), i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + } + minwrk = min(minwrk,maxwrk); + work[1] = (real) maxwrk; + iwork[1] = liwork; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELSD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = slamch_("P"); + sfmin = slamch_("S"); + smlnum = sfmin / eps; + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + slaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + slaset_("F", &minmn, &c__1, &c_b29, &c_b29, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. 
*/ + + bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + slaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (Workspace: need N+NRHS, prefer N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A. + (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. 
*/ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL). + (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + slaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (Workspace: need M+NRHS, prefer M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A. + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. 
*/ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (real) maxwrk; + iwork[1] = liwork; + return 0; + +/* End of SGELSD */ + +} /* sgelsd_ */ + +/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
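+
+    Editorial note: the factored form produced here is rarely expanded into an
+    explicit Q.  Callers usually either apply Q (or Q') directly with sormqr_,
+    as sgelsd_ above does after its initial QR step, or generate the leading
+    columns of Q with sorgqr_ when the orthogonal factor itself is needed, as
+    sgesdd_ below does.
+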
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGEQR2 */ + +} /* sgeqr2_ */ + +/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. 
*/ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQRF */ + +} /* sgeqrf_ */ + +/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static real dum[1], eps; + static integer ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau; + extern logical lsame_(char *, char *); + static integer chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + static integer bdspac; + extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, + real *, real *, integer *, real *, integer *, real *, integer *, + real *, integer *, integer *), sgebrd_(integer *, + integer *, real *, integer *, real *, real *, real *, real *, + real *, integer *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sorgbr_(char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + ); + static integer ldwrkl; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static integer ldwkvt; + static real smlnum; + static logical wntqas; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + March 2009 + + + Purpose + ======= + + SGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. If singular vectors are desired, it uses a + divide-and-conquer algorithm. 
+ + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array A; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) REAL array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) REAL array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). 
+ If JOBZ = 'O', + LWORK >= 3*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK = -1 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: SBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) 
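+
+    Editorial note: MNTHR computed below, roughly 11*MIN(M,N)/6, is the
+    crossover at which it pays to collapse the long dimension first with an
+    initial QR (M >> N) or LQ (N >> M) factorization before bidiagonalizing;
+    paths 1-4 and 1t-4t assume that shape, while paths 5 and 5t bidiagonalize
+    A directly.
+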
+*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for SBDSDC */ + + mnthr = (integer) (minmn * 11.f / 6.f); + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl 
+ *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else if (minmn > 0) { + +/* Compute space needed for SBDSDC */ + + mnthr = (integer) (minmn * 11.f / 6.f); + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + 
wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = 
bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, 
computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagoal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + sgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b29, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + 
Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + sgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b29, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. 
MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + slaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b29, & + work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. 
If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b29, &work[il], & + ldwrkl); + slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + sgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b29, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; 
+ nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + sgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b29, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + slaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 
= *lwork - nwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & + work[il], m); + slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESDD */ + +} /* sgesdd_ */ + +/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_( + integer *, integer *, real *, integer *, integer *, integer *), + sgetrs_(char *, integer *, integer *, real *, integer *, integer * + , real *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software 
package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of SGESV */ + +} /* sgesv_ */ + +/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer i__, j, jp; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *), sscal_(integer * + , real *, real *, integer *); + static real sfmin; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if ((r__1 = a[j + j * a_dim1], dabs(r__1)) >= sfmin) { + i__2 = *m - j; + r__1 = 1.f / a[j + j * a_dim1]; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. 
*/ + + i__2 = *m - j; + i__3 = *n - j; + sger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of SGETF2 */ + +} /* sgetf2_ */ + +/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), sgetf2_(integer *, + integer *, real *, integer *, integer *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. 
*/ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) + * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of SGETRF */ + +} /* sgetrf_ */ + +/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + static logical notran; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by SGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by SGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from SGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of SGETRS */ + +} /* sgetrs_ */ + +/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + real r__1; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static real hl[2401] /* was [49][49] */; + static integer kbot, nmin; + extern logical lsame_(char *, char *); + static logical initz; + static real workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int slaqr0_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + static logical lquery; + + +/* + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ June 2010 + + Purpose + ======= + + SHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL, and then passed to SGEHRD + when the matrix output by SGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', then H contains the + upper quasi-triangular matrix T from the Schur decomposition + (the Schur form); 2-by-2 diagonal blocks (corresponding to + complex conjugate pairs of eigenvalues) are returned in + standard form, with H(i,i) = H(i+1,i+1) and + H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the + contents of H are unspecified on exit. (The output value of + H when INFO.GT.0 is given under the description of INFO + below.) + + Unlike earlier versions of SHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and + WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in + the same order as on the diagonal of the Schur form returned + in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) REAL array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the orthogonal matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the orthogonal matrix generated by SORGHR + after the call to SGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. 
if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then SHSEQR does a workspace query. + In this case, SHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, SHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The SLAHQR vs SLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + SLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. 
+ If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== NL allocates some local workspace to help small matrices + . through a rare SLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== + + ==== Decode and check the input parameters. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + work[1] = (real) max(1,*n); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -13; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + xerbla_("SHSEQR", &i__1); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + Computing MAX +*/ + r__1 = (real) max(1,*n); + work[1] = dmax(r__1,work[1]); + return 0; + + } else { + +/* ==== copy eigenvalues isolated by SGEBAL ==== */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L20: */ + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + slaset_("A", n, n, &c_b29, &c_b15, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + +/* + ==== SLAHQR/SLAQR0 crossover point ==== + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "SHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); + +/* ==== SLAQR0 for big matrices; SLAHQR for small ones ==== */ + + if (*n > nmin) { + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); + } else { + +/* ==== Small matrix ==== */ + + slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* + ==== A rare SLAHQR failure! SLAQR0 sometimes succeeds + . when SLAHQR fails. ==== +*/ + + kbot = *info; + + if (*n >= 49) { + +/* + ==== Larger matrices have enough subdiagonal scratch + . space to call SLAQR0 directly. ==== +*/ + + slaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { + +/* + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from SLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling SLAQR0. ==== +*/ + + slacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.f; + i__1 = 49 - *n; + slaset_("A", &c__49, &i__1, &c_b29, &c_b29, &hl[(*n + 1) * + 49 - 49], &c__49); + slaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + slacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + slaset_("L", &i__1, &i__3, &c_b29, &c_b29, &h__[h_dim1 + 3], ldh); + } + +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== + + Computing MAX +*/ + r__1 = (real) max(1,*n); + work[1] = dmax(r__1,work[1]); + } + +/* ==== End of SHSEQR ==== */ + + return 0; +} /* shseqr_ */ + +logical sisnan_(real *sin__) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + extern logical slaisnan_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SISNAN returns .TRUE. if its argument is NaN, and .FALSE. + otherwise. To be replaced by the Fortran 2003 intrinsic in the + future. + + Arguments + ========= + + SIN (input) REAL + Input to test for NaN. 
+ + ===================================================================== +*/ + + ret_val = slaisnan_(sin__, sin__); + return ret_val; +} /* sisnan_ */ + +/* Subroutine */ int slabad_(real *small, real *large) +{ + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLABAD takes as input the values computed by SLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by SLAMCH. This subroutine is needed because + SLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) REAL + On entry, the underflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) REAL + On entry, the overflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. +*/ + + if (r_lg10(large) > 2e3f) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of SLABAD */ + +} /* slabad_ */ + +/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, + integer *ldx, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), slarfg_( + integer *, real *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by SGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. 
+ If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) REAL array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) REAL array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = 
i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b29, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * 
y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + sgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of SLABRD */ + +} /* slabrd_ */ + +/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a, + integer *lda, real *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) REAL array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of SLACPY */ + +} /* slacpy_ */ + +/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p, + real *q) +{ + static real e, f; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLADIV performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + + The algorithm is due to Robert L. Smith and can be found + in D. 
Knuth, The art of Computer Programming, Vol.2, p.195 + + Arguments + ========= + + A (input) REAL + B (input) REAL + C (input) REAL + D (input) REAL + The scalars a, b, c, and d in the above expression. + + P (output) REAL + Q (output) REAL + The scalars p and q in the above expression. + + ===================================================================== +*/ + + + if (dabs(*d__) < dabs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of SLADIV */ + +} /* sladiv_ */ + +/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2) +{ + /* System generated locals */ + real r__1; + + /* Local variables */ + static real ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, and RT2 + is the eigenvalue of smaller absolute value. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. + + B (input) REAL + The (1,2) and (2,1) elements of the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. 
+*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + } + return 0; + +/* End of SLAE2 */ + +} /* slae2_ */ + +/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real + *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, + real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer iperm, indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer iqptr, tlvls; + extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, + integer *, real *, integer *, real *, integer *, integer *), + slaed7_(integer *, integer *, integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, integer *, integer *, integer *, integer *, integer *, + real *, real *, integer *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. + On exit, its eigenvalues. + + E (input) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. 
+ + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) REAL array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) REAL array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). 
+*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + SLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + SLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. 
+*/ + + if (*icompq == 2) { + slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of SLAED0 */ + +} /* slaed0_ */ + +/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq, + integer *indxq, real *rho, integer *cutpnt, real *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed2_(integer *, integer *, integer *, real *, real + *, integer *, integer *, real *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *, integer *), slaed3_( + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, real *, integer *, integer *, real *, real *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. 
The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) REAL + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) REAL array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in SLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. 
+*/ + + scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + cpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of SLAED1 */ + +} /* slaed1_ */ + +/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * + dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * + indxp, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static real eps, tau, tol; + static integer psm[4], imax, jmax, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. 
+ + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + SLAED3. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. + + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to SLAED3. + + Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + SLAED3 in a matrix multiply (SGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. + On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1.f / sqrt(2.f); + sscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( + r__2)); + tol = eps * 8.f * dmax(r__3,r__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. 
+*/ + + tau = slapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.f; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + r__1 = c__; +/* Computing 2nd power */ + r__2 = s; + t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); +/* Computing 2nd power */ + r__1 = s; +/* Computing 2nd power */ + r__2 = c__; + d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in SLAED3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of SLAED2 */ + +} /* slaed2_ */ + +/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * + indx, integer *ctot, real *w, real *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), scopy_(integer *, real *, + integer *, real *, integer *), slaed4_(integer *, integer *, real + *, real *, real *, real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( + char *, integer *, integer *, real *, integer *, real *, integer * + ), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to SLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) REAL array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) REAL array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) REAL + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) REAL array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. 
+ + Q2 (input) REAL array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see SLAED2). + The rows of the eigenvectors found by SLAED4 must be likewise + permuted before the matrix multiply can take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. The fourth column type is any + column which has been deflated. + + W (input/output) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) REAL array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. 
*/ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = snrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. */ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + sgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & + c_b29, &q[*n1 + 1 + q_dim1], ldq); + } else { + slaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); + } + + slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + sgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, + &q[q_offset], ldq); + } else { + slaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of SLAED3 */ + +} /* slaed3_ */ + +/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *dlam, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a, b, c__; + static integer j; + static real w; + static integer ii; + static real dw, zz[3]; + static integer ip1; + static real del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static real dphi, dpsi; + static integer iter; + static real temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, + real *, real *), slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static logical swtch3; + extern doublereal slamch_(char *); + static logical orgati; + static real erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. 
The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) REAL array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) REAL array, dimension (N) + The components of the updating vector. + + DELTA (output) REAL array, dimension (N) + If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. If N = 2, see SLAED5 + for detail. The vector DELTA contains the information necessary + to construct the eigenvectors by SLAED3 and SLAED9. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. 
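+
+   For reference, writing LAMBDA = D(I) + TAU (or D(I+1) + TAU when the
+   origin is taken at D(I+1)), the secular function evaluated below is
+
+      W(LAMBDA) = 1/RHO + SUM(J=1,N) Z(J)**2 / ( D(J) - LAMBDA ),
+
+   whose N roots are the updated eigenvalues. The code carries TAU and
+   the shifted differences DELTA(J) = D(J) - LAMBDA rather than LAMBDA
+   itself, so that these differences can be formed without cancellation.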
+*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.f; + return 0; + } + if (*n == 2) { + slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.f; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.f; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.f) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.f; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.f) { + c__ = dabs(c__); + } + if (c__ == 0.f) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. 
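+
+   Here ETA is a root of the quadratic  C*ETA**2 - A*ETA + B = 0  that
+   results from replacing PSI and PHI by simple poles at D(N-1) and D(N)
+   (the interpolating rational model mentioned in the header). The two
+   root formulas used above are algebraically equivalent ways of writing
+   the same root, selected by the sign of A to avoid cancellation.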
+*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.f; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.f; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.f) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. 
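+
+   With D(I) as origin the initial guess TAU (approximating
+   LAMBDA(I) - D(I)) is obtained from the two-pole model
+
+      C - Z(I)**2/TAU + Z(I+1)**2/(DEL-TAU) = 0,   DEL = D(I+1) - D(I),
+
+   which, after clearing denominators, is the quadratic
+   C*TAU**2 - A*TAU + B = 0 with A = C*DEL + Z(I)**2 + Z(I+1)**2 and
+   B = Z(I)**2*DEL, solved just below.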
+*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } else { + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } + dltlb = 0.f; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); + } else { + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); + } + dltlb = -midpt; + dltub = 0.f; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.f) { + swtch3 = TRUE_; + } + } else { + if (w > 0.f) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! 
swtch3) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * + r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * + r__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + prew = w; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + (r__1 = tau + eta, dabs(r__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } else { + if (w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (r__1 * r__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + swtch = ! 
swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of SLAED4 */ + +} /* slaed4_ */ + +/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dlam) +{ + /* System generated locals */ + real r__1; + + /* Local variables */ + static real b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) REAL array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) REAL array, dimension (2) + The components of the updating vector. + + DELTA (output) REAL array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; + if (w > 0.f) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + } else { + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + } else { + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF SLAED5 */ + +} /* slaed5_ */ + +/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, + real *d__, real *z__, real *finit, real *tau, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static real a, b, c__, f; + static integer 
i__; + static real fc, df, ddf, lbd, eta, ubd, eps, base; + static integer iter; + static real temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static real small1, small2, sminv1, sminv2, dscale[3], sclfac; + extern doublereal slamch_(char *); + static real zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + February 2007 + + + Purpose + ======= + + SLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by SLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to SLAED4 for its significance. + + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + SLAED4 for further details. + + RHO (input) REAL + Refer to the equation f(x) above. + + D (input) REAL array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) REAL array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) REAL + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) REAL + The root of the equation f(x). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + 30/06/99: Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + 10/02/03: This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). SJH. + + 05/10/06: Modified from a new version of Ren-Cang Li, use + Gragg-Thornton-Warner cubic convergent scheme for better stability. 
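+
+   Note that the root is kept bracketed throughout: LBD and UBD start at
+   the pair of poles enclosing the root (then one endpoint is moved to
+   zero according to the sign of FINIT), the bracket is tightened after
+   every function evaluation, and any step that would leave [LBD,UBD] is
+   replaced by the midpoint of the bracket.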
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + *info = 0; + + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.f) { + lbd = 0.f; + } else { + ubd = 0.f; + } + + niter = 1; + *tau = 0.f; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.f; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.f; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + *tau = b / a; + } else if (a <= 0.f) { + *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.f; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { + *tau = 0.f; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } + if (dabs(*finit) <= dabs(temp)) { + *tau = 0.f; + } + } + } + +/* + get machine parameters for possible scaling to avoid overflow + + modified by Sven: parameters SMALL1, SMINV1, SMALL2, + SMINV2, EPS are not SAVEd anymore between one call to the + others but recomputed at each call +*/ + + eps = slamch_("Epsilon"); + base = slamch_("Base"); + i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f); + small1 = pow_ri(&base, &i__1); + sminv1 = 1.f / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } else { +/* Computing MIN */ + r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (dabs(f) 
<= 0.f) { + goto L60; + } + if (f <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } + +/* + Iteration begins -- Use Gragg-Thornton-Warner cubic convergent + scheme + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. +*/ + + iter = niter + 1; + + for (niter = iter; niter <= 40; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + if (f * eta >= 0.f) { + eta = -f / df; + } + + *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.f; + } + + fc = 0.f; + erretm = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += dabs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; + if (dabs(f) <= eps * erretm) { + goto L60; + } + if (f <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of SLAED6 */ + +} /* slaed6_ */ + +/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, + integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * + qstore, integer *qptr, integer *prmptr, integer *perm, integer * + givptr, integer *givcol, real *givnum, real *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr, + indxc; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer indxp; + extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *, + integer *, real *, integer *, integer *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. SLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) REAL + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. 
+ + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) REAL array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED8 and SLAED9. +*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. 
+*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b29, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of SLAED7 */ + +} /* slaed7_ */ + +/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, + integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, + real *w, integer *perm, integer *givptr, integer *givcol, real * + givnum, integer *indxp, integer *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. 
+ + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) REAL array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + SLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + Q2 (output) REAL array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to SLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED8", &i__1); + return 0; + } + +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). +*/ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. 
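+   The test used is RHO*ABS(Z(J)) <= TOL, with TOL = 8*EPS*ABS(D(JMAX))
+   computed above, i.e. the component is negligible at working precision
+   relative to the largest eigenvalue in magnitude.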
*/ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of SLAED8 */ + +} /* slaed8_ */ + +/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, + real *w, real *s, integer *lds, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed4_(integer *, integer *, real *, real *, real *, + real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to SLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). + + D (output) REAL array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. + + Q (workspace) REAL array, dimension (LDQ,N) + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + RHO (input) REAL + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input) REAL array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + W (input) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) REAL array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. 
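+
+   A minimal sketch of what the loop below computes: SLAED4 has already
+   found the roots D(J) of the secular equation, which is of the form
+
+      1 + RHO * sum_{I=1}^{K}  W(I)**2 / ( DLAMDA(I) - x )  =  0
+
+   (see the SLAED4 comments for the exact convention), and has left the
+   differences DLAMDA(I) - D(J) in Q(I,J).  Using the W recomputed just
+   above, the J-th eigenvector of the rank-1 modified matrix is then
+   taken, up to normalization, as
+
+      V(I) = W(I) / ( DLAMDA(I) - D(J) ),   I = 1, ..., K,
+
+   and S(:,J) = V / norm2(V).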
*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of SLAED9 */ + +} /* slaed9_ */ + +/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, real *givnum, real *q, integer *qptr, real *z__, + real *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k, mid, ptr, curr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) REAL array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. + + Z (output) REAL array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). 
+ + ZTEMP (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. +*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.f; +/* L10: */ + } + scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.f; +/* L20: */ + } + +/* + Loop through remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. 
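+
+      Roughly, the two SGEMV('T',...) calls below compute
+         Z(ZPTR1 : ZPTR1+BSIZ1-1) := Q1' * ZTEMP(1 : BSIZ1)
+         Z(MID   : MID+BSIZ2-1)   := Q2' * ZTEMP(PSIZ1+1 : PSIZ1+BSIZ2)
+      where Q1 and Q2 are the stored eigenvector blocks at CURR and
+      CURR+1; any remaining (deflated) entries of ZTEMP are copied
+      through unchanged by the SCOPY calls.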
+*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + + .5f); + if (bsiz1 > 0) { + sgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + sgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of SLAEDA */ + +} /* slaeda_ */ + +/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real * + rt2, real *cs1, real *sn1) +{ + /* System generated locals */ + real r__1; + + /* Local variables */ + static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static real acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. + + B (input) REAL + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + CS1 (output) REAL + SN1 (output) REAL + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. 
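+
+        As a sketch of the arithmetic involved: the two eigenvalues
+        satisfy RT1 + RT2 = A + C and RT1 * RT2 = A*C - B*B, so once
+        RT1 is known the smaller one is recovered from the product,
+        RT2 = ( A*C - B*B ) / RT1, written below as
+        ACMX / RT1 * ACMN  -  B / RT1 * B
+        to limit overflow and intermediate cancellation.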
+*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.f) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = dabs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1.f / sqrt(ct * ct + 1.f); + *cs1 = ct * *sn1; + } else { + if (ab == 0.f) { + *cs1 = 1.f; + *sn1 = 0.f; + } else { + tn = -cs / tb; + *cs1 = 1.f / sqrt(tn * tn + 1.f); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of SLAEV2 */ + +} /* slaev2_ */ + +/* Subroutine */ int slaexc_(logical *wantq, integer *n, real *t, integer * + ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, + real *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + real r__1, r__2, r__3; + + /* Local variables */ + static real d__[16] /* was [4][4] */; + static integer k; + static real u[3], x[4] /* was [2][2] */; + static integer j2, j3, j4; + static real u1[3], u2[3]; + static integer nd; + static real cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, + tau2; + static integer ierr; + static real temp; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static real scale, dnorm, xnorm; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slasy2_(logical *, + logical *, integer *, integer *, integer *, real *, integer *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *), slartg_(real *, real *, real *, real * + , real *); + static real thresh; + extern /* Subroutine */ int slarfx_(char *, integer *, integer *, real *, + real *, real *, integer *, real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + an upper quasi-triangular matrix T by an orthogonal similarity + transformation. + + T must be in Schur canonical form, that is, block upper triangular + with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + has its diagonal elemnts equal and its off-diagonal elements of + opposite sign. + + Arguments + ========= + + WANTQ (input) LOGICAL + = .TRUE. : accumulate the transformation in the matrix Q; + = .FALSE.: do not accumulate the transformation. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) REAL array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + canonical form. + On exit, the updated matrix T, again in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. 
LDT >= max(1,N). + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, if WANTQ is .TRUE., the orthogonal matrix Q. + On exit, if WANTQ is .TRUE., the updated matrix Q. + If WANTQ is .FALSE., Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. + LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. + + J1 (input) INTEGER + The index of the first row of the first block T11. + + N1 (input) INTEGER + The order of the first block T11. N1 = 0, 1 or 2. + + N2 (input) INTEGER + The order of the second block T22. N2 = 0, 1 or 2. + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + = 1: the transformed matrix T would be too far from Schur + form; the blocks are not swapped and T and Q are + unchanged. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + + if (*n1 == 1 && *n2 == 1) { + +/* Swap two 1-by-1 blocks. */ + + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + +/* Determine the transformation to perform the interchange. */ + + r__1 = t22 - t11; + slartg_(&t[*j1 + j2 * t_dim1], &r__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + srot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], + ldt, &cs, &sn); + } + i__1 = *j1 - 1; + srot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, + &cs, &sn); + + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, + &cs, &sn); + } + + } else { + +/* + Swapping involves at least one 2-by-2 block. + + Copy the diagonal block of order N1+N2 to the local array D + and compute its norm. +*/ + + nd = *n1 + *n2; + slacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); + dnorm = slange_("Max", &nd, &nd, d__, &c__4, &work[1]); + +/* + Compute machine-dependent threshold for test for accepting + swap. +*/ + + eps = slamch_("P"); + smlnum = slamch_("S") / eps; +/* Computing MAX */ + r__1 = eps * 10.f * dnorm; + thresh = dmax(r__1,smlnum); + +/* Solve T11*X - X*T22 = scale*T12 for X. */ + + slasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & + scale, x, &c__2, &xnorm, &ierr); + +/* Swap the adjacent diagonal blocks. */ + + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + } + +L10: + +/* + N1 = 1, N2 = 2: generate elementary reflector H so that: + + ( scale, X11, X12 ) H = ( 0, 0, * ) +*/ + + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + slarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.f; + t11 = t[*j1 + *j1 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. 
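+
+       In rough terms: the entries of the provisionally swapped block D
+       that ought to be (numerically) zero, together with the deviation
+       of the carried-over diagonal element, are compared against
+       THRESH = max( 10*EPS*norm_max(D), SMLNUM ); if any of them
+       exceeds THRESH the swap is rejected below (INFO = 1) and T and Q
+       are left unchanged.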
+ + Computing MAX +*/ + r__2 = dabs(d__[2]), r__3 = dabs(d__[6]), r__2 = max(r__2,r__3), r__3 + = (r__1 = d__[10] - t11, dabs(r__1)); + if (dmax(r__2,r__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + + t[j3 + *j1 * t_dim1] = 0.f; + t[j3 + j2 * t_dim1] = 0.f; + t[j3 + j3 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + slarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L20: + +/* + N1 = 2, N2 = 1: generate elementary reflector H so that: + + H ( -X11 ) = ( * ) + ( -X21 ) = ( 0 ) + ( scale ) = ( 0 ) +*/ + + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + slarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.f; + t33 = t[j3 + j3 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + r__2 = dabs(d__[1]), r__3 = dabs(d__[2]), r__2 = max(r__2,r__3), r__3 + = (r__1 = d__[0] - t33, dabs(r__1)); + if (dmax(r__2,r__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + slarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + i__1 = *n - *j1; + slarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ + 1]); + + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.f; + t[j3 + *j1 * t_dim1] = 0.f; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + slarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L30: + +/* + N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so + that: + + H(2) H(1) ( -X11 -X12 ) = ( * * ) + ( -X21 -X22 ) ( 0 * ) + ( scale 0 ) ( 0 0 ) + ( 0 scale ) ( 0 0 ) +*/ + + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + slarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.f; + + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + slarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.f; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) + ; + slarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) + ; + slarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); + slarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + r__1 = dabs(d__[2]), r__2 = dabs(d__[6]), r__1 = max(r__1,r__2), r__2 + = dabs(d__[3]), r__1 = max(r__1,r__2), r__2 = dabs(d__[7]); + if (dmax(r__1,r__2) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ + 1]); + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] + ); + + t[j3 + *j1 * t_dim1] = 0.f; + t[j3 + j2 * t_dim1] = 0.f; + t[j4 + *j1 * t_dim1] = 0.f; + t[j4 + j2 * t_dim1] = 0.f; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. 
*/ + + slarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & + work[1]); + slarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ + 1]); + } + +L40: + + if (*n2 == 2) { + +/* Standardize new 2-by-2 block T11 */ + + slanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * + j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & + wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + srot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) + * t_dim1], ldt, &cs, &sn); + i__1 = *j1 - 1; + srot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + if (*n1 == 2) { + +/* Standardize new 2-by-2 block T22 */ + + j3 = *j1 + *n2; + j4 = j3 + 1; + slanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * + t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & + cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + srot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) + * t_dim1], ldt, &cs, &sn); + } + i__1 = j3 - 1; + srot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + srot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + } + return 0; + +/* Exit with INFO = 1 if swap was rejected. */ + +L50: + *info = 1; + return 0; + +/* End of SLAEXC */ + +} /* slaexc_ */ + +/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * + info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static integer i__, j, k, l, m; + static real s, v[3]; + static integer i1, i2; + static real t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; + static integer nh; + static real sn; + static integer nr; + static real tr; + static integer nz; + static real det, h21s; + static integer its; + static real ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), scopy_(integer *, real *, integer *, + real *, integer *), slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slabad_(real *, real *) + ; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + static real safmax, rtdisc, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAHQR is an auxiliary routine called by SHSEQR to update the + eigenvalues and Schur decomposition already computed by SHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). 
SLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO is zero and if WANTT is .TRUE., H is upper + quasi-triangular in rows and columns ILO:IHI, with any + 2-by-2 diagonal blocks in standard form. If INFO is zero + and WANTT is .FALSE., the contents of H are unspecified on + exit. The output state of H if INFO is nonzero is given + below under the description of INFO. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) REAL array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by SHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: If INFO = i, SLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of WR and WI + contain those eigenvalues which have been + successfully computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix rows + and columns ILO thorugh INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of SLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). 
+ + ========================================================= +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.f; + h__[j + 3 + j * h_dim1] = 0.f; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.f; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* Set machine-dependent constants for the stopping criterion. */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + for (its = 0; its <= 30; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= smlnum) { + goto L40; + } + tst = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 = + h__[k + k * h_dim1], dabs(r__2)); + if (tst == 0.f) { + if (k - 2 >= *ilo) { + tst += (r__1 = h__[k - 1 + (k - 2) * h_dim1], dabs(r__1)); + } + if (k + 1 <= *ihi) { + tst += (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)); + } + } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some cases. 
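+            .    In outline, and ignoring the common scaling by
+            .    S = AA + AB used in the code, the subdiagonal entry
+            .    H(K,K-1) is treated as negligible when
+            .       |H(K,K-1)| * |H(K-1,K)|  <=  max( SMLNUM, ULP * AA * BB )
+            .    where AA and BB are the larger and smaller of
+            .    |H(K,K)| and |H(K-1,K-1) - H(K,K)|.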
==== +*/ + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= ulp * tst) { +/* Computing MAX */ + r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = + (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); + ab = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = + (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); + ba = dmin(r__3,r__4); +/* Computing MAX */ + r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 + = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + dabs(r__2)); + aa = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 + = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + dabs(r__2)); + bb = dmin(r__3,r__4); + s = aa + ab; +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= dmax(r__1,r__2)) { + goto L40; + } + } +/* L30: */ + } +L40: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.f; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L150; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10) { + +/* Exceptional shift. */ + + s = (r__1 = h__[l + 1 + l * h_dim1], dabs(r__1)) + (r__2 = h__[l + + 2 + (l + 1) * h_dim1], dabs(r__2)); + h11 = s * .75f + h__[l + l * h_dim1]; + h12 = s * -.4375f; + h21 = s; + h22 = h11; + } else if (its == 20) { + +/* Exceptional shift. */ + + s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); + h11 = s * .75f + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375f; + h21 = s; + h22 = h11; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = dabs(h11) + dabs(h12) + dabs(h21) + dabs(h22); + if (s == 0.f) { + rt1r = 0.f; + rt1i = 0.f; + rt2r = 0.f; + rt2i = 0.f; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.f; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((dabs(det))); + if (det >= 0.f) { + +/* ==== complex conjugate shifts ==== */ + + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + +/* ==== real shifts (use only one of them) ==== */ + + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((r__1 = rt1r - h22, dabs(r__1)) <= (r__2 = rt2r - h22, + dabs(r__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.f; + rt2i = 0.f; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__1 = l; + for (m = i__ - 2; m >= i__1; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. (The following uses scaling to avoid + overflows and most underflows.) 
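+
+               Up to the common scaling by S, the vector V computed
+               below is (the nonzero part of) the first column of
+                  ( H - RT1*I ) * ( H - RT2*I ) * e1
+               for the active submatrix starting at row M, i.e. the
+               standard Francis double-shift start vector, with the
+               complex-conjugate shift pair folded into real
+               arithmetic.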
+*/ + + h21s = h__[m + 1 + m * h_dim1]; + s = (r__1 = h__[m + m * h_dim1] - rt2r, dabs(r__1)) + dabs(rt2i) + + dabs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - + rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i + / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] + - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = dabs(v[0]) + dabs(v[1]) + dabs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((r__1 = h__[m + (m - 1) * h_dim1], dabs(r__1)) * (dabs(v[1]) + + dabs(v[2])) <= ulp * dabs(v[0]) * ((r__2 = h__[m - 1 + ( + m - 1) * h_dim1], dabs(r__2)) + (r__3 = h__[m + m * + h_dim1], dabs(r__3)) + (r__4 = h__[m + 1 + (m + 1) * + h_dim1], dabs(r__4)))) { + goto L60; + } +/* L50: */ + } +L60: + +/* Double-shift QR step */ + + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__2 = 3, i__3 = i__ - k + 1; + nr = min(i__2,i__3); + if (k > m) { + scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + slarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.f; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.f; + } + } else if (m > l) { +/* + ==== Use the following instead of + . H( K, K-1 ) = -H( K, K-1 ) to + . avoid a bug when v(2) and v(3) + . underflow. ==== +*/ + h__[k + (k - 1) * h_dim1] *= 1.f - t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L70: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__3 = k + 3; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L80: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L90: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L100: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). 
+*/ + + i__2 = i__; + for (j = i1; j <= i__2; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L110: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L120: */ + } + } + } +/* L130: */ + } + +/* L140: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L150: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* return to start of the main loop with new value of I. */ + + i__ = l - 1; + goto L20; + +L160: + return 0; + +/* End of SLAHQR */ + +} /* slahqr_ */ + +/* Subroutine */ int slahr2_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + real r__1; + + /* Local variables */ + static integer i__; + static real ei; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + strmm_(char *, char *, char *, char *, integer *, integer *, real + *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), strmv_(char *, char *, char *, integer *, real *, + integer *, real *, integer *), slarfg_( + integer *, real *, real *, integer *, real *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. 
The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by SGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + K < N. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) REAL array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) REAL array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
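+
+   Loosely, the column loop below proceeds as follows for I = 1, ..., NB:
+   first apply the reflectors generated so far to column I (the
+   "A - Y*V'" and "(I - V*T*V') * b" updates), then generate the
+   elementary reflector H(I) annihilating A(K+I+1:N,I), and finally
+   extend Y = A*V*T and the triangular factor T by one column.  This is
+   only a summary; the references above give the precise blocked
+   formulation.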
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(K+1:N,I) + + Update I-th column of A - Y * V' +*/ + + i__2 = *n - *k; + i__3 = i__ - 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b15, &a[*k + 1 + + i__ * a_dim1], &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + strmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + strmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + strmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + saxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(K+1:N,I) */ + + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b15, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[* + k + 1 + i__ * y_dim1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b15, &y[*k + 1 + i__ * + y_dim1], &c__1); + i__2 = *n - *k; + sscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + +/* Compute T(1:I,I) */ + + i__2 = i__ - 1; + r__1 = -tau[i__]; + sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + strmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + +/* Compute Y(1:K,1:NB) */ + + slacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + 
strmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b15, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + sgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b15, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b15, &y[y_offset], ldy); + } + strmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b15, &t[ + t_offset], ldt, &y[y_offset], ldy); + + return 0; + +/* End of SLAHR2 */ + +} /* slahr2_ */ + +logical slaisnan_(real *sin1, real *sin2) +{ + /* System generated locals */ + logical ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + This routine is not for general use. It exists solely to avoid + over-optimization in SISNAN. + + SLAISNAN checks for NaNs by comparing its two arguments for + inequality. NaN is the only floating-point value where NaN != NaN + returns .TRUE. To check for NaNs, pass the same variable as both + arguments. + + A compiler must assume that the two arguments are + not the same variable, and the test will not be optimized away. + Interprocedural or whole-program optimization may delete this + test. The ISNAN functions will be replaced by the correct + Fortran 03 intrinsic once the intrinsic is widely available. + + Arguments + ========= + + SIN1 (input) REAL + + SIN2 (input) REAL + Two numbers to compare for inequality. + + ===================================================================== +*/ + + ret_val = *sin1 != *sin2; + return ret_val; +} /* slaisnan_ */ + +/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * + smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, + integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, + real *xnorm, integer *info) +{ + /* Initialized data */ + + static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + real r__1, r__2, r__3, r__4, r__5, r__6; + static real equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, + cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static real csr, ur11, ur12, ur22; +#define crv (equiv_1) + static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static real bnorm, cnorm, smini; + extern doublereal slamch_(char *); + static real bignum; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) 
+ + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by SLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) REAL + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) REAL + The coefficient c, which A is multiplied by. + + A (input) REAL array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) REAL + The 1,1 element in the diagonal matrix D. + + D2 (input) REAL + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) REAL array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) REAL + The real part of the scalar "w". + + WI (input) REAL + The imaginary part of the scalar "w". Not used if NW=1. + + X (output) REAL array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by SLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. + + SCALE (output) REAL + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) REAL + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. 
+ = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2.f * slamch_("Safe minimum"); + bignum = 1.f / smlnum; + smini = dmax(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.f; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = dabs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = dabs(csr) + dabs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.f; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 << + 1) + 1], dabs(r__2)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + r__1 = *scale * b[b_dim1 + 1]; + r__2 = *scale * b[(b_dim1 << 1) + 1]; + sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << + 1) + 1], dabs(r__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.f; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[ + b_dim1 + 2], dabs(r__2)); + bnorm = dmax(r__3,r__4); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. 
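+
+             A brief sketch of the scheme used below: the entry of C
+             with largest absolute value (index ICMAX) is moved to the
+             pivot position via the IPIVOT lookup table, one
+             elimination step reduces the 2-by-2 system to upper
+             triangular form (UR11, UR12, UR22 with multiplier LR21),
+             back substitution gives the solution, and RSWAP / CSWAP
+             undo the implied row and column interchanges.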
*/ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1.f / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (dabs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2) + ; + bbnd = dmax(r__2,r__3); + if (bbnd > 1.f && dabs(ur22) < 1.f) { + if (bbnd >= bignum * dabs(ur22)) { + *scale = 1.f / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + r__1 = dabs(xr1), r__2 = dabs(xr2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.f; + ci[2] = 0.f; + ci[3] = -(*wi) * *d2; + cmax = 0.f; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1], + dabs(r__2)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - + 1], dabs(r__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 + << 1) + 1], dabs(r__2)), r__6 = (r__3 = b[b_dim1 + 2], + dabs(r__3)) + (r__4 = b[(b_dim1 << 1) + 2], dabs( + r__4)); + bnorm = dmax(r__5,r__6); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. 
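+
+   Illustrative note (added; not part of the original LAPACK comment):
+   the same elimination is performed on the complex matrix
+   C = CR + i*CI, with real and imaginary parts carried separately
+   (ur11/ui11, lr21/li21, ...).  The reciprocal of the complex pivot
+   is formed from
+
+       1/(a + i*b) = (a - i*b)/(a*a + b*b),
+
+   evaluated after dividing through by whichever of |a|, |b| is
+   larger, so the intermediate squares cannot overflow.
+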
*/ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (dabs(ur11) > dabs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + r__1 = temp; + ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + r__1 = temp; + ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1.f / ur11; + ui11r = 0.f; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = dabs(ur22) + dabs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.f; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs( + ui11r))), r__2 = dabs(br2) + dabs(bi2); + bbnd = dmax(r__1,r__2); + if (bbnd > 1.f && u22abs < 1.f) { + if (bbnd >= bignum * u22abs) { + *scale = 1.f / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of SLALN2 */ + +} /* slaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, + integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, 
givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, m, n; + static real dj; + static integer nlp1; + static real temp; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern doublereal snrm2_(integer *, real *, integer *); + static real diflj, difrj, dsigj; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), scopy_( + integer *, real *, integer *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real dsigjp; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) REAL array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) REAL array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. 
+ + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) REAL array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) REAL array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) REAL array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) REAL array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) REAL array, dimension ( K ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. 
+ + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. +*/ + + if (*k == 1) { + scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.f) { + sscal_(nrhs, &c_b151, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) { + work[j] = 0.f; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == + 0.f) { + work[i__] = 0.f; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == + 0.f) { + work[i__] = 0.f; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.f; + temp = snrm2_(k, &work[1], &c__1); + sgemv_("T", k, nrhs, &c_b15, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b29, &b[j + b_dim1], ldb); + slascl_("G", &c__0, &c__0, &temp, &c_b15, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. 
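+
+   Illustrative note (added; not part of the original LAPACK comment):
+   as in the left-hand branch above, the K x K singular vector matrix
+   is never formed explicitly.  For each column J a work vector is
+   assembled from Z, the POLES and the DIFL/DIFR gaps, and a single
+   call to SGEMV accumulates its inner products with the right-hand
+   sides, so the cost per column is O(K*NRHS) plus O(K) setup.
+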
+*/ + + if (*k == 1) { + scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.f) { + work[j] = 0.f; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.f) { + work[i__] = 0.f; + } else { + r__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.f) { + work[i__] = 0.f; + } else { + r__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + sgemv_("T", k, nrhs, &c_b15, &b[b_offset], ldb, &work[1], & + c__1, &c_b29, &bx[j + bx_dim1], ldbx); +/* L80: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. 
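+
+   Illustrative note (added; not part of the original LAPACK comment):
+   the rotations recorded in step (1L) are undone by applying them in
+   reverse order with the sine negated, using the identity
+
+       [  c  s ]   [ c  -s ]     [ 1  0 ]
+       [ -s  c ] * [ s   c ]  =  [ 0  1 ]    (c*c + s*s = 1),
+
+   which is what the loop below does by passing -GIVNUM(I,1) as the
+   sine to SROT.
+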
*/ + + for (i__ = *givptr; i__ >= 1; --i__) { + r__1 = -givnum[i__ + givnum_dim1]; + srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &r__1); +/* L100: */ + } + } + + return 0; + +/* End of SLALS0 */ + +} /* slals0_ */ + +/* Subroutine */ int slalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * + u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * + z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol, + integer *perm, real *givnum, real *c__, real *s, real *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, nlvl, sqre, inode, ndiml; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer ndimr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slals0_(integer *, integer *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *, + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, integer *, real *, real *, real *, integer *), + xerbla_(char *, integer *), slasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, SLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by SLALSA. + + Arguments + ========= + + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) REAL array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). + + BX (output) REAL array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. 
+ + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) REAL array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) REAL array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) REAL array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) REAL array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) REAL array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) REAL array. + The dimension must be at least N. + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
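+
+   Worked example (added for illustration; not part of the original
+   LAPACK comment): with N = 1000 and SMLSIZ = 25 the computation tree
+   has NLVL = INT(log_2(1000/26)) + 1 = 5 + 1 = 6 levels, so DIFL and
+   Z above carry NLVL columns, DIFR, POLES and GIVNUM carry 2*NLVL
+   columns, while WORK needs at least N and IWORK at least 3*N
+   entries.
+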
+*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 50. +*/ + + if (*icompq == 1) { + goto L50; + } + +/* + The nodes on the bottom level of the tree were solved + by SLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + sgemm_("T", "N", &nl, nrhs, &nl, &c_b15, &u[nlf + u_dim1], ldu, &b[ + nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + sgemm_("T", "N", &nr, nrhs, &nr, &c_b15, &u[nrf + u_dim1], ldu, &b[ + nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + scopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. 
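+
+   Illustrative note (added; not part of the original LAPACK comment):
+   level LVL of the tree holds nodes LF = 2**(LVL-1) through
+   LL = 2*LF - 1, e.g. for NLVL = 3 the levels contain nodes {4..7},
+   {2..3} and {1}.  The counter J starts at 2**NLVL and is decremented
+   once per node, so it indexes the per-subproblem arrays K, GIVPTR,
+   C and S consistently across levels.
+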
+*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + slals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + slals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* + The nodes on the bottom level of the tree were solved + by SLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. 
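+
+   Illustrative note (added; not part of the original LAPACK comment):
+   at the leaves the right singular vector blocks are dense, so each
+   one is applied with a single SGEMM on blocks of order NL+1 and NRP1
+   (NR+1, or NR for the last node); only the interior levels use the
+   implicit, column-by-column scheme of SLALS0 above.
+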
+*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b15, &vt[nlf + vt_dim1], ldu, + &b[nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b15, &vt[nrf + vt_dim1], ldu, + &b[nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L80: */ + } + +L90: + + return 0; + +/* End of SLALSA */ + +} /* slalsa_ */ + +/* Subroutine */ int slalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, + integer *rank, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer c__, i__, j, k; + static real r__; + static integer s, u, z__; + static real cs; + static integer bx; + static real sn; + static integer st, vt, nm1, st1; + static real eps; + static integer iwk; + static real tol; + static integer difl, difr; + static real rcnd; + static integer perm, nsub, nlvl, sqre, bxst; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sgemm_(char *, char *, integer *, + integer *, integer *, real *, real *, integer *, real *, integer * + , real *, real *, integer *); + static integer poles, sizei, nsize; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer nwork, icmpq1, icmpq2; + extern doublereal slamch_(char *); + extern /* Subroutine */ int slasda_(integer *, integer *, integer *, + integer *, real *, real *, real *, integer *, real *, integer *, + real *, real *, real *, real *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, integer *, integer *), + xerbla_(char *, integer *), slalsa_(integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *, real *, real *, real * + , real *, integer *, integer *), slascl_(char *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , integer *); + static integer givcol; + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer + *, integer *, integer *, real *, real *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), + slacpy_(char *, integer *, integer *, real *, integer *, real *, + integer *), slartg_(real *, real *, real *, real *, real * + ), slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static real orgnrm; + static integer givnum; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + static integer givptr, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) REAL array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input/output) REAL array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) REAL array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) REAL + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) REAL array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), + where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
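+
+   Worked example (added for illustration; not part of the original
+   LAPACK comment): for a hypothetical call with N = 100, SMLSIZ = 25
+   and NRHS = 1 we get NLVL = INT(log_2(100/26)) + 1 = 2, so the
+   bounds above give
+
+       WORK  >= 9*100 + 2*100*25 + 8*100*2 + 100*1 + 26*26 = 8276
+       IWORK >= 3*100*2 + 11*100                           = 1700
+
+   and a caller would invoke the routine roughly as
+
+       slalsd_("U", &smlsiz, &n, &nrhs, d, e, b, &ldb,
+               &rcond, &rank, work, iwork, &info);
+
+   (the variable names here are hypothetical, not taken from this
+   file).
+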
+*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLALSD", &i__1); + return 0; + } + + eps = slamch_("Epsilon"); + +/* Set up the tolerance. */ + + if (*rcond <= 0.f || *rcond >= 1.f) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.f) { + slaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + } else { + *rank = 1; + slascl_("G", &c__0, &c__0, &d__[1], &c_b15, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = dabs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + srot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + srot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + slaset_("A", n, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + return 0; + } + + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + slaset_("A", n, n, &c_b29, &c_b15, &work[1], n); + slasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + tol = rcnd * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + slaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[i__ + b_dim1], + ldb); + } else { + slascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + sgemm_("T", "N", n, nrhs, n, &c_b15, &work[1], n, &b[b_offset], ldb, & + c_b29, &work[nwork], n); + slacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, + info); + slasrt_("D", n, &d__[1], info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. 
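+
+   Illustrative note (added; not part of the original LAPACK comment):
+   the code below carves the single WORK array into panels U, VT,
+   DIFL, DIFR, Z, C, S, POLES, GIVNUM and BX, and IWORK into SIZEI, K,
+   GIVPTR, PERM, GIVCOL and a scratch area IWK, so that each
+   independent subproblem found along the bidiagonal can hand SLASDA
+   and SLALSA its own slices via the ST1 offset.
+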
*/ + + nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) < eps) { + d__[i__] = r_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + scopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + scopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by SLASDQ. */ + + slaset_("A", &nsize, &nsize, &c_b29, &c_b15, &work[vt + st1], + n); + slasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + slacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. 
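+
+   Illustrative note (added; not part of the original LAPACK comment):
+   a negative D(I) produced this way only differs from the true
+   singular value by a sign that is absorbed into the vectors, so the
+   loop below divides the corresponding row of the solution by D(I)
+   and then stores |D(I)|; entries with |D(I)| <= TOL are treated as
+   zero and their rows are simply zeroed, which yields the
+   minimum-norm solution described above.
+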
+*/ + + if ((r__1 = d__[i__], dabs(r__1)) <= tol) { + slaset_("A", &c__1, nrhs, &c_b29, &c_b29, &work[bx + i__ - 1], n); + } else { + ++(*rank); + slascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &work[ + bx + i__ - 1], n, info); + } + d__[i__] = (r__1 = d__[i__], dabs(r__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + scopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b15, &work[vt + st1], n, + &work[bxst], n, &c_b29, &b[st + b_dim1], ldb); + } else { + slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, info); + slasrt_("D", n, &d__[1], info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of SLALSD */ + +} /* slalsd_ */ + +/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer * + strd1, integer *strd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. + + A (input) REAL array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + STRD1 (input) INTEGER + STRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. They indicate whether a + subset of A is sorted in ascending (STRDx = 1) or descending + (STRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. 
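+
+   Worked example (added for illustration; not part of the original
+   LAPACK comment): for N1 = 3, N2 = 2, A = ( 1, 4, 9, 2, 7 ) and
+   STRD1 = STRD2 = 1, the routine returns INDEX = ( 1, 4, 2, 5, 3 ),
+   and indeed A(INDEX(I)) = ( 1, 2, 4, 7, 9 ) is sorted ascending.
+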
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*strd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*strd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *strd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *strd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; +/* L30: */ + } + } + + return 0; + +/* End of SLAMRG */ + +} /* slamrg_ */ + +doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + SLANGE returns the value + + SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + SLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + SLANGE is set to zero. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
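+
+   Worked example covering all four norms (added for illustration;
+   not part of the original LAPACK comment): for the 2 x 2 matrix with
+   rows ( 1, -3 ) and ( 2, 0.5 ),
+
+       'M' : 3             (largest |A(i,j)|)
+       '1' : 3.5           (max column sum: max(1+2, 3+0.5))
+       'I' : 4             (max row sum:    max(1+3, 2+0.5))
+       'F' : sqrt(14.25) ~= 3.78
+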
*/ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANGE */ + +} /* slange_ */ + +doublereal slanst_(char *norm, integer *n, real *d__, real *e) +{ + /* System generated locals */ + integer i__1; + real ret_val, r__1, r__2, r__3, r__4, r__5; + + /* Local variables */ + static integer i__; + static real sum, scale; + extern logical lsame_(char *, char *); + static real anorm; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. + + Description + =========== + + SLANST returns the value + + SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANST is + set to zero. + + D (input) REAL array, dimension (N) + The diagonal elements of A. + + E (input) REAL array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
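+
+   Worked example covering all four norms (added for illustration;
+   not part of the original LAPACK comment): for D = ( 2, -5, 3 ) and
+   E = ( 1, -1 ),
+
+       'M'       : 5
+       '1' = 'I' : 7       (middle row: 1 + 5 + 1)
+       'F'       : sqrt(4 + 25 + 9 + 2*(1 + 1)) = sqrt(42) ~= 6.48
+
+   the one and infinity norms coincide because A is symmetric.
+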
*/ + + anorm = (r__1 = d__[*n], dabs(r__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = dabs(d__[1]); + } else { +/* Computing MAX */ + r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( + r__1)) + (r__2 = d__[*n], dabs(r__2)); + anorm = dmax(r__3,r__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = + e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); + anorm = dmax(r__4,r__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (*n > 1) { + i__1 = *n - 1; + slassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + slassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of SLANST */ + +} /* slanst_ */ + +doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Local variables */ + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. + + Description + =========== + + SLANSY returns the value + + SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANSY is + set to zero. + + A (input) REAL array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + slassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANSY */ + +} /* slansy_ */ + +/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real * + rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn) +{ + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, + scale, bcmax, bcmis, sigma; + extern doublereal slapy2_(real *, real *), slamch_(char *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) REAL + B (input/output) REAL + C (input/output) REAL + D (input/output) REAL + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. + + RT1R (output) REAL + RT1I (output) REAL + RT2R (output) REAL + RT2I (output) REAL + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) REAL + SN (output) REAL + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). + + ===================================================================== +*/ + + + eps = slamch_("P"); + if (*c__ == 0.f) { + *cs = 1.f; + *sn = 0.f; + goto L10; + + } else if (*b == 0.f) { + +/* Swap rows and columns */ + + *cs = 0.f; + *sn = 1.f; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.f; + goto L10; + } else if (*a - *d__ == 0.f && r_sign(&c_b15, b) != r_sign(&c_b15, c__)) { + *cs = 1.f; + *sn = 0.f; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5f; +/* Computing MAX */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmis = dmin(r__1,r__2) * r_sign(&c_b15, b) * r_sign(&c_b15, c__); +/* Computing MAX */ + r__1 = dabs(p); + scale = dmax(r__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.f) { + +/* Real eigenvalues. Compute A and D. */ + + r__1 = sqrt(scale) * sqrt(z__); + z__ = p + r_sign(&r__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = slapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.f; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. 
+*/ + + sigma = *b + *c__; + tau = slapy2_(&sigma, &temp); + *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f); + *sn = -(p / (tau * *cs)) * r_sign(&c_b15, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5f; + *a = temp; + *d__ = temp; + + if (*c__ != 0.f) { + if (*b != 0.f) { + if (r_sign(&c_b15, b) == r_sign(&c_b15, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((dabs(*b))); + sac = sqrt((dabs(*c__))); + r__1 = sab * sac; + p = r_sign(&r__1, c__); + tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.f; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.f; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.f) { + *rt1i = 0.f; + *rt2i = 0.f; + } else { + *rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of SLANV2 */ + +} /* slanv2_ */ + +doublereal slapy2_(real *x, real *y) +{ + /* System generated locals */ + real ret_val, r__1; + + /* Local variables */ + static real w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + X and Y specify the values x and y. + + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + w = dmax(xabs,yabs); + z__ = dmin(xabs,yabs); + if (z__ == 0.f) { + ret_val = w; + } else { +/* Computing 2nd power */ + r__1 = z__ / w; + ret_val = w * sqrt(r__1 * r__1 + 1.f); + } + return ret_val; + +/* End of SLAPY2 */ + +} /* slapy2_ */ + +doublereal slapy3_(real *x, real *y, real *z__) +{ + /* System generated locals */ + real ret_val, r__1, r__2, r__3; + + /* Local variables */ + static real w, xabs, yabs, zabs; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + unnecessary overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + Z (input) REAL + X, Y and Z specify the values x, y and z. + + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + zabs = dabs(*z__); +/* Computing MAX */ + r__1 = max(xabs,yabs); + w = dmax(r__1,zabs); + if (w == 0.f) { +/* + W can be zero for max(0,nan,0) + adding all three entries together will make sure + NaN will not disappear. 
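+          More generally, the scaling used below lets, for example,
+          SLAPY3( 3.0e30, 4.0e30, 0.0 ) be evaluated as
+          4.0e30 * sqrt( 0.5625 + 1.0 + 0.0 ) = 5.0e30, whereas squaring
+          the arguments directly would overflow in single precision.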
+*/ + ret_val = xabs + yabs + zabs; + } else { +/* Computing 2nd power */ + r__1 = xabs / w; +/* Computing 2nd power */ + r__2 = yabs / w; +/* Computing 2nd power */ + r__3 = zabs / w; + ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); + } + return ret_val; + +/* End of SLAPY3 */ + +} /* slapy3_ */ + +/* Subroutine */ int slaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static integer i__, k; + static real aa, bb, cc, dd; + static integer ld; + static real cs; + static integer nh, it, ks, kt; + static real sn; + static integer ku, kv, ls, ns; + static real ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static real swap; + static integer ktop; + static real zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slaqr3_(logical *, + logical *, integer *, integer *, integer *, integer *, real *, + integer *, integer *, integer *, real *, integer *, integer *, + integer *, real *, real *, real *, integer *, integer *, real *, + integer *, integer *, real *, integer *, real *, integer *), + slaqr4_(logical *, logical *, integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *, real *, + integer *, real *, integer *, integer *), slaqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + real *, real *, real *, integer *, integer *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *, real * + , integer *, integer *, real *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to SGEBAL, and then passed to SGEHRD when the + matrix output by SGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) REAL array, dimension (IHI) + WI (output) REAL array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then SLAQR0 does a workspace query. + In this case, SLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, SLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. 
(Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use SLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + slahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . 
At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to SLAQR3 ==== +*/ + + i__1 = nwr + 1; + slaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(SLAQR5, SLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* ==== SLAHQR/SLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((r__1 = h__[kwtop + (kwtop - 1) * h_dim1], dabs(r__1)) + > (r__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + dabs(r__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + slaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if SLAQR3 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . SLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1) + ) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + dabs(r__2)); + aa = ss * .75f + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375f; + dd = aa; + slanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.f; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use SLAQR4 or + . SLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) 
==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + slacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + slaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. ==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((r__1 = wr[i__], dabs(r__1)) + (r__2 = wi[ + i__], dabs(r__2)) < (r__3 = wr[i__ + + 1], dabs(r__3)) + (r__4 = wi[i__ + 1], + dabs(r__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.f) { + if ((r__1 = wr[kbot] - h__[kbot + kbot * h_dim1], + dabs(r__1)) < (r__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], dabs(r__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. 
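+       .    For example, with NS = 4 shifts the assignments below give
+       .    KDU = 3*NS - 3 = 9, KU = N - 8, KWH = 10, NHO = N - 21,
+       .    KWV = 13 and NVE = N - 21.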
==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + slaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR0 ==== */ + + return 0; +} /* slaqr0_ */ + +/* Subroutine */ int slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, + real *si1, real *sr2, real *si2, real *v) +{ + /* System generated locals */ + integer h_dim1, h_offset; + real r__1, r__2, r__3; + + /* Local variables */ + static real s, h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, SLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + + scaling to avoid overflows and most underflows. It + is assumed that either + + 1) sr1 = sr2 and si1 = -si2 + or + 2) si1 = si2 = 0. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) REAL array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + SR1 (input) REAL + SI1 The shifts in (*). + SR2 + SI2 + + V (output) REAL array of dimension N + A scalar multiple of the first column of the + matrix K in (*). 
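+
+             As an illustrative check of (*): for N = 2 with
+
+                    [ 2  3 ]
+                H = [ 1  4 ],   SR1 = SR2 = 3,  SI1 = 1,  SI2 = -1,
+
+             the matrix K = H**2 - 6*H + 10*I has first column ( 5, 0 )**T,
+             and SLAQR1 returns the scaled multiple V = ( 5/3, 0 )**T
+             (the scale factor used internally is s = 3).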
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + s = (r__1 = h__[h_dim1 + 1] - *sr2, dabs(r__1)) + dabs(*si2) + (r__2 = + h__[h_dim1 + 2], dabs(r__2)); + if (s == 0.f) { + v[1] = 0.f; + v[2] = 0.f; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * + ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2); + } + } else { + s = (r__1 = h__[h_dim1 + 1] - *sr2, dabs(r__1)) + dabs(*si2) + (r__2 = + h__[h_dim1 + 2], dabs(r__2)) + (r__3 = h__[h_dim1 + 3], dabs( + r__3)); + if (s == 0.f) { + v[1] = 0.f; + v[2] = 0.f; + v[3] = 0.f; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) + - *si1 * (*si2 / s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ + h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2) + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * + sr2) + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} /* slaqr1_ */ + +/* Subroutine */ int slaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + + /* Local variables */ + static integer i__, j, k; + static real s, aa, bb, cc, dd, cs, sn; + static integer jw; + static real evi, evk, foo; + static integer kln; + static real tau, ulp; + static integer lwk1, lwk2; + static real beta; + static integer kend, kcol, info, ifst, ilst, ltop, krow; + static logical bulge; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), sgemm_( + char *, char *, integer *, integer *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *); + static integer infqr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slabad_(real *, real *) + ; + extern doublereal slamch_(char *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static real safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + static real safmax; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, 
real *, + real *, integer *); + static logical sorted; + extern /* Subroutine */ int strexc_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, integer *), sormhr_(char *, char *, integer *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + This subroutine is identical to SLAQR3 except that it avoids + recursion by calling SLAHQR instead of SLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) REAL array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. 
+ + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) REAL array, dimension KBOT + SI (output) REAL array, dimension KBOT + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) REAL array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) REAL array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) REAL array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) REAL array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; SLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to SGEHRD ==== */ + + i__1 = jw - 1; + sgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to SORMHR ==== */ + + i__1 = jw - 1; + sormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.f; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.f; + *ns = 1; + *nd = 0; +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * (r__1 = h__[kwtop + kwtop * h_dim1], dabs( + r__1)); + if (dabs(s) <= dmax(r__2,r__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.f; + } + } + work[1] = 1.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + slacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + scopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + slaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + slahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], + &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== STREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.f; + t[j + 3 + j * t_dim1] = 0.f; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.f; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.f; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (r__1 = t[*ns + *ns * t_dim1], dabs(r__1)); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * foo; + if ((r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)) <= dmax(r__2, + r__3)) { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (STREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (r__3 = t[*ns + *ns * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + *ns + (*ns - 1) * t_dim1], dabs(r__1))) * sqrt((r__2 = t[* + ns - 1 + *ns * t_dim1], dabs(r__2))); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__3 = (r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)), r__4 = (r__2 + = s * v[(*ns - 1) * v_dim1 + 1], dabs(r__2)); +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (dmax(r__3,r__4) <= dmax(r__5,r__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, STREXC does the right thing with + . ILST in case of a rare exchange failure. 
==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. ==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (r__1 = t[i__ + i__ * t_dim1], dabs(r__1)); + } else { + evi = (r__3 = t[i__ + i__ * t_dim1], dabs(r__3)) + sqrt((r__1 + = t[i__ + 1 + i__ * t_dim1], dabs(r__1))) * sqrt(( + r__2 = t[i__ + (i__ + 1) * t_dim1], dabs(r__2))); + } + + if (k == kend) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else if (t[k + 1 + k * t_dim1] == 0.f) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else { + evk = (r__3 = t[k + k * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + k + 1 + k * t_dim1], dabs(r__1))) * sqrt((r__2 = t[k + + (k + 1) * t_dim1], dabs(r__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.f) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.f) { + if (*ns > 1 && s != 0.f) { + +/* ==== Reflect spike back into lower triangle ==== */ + + scopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + slarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.f; + + i__1 = jw - 2; + i__2 = jw - 2; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + slarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + sgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + slacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + scopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update 
+ . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.f) { + i__1 = *lwork - jw; + sormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + sgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + slacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR2 ==== */ + + return 0; +} /* slaqr2_ */ + +/* Subroutine */ int slaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + + /* Local variables */ + static integer i__, j, k; + static real s, aa, bb, cc, dd, cs, sn; + static integer jw; + static real evi, evk, foo; + static integer kln; + static real tau, ulp; + static integer lwk1, lwk2, lwk3; + static real beta; + static integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + static logical bulge; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), sgemm_( + char *, char *, integer *, integer *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *); + static integer infqr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slaqr4_(logical *, + logical *, integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, integer *, real *, integer *, real *, + integer *, integer *), slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real safmax; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *), slahqr_(logical *, logical *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slacpy_(char *, integer *, + integer *, real *, integer *, real *, integer *), slaset_( + char *, integer *, integer *, real *, real *, real *, integer *); + static logical sorted; + extern /* Subroutine */ int strexc_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, integer *), sormhr_(char *, char *, integer *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. 
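+
+    In outline, the code below copies the trailing window
+    H(KWTOP:KBOT,KWTOP:KBOT) into T, reduces it to real Schur form and
+    then examines trailing 1-by-1 and 2-by-2 blocks: a real eigenvalue
+    is treated as converged when, roughly, its spike entry satisfies
+
+       ABS( S*V(1,NS) ) .LE. MAX( SMLNUM, ULP*ABS( T(NS,NS) ) ),
+
+    with the analogous test for complex conjugate pairs; undeflatable
+    blocks are moved up by STREXC, the remaining spike is reflected back
+    into the lower triangle with a Householder transformation, and
+    SGEHRD restores Hessenberg form.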
+ + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) REAL array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) REAL array, dimension KBOT + SI (output) REAL array, dimension KBOT + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) REAL array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) REAL array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) REAL array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. 
LDV + + WORK (workspace) REAL array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; SLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to SGEHRD ==== */ + + i__1 = jw - 1; + sgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to SORMHR ==== */ + + i__1 = jw - 1; + sormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Workspace query call to SLAQR4 ==== */ + + slaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], + &si[1], &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, & + infqr); + lwk3 = (integer) work[1]; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.f; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.f; + *ns = 1; + *nd = 0; +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * (r__1 = h__[kwtop + kwtop * h_dim1], dabs( + r__1)); + if (dabs(s) <= dmax(r__2,r__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.f; + } + } + work[1] = 1.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . 
aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + slacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + scopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + slaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "SLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + slaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], + lwork, &infqr); + } else { + slahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== STREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.f; + t[j + 3 + j * t_dim1] = 0.f; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.f; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.f; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (r__1 = t[*ns + *ns * t_dim1], dabs(r__1)); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * foo; + if ((r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)) <= dmax(r__2, + r__3)) { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (STREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (r__3 = t[*ns + *ns * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + *ns + (*ns - 1) * t_dim1], dabs(r__1))) * sqrt((r__2 = t[* + ns - 1 + *ns * t_dim1], dabs(r__2))); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__3 = (r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)), r__4 = (r__2 + = s * v[(*ns - 1) * v_dim1 + 1], dabs(r__2)); +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (dmax(r__3,r__4) <= dmax(r__5,r__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, STREXC does the right thing with + . ILST in case of a rare exchange failure. ==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. 
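+       .    (The pass below orders the diagonal blocks of T by
+       .    decreasing magnitude of their eigenvalues; when an STREXC
+       .    exchange fails, that pair is simply left in place.)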
==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (r__1 = t[i__ + i__ * t_dim1], dabs(r__1)); + } else { + evi = (r__3 = t[i__ + i__ * t_dim1], dabs(r__3)) + sqrt((r__1 + = t[i__ + 1 + i__ * t_dim1], dabs(r__1))) * sqrt(( + r__2 = t[i__ + (i__ + 1) * t_dim1], dabs(r__2))); + } + + if (k == kend) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else if (t[k + 1 + k * t_dim1] == 0.f) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else { + evk = (r__3 = t[k + k * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + k + 1 + k * t_dim1], dabs(r__1))) * sqrt((r__2 = t[k + + (k + 1) * t_dim1], dabs(r__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.f) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.f) { + if (*ns > 1 && s != 0.f) { + +/* ==== Reflect spike back into lower triangle ==== */ + + scopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + slarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.f; + + i__1 = jw - 2; + i__2 = jw - 2; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + slarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + sgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + slacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + scopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.f) { + i__1 = *lwork - jw; + sormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + sgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + slacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR3 ==== */ + + return 0; +} /* slaqr3_ */ + +/* Subroutine */ int slaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static integer i__, k; + static real aa, bb, cc, dd; + static integer ld; + static real cs; + static integer nh, it, ks, kt; + static real sn; + static integer ku, kv, ls, ns; + static real ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static real swap; + static integer ktop; + static real zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int slaqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, real *, integer *, integer *, + integer *, real *, integer *, integer *, integer *, real *, real * + , real *, integer *, integer *, real *, integer *, integer *, + real *, integer *, real *, integer *), slanv2_(real *, real *, + real *, real *, real *, real *, real *, real *, real *, real *), + slaqr5_(logical *, logical *, integer *, integer *, integer *, + integer *, integer *, real *, real *, real *, integer *, integer * + , integer *, real *, integer *, real *, integer *, real *, + integer *, integer *, real *, integer *, integer *, real *, + integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer 
*, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for SLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by SLAQR0 and, for large enough + deflation window size, it may be called by SLAQR3. This + subroutine is identical to SLAQR0 except that it calls SLAQR2 + instead of SLAQR3. + + Purpose + ======= + + SLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to SGEBAL, and then passed to SGEHRD when the + matrix output by SGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) REAL array, dimension (IHI) + WI (output) REAL array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). 
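+           (The eigenvalues of such a standardized 2-by-2 block are
+           H(i,i) +/- i*sqrt(-H(i+1,i)*H(i,i+1)), which is where the
+           expression for WI(i) above comes from.)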
+ + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then SLAQR4 does a workspace query. + In this case, SLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, SLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . 
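+       .    (In this C translation KEXSH is inlined as the literal 6
+       .    in the test NDFL % 6 == 0 below.)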
==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use SLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + slahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to SLAQR2 ==== +*/ + + i__1 = nwr + 1; + slaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(SLAQR5, SLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* ==== SLAHQR/SLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . 
for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((r__1 = h__[kwtop + (kwtop - 1) * h_dim1], dabs(r__1)) + > (r__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + dabs(r__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + slaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if SLAQR2 + . did not provide that many shifts. 
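+       .    (Concretely, the code below takes
+       .    NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) )
+       .    and then rounds NS down to an even integer.)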
==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . SLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1) + ) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + dabs(r__2)); + aa = ss * .75f + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375f; + dd = aa; + slanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.f; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use SLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + slacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & + c__1, zdum, &c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. ==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((r__1 = wr[i__], dabs(r__1)) + (r__2 = wi[ + i__], dabs(r__2)) < (r__3 = wr[i__ + + 1], dabs(r__3)) + (r__4 = wi[i__ + 1], + dabs(r__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. 
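+       .    (Whichever of the two shifts is closer to H(KBOT,KBOT)
+       .    is kept and used twice.)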
==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.f) { + if ((r__1 = wr[kbot] - h__[kbot + kbot * h_dim1], + dabs(r__1)) < (r__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], dabs(r__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + slaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR4 ==== */ + + return 0; +} /* slaqr4_ */ + +/* Subroutine */ int slaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, + real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real + *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, + integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * + ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7; + real r__1, r__2, r__3, r__4, r__5; + + /* Local variables */ + static integer i__, j, k, m, i2, j2, i4, j4, k1; + static real h11, h12, h21, h22; + static integer m22, ns, nu; + static real vt[3], scl; + static integer kdu, kms; + static real ulp; + static integer knz, kzs; + static real tst1, tst2, beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot; + static real swap; + static integer jtop, jrow, mtop; + static real alpha; + static logical accum; + static integer ndcol, incol; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer krcol, nbmps; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), slaqr1_(integer *, real *, + integer *, real *, real *, real *, real *, real *), slabad_(real * + , real *); + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + static real safmax; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + static real refsum; + static integer mstart; + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + This auxiliary subroutine called by SLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the quasi-triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the orthogonal Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: SLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: SLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: SLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. 
It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. + + SR (input/output) REAL array of size (NSHFTS) + SI (input/output) REAL array of size (NSHFTS) + SR contains the real parts and SI contains the imaginary + parts of the NSHFTS shifts of origin that define the + multi-shift QR sweep. On output SR and SI may be + reordered. + + H (input/output) REAL array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) REAL array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep orthogonal + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) REAL array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) REAL array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) REAL array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. + + WV (workspace) REAL array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. ==== +*/ + + /* Parameter adjustments */ + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. 
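+       .    (A bulge chase needs an active block of order at least 2,
+       .    so KTOP .GE. KBOT means an immediate return.)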
==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== Shuffle shifts into pairs of real shifts and pairs + . of complex conjugate shifts assuming complex + . conjugate shifts are already adjacent to one + . another. ==== +*/ + + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } +/* L10: */ + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. The shuffle above + . ensures that the dropped shift is real and that + . the remaining shifts are paired. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.f; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + slaset_("ALL", &kdu, &kdu, &c_b29, &c_b15, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. ==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) 
==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + slaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m + << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * + 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + slarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + slarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + if (h__[k + 3 + k * h_dim1] != 0.f || h__[k + 3 + (k + 1) + * h_dim1] != 0.f || h__[k + 3 + (k + 2) * h_dim1] + == 0.f) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + slaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * + 2], &si[m * 2], vt); + alpha = vt[0]; + slarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * + h__[k + 2 + k * h_dim1]); + + if ((r__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], + dabs(r__1)) + (r__2 = refsum * vt[2], dabs( + r__2)) > ulp * ((r__3 = h__[k + k * h_dim1], + dabs(r__3)) + (r__4 = h__[k + 1 + (k + 1) * + h_dim1], dabs(r__4)) + (r__5 = h__[k + 2 + (k + + 2) * h_dim1], dabs(r__5)))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. ==== +*/ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } +/* L20: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. 
==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + slaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( + m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], + &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ + m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * + v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L30: */ + } +/* L40: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L50: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.f) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * + h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) + * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + + 3]; +/* L60: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) ==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * + v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L70: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. 
==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ + j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * + v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L80: */ + } + } + } +/* L90: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22 && v[m22 * v_dim1 + 1] != 0.f) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) + ; + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L100: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L110: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L120: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. 
==== +*/ + + if (h__[k + 1 + k * h_dim1] != 0.f) { + tst1 = (r__1 = h__[k + k * h_dim1], dabs(r__1)) + (r__2 = + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + if (k >= *ktop + 1) { + tst1 += (r__1 = h__[k + (k - 1) * h_dim1], dabs( + r__1)); + } + if (k >= *ktop + 2) { + tst1 += (r__1 = h__[k + (k - 2) * h_dim1], dabs( + r__1)); + } + if (k >= *ktop + 3) { + tst1 += (r__1 = h__[k + (k - 3) * h_dim1], dabs( + r__1)); + } + if (k <= *kbot - 2) { + tst1 += (r__1 = h__[k + 2 + (k + 1) * h_dim1], + dabs(r__1)); + } + if (k <= *kbot - 3) { + tst1 += (r__1 = h__[k + 3 + (k + 1) * h_dim1], + dabs(r__1)); + } + if (k <= *kbot - 4) { + tst1 += (r__1 = h__[k + 4 + (k + 1) * h_dim1], + dabs(r__1)); + } + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * tst1; + if ((r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)) <= dmax( + r__2,r__3)) { +/* Computing MAX */ + r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), + r__4 = (r__2 = h__[k + (k + 1) * h_dim1], + dabs(r__2)); + h12 = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), + r__4 = (r__2 = h__[k + (k + 1) * h_dim1], + dabs(r__2)); + h21 = dmin(r__3,r__4); +/* Computing MAX */ + r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs( + r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + h11 = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs( + r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + h22 = dmin(r__3,r__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * tst2; + if (tst2 == 0.f || h21 * (h12 / scl) <= dmax(r__1, + r__2)) { + h__[k + 1 + k * h_dim1] = 0.f; + } + } + } +/* L130: */ + } + +/* + ==== Fill in the last row of each bulge. ==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( + k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L140: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L150: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? 
jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + sgemm_("C", "N", &nu, &jlen, &nu, &c_b15, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b29, &wh[wh_offset], ldwh); + slacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L160: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + sgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b29, &wv[wv_offset], ldwv); + slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L170: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + sgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b29, &wv[wv_offset], ldwv); + slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L180: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + slacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + slaset_("ALL", &kzs, &jlen, &c_b29, &c_b29, &wh[wh_offset] + , ldwh); + strmm_("L", "U", "C", "N", &knz, &jlen, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + sgemm_("C", "N", &i2, &jlen, &j2, &c_b15, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b15, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + slacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + strmm_("L", "L", "C", "N", &j2, &jlen, &c_b15, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("C", "N", &i__5, &jlen, &i__7, &c_b15, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b15, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + slacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L190: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . columns get multiplied by zero) ==== +*/ + + slacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + slaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[wv_offset] + , ldwv); + strmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + sgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b15, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + slacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b15, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L200: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + slacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + slaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[ + wv_offset], ldwv); + strmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + sgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b15, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + slacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b15, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L210: */ + } + } + } + } +/* L220: */ + } + +/* ==== End of SLAQR5 ==== */ + + return 0; +} /* slaqr5_ */ + +/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + real r__1; + + /* Local variables */ + static integer i__; + static logical applyleft; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static integer lastv; + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.f) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.f) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + sgemv_("Transpose", &lastv, &lastc, &c_b15, &c__[c_offset], ldc, & + v[1], incv, &c_b29, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ + + r__1 = -(*tau); + sger_(&lastv, &lastc, &r__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + sgemv_("No transpose", &lastc, &lastv, &c_b15, &c__[c_offset], + ldc, &v[1], incv, &c_b29, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ + + r__1 = -(*tau); + sger_(&lastc, &lastv, &r__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of SLARF */ + +} /* slarf_ */ + +/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, real *v, integer *ldv, + real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * + ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer lastv; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), strmm_(char *, char *, char *, char *, integer *, + integer *, real *, real *, integer *, real *, integer *); + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. 
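
Stripped of the zero-range scanning, the SIDE = 'L' update performed by SLARF above is just C := C - tau * v * (v' * C). The following is a minimal plain-loop sketch of that update, not the library routine itself: it assumes INCV = 1, column-major storage, and a caller-supplied workspace of at least n floats, and apply_reflector_left is a hypothetical name.

    /* Illustrative sketch only, not part of LAPACK. */
    static void apply_reflector_left(int m, int n, const float *v, float tau,
                                     float *c, int ldc, float *work)
    {
        if (tau == 0.f)                      /* H reduces to the identity */
            return;
        for (int j = 0; j < n; ++j) {        /* work(j) = v' * C(:,j) */
            float s = 0.f;
            for (int i = 0; i < m; ++i)
                s += v[i] * c[i + j * ldc];
            work[j] = s;
        }
        for (int j = 0; j < n; ++j)          /* C := C - tau * v * work' */
            for (int i = 0; i < m; ++i)
                c[i + j * ldc] -= tau * v[i] * work[j];
    }

SLARFB below performs the same kind of update for a product of k reflectors at once, using the triangular factor T built by SLARFT.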
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) REAL array, dimension (LDT,K) + The triangular k by k matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) REAL array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. 
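
The SIDE = 'L' branch that follows computes H * C = (I - V*T*V') * C in three passes: W := C' * V, W := W * T', C := C - V * W' (for TRANS = 'T' the middle pass uses T instead of T', which is what the transt switch above arranges). A plain-loop sketch of those three passes, illustrative only: it ignores the unit-triangular structure of V1 and the lastv/lastc trimming, assumes column-major storage and an n-by-k workspace W, and apply_block_reflector_left is a hypothetical name.

    /* Illustrative sketch only, not part of LAPACK. */
    static void apply_block_reflector_left(int m, int n, int k,
                                           const float *V, int ldv,  /* m x k */
                                           const float *T, int ldt,  /* k x k, upper triangular */
                                           float *C, int ldc,        /* m x n */
                                           float *W)                 /* n x k scratch */
    {
        /* W := C' * V */
        for (int j = 0; j < k; ++j)
            for (int i = 0; i < n; ++i) {
                float s = 0.f;
                for (int p = 0; p < m; ++p)
                    s += C[p + i * ldc] * V[p + j * ldv];
                W[i + j * n] = s;
            }
        /* W := W * T' (in place; row i, entry j only reads columns >= j) */
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < k; ++j) {
                float s = 0.f;
                for (int p = j; p < k; ++p)  /* T(j,p) == 0 for p < j */
                    s += W[i + p * n] * T[j + p * ldt];
                W[i + j * n] = s;
            }
        /* C := C - V * W' */
        for (int j = 0; j < n; ++j)
            for (int i = 0; i < m; ++i) {
                float s = 0.f;
                for (int p = 0; p < k; ++p)
                    s += V[i + p * ldv] * W[j + p * n];
                C[i + j * ldc] -= s;
            }
    }

The code below obtains the same result with three BLAS-3 calls, exploiting the unit lower triangular V1 (STRMM) and touching only the nonzero leading blocks identified by lastv and lastc.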
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + sgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + sgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of SLARFB */ + +} /* slarfb_ */ + +/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, + real *tau) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ 
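
SLARFG, whose documentation follows, chooses beta, tau and v so that H * (alpha, x)' = (beta, 0)'. The sketch below shows the same computation in its simplest form: it assumes INCX = 1, skips the safe-minimum rescaling loop that guards against underflow in the real routine, uses sqrtf/copysignf from <math.h>, and householder_sketch is a hypothetical name, not a LAPACK routine.

    /* Illustrative sketch only, not part of LAPACK. */
    static void householder_sketch(int n, float *alpha, float *x, float *tau)
    {
        if (n <= 1) {                        /* nothing to annihilate */
            *tau = 0.f;
            return;
        }
        float xnorm = 0.f;                   /* ||x(1:n-1)||_2 */
        for (int i = 0; i < n - 1; ++i)
            xnorm += x[i] * x[i];
        xnorm = sqrtf(xnorm);
        if (xnorm == 0.f) {                  /* x already zero: H = I */
            *tau = 0.f;
            return;
        }
        float beta = -copysignf(sqrtf(*alpha * *alpha + xnorm * xnorm), *alpha);
        *tau = (beta - *alpha) / beta;       /* always lies in [1, 2] */
        float scal = 1.f / (*alpha - beta);
        for (int i = 0; i < n - 1; ++i)      /* x is overwritten with v(2:n); v(1) = 1 */
            x[i] *= scal;
        *alpha = beta;                       /* alpha returns beta */
    }
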
+ static integer j, knt; + static real beta; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real xnorm; + extern doublereal slapy2_(real *, real *), slamch_(char *); + static real safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) REAL + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) REAL array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) REAL + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.f; + return 0; + } + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.f) { + +/* H = I */ + + *tau = 0.f; + } else { + +/* general case */ + + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + safmin = slamch_("S") / slamch_("E"); + knt = 0; + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1.f / safmin; +L10: + ++knt; + i__1 = *n - 1; + sscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of SLARFG */ + +} /* slarfg_ */ + +/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer i__, j, prevlastv; + static real vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static integer lastv; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) REAL array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.f; + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. 
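
For the columnwise, forward case handled here, column i of T satisfies the recurrence T(1:i-1,i) = -tau(i) * T(1:i-1,1:i-1) * (V(:,1:i-1)' * V(:,i)) with T(i,i) = tau(i), which is exactly what the SGEMV/STRMV pair below evaluates. A plain-loop sketch of the whole forward factor, illustrative only: it stores the unit diagonal of V explicitly, skips the trailing-zero trimming the library performs, and form_t_forward is a hypothetical name.

    /* Illustrative sketch only, not part of LAPACK. */
    static void form_t_forward(int n, int k, const float *V, int ldv,
                               const float *tau, float *T, int ldt)
    {
        for (int i = 0; i < k; ++i) {
            for (int j = 0; j < i; ++j) {    /* T(0:i-1,i) := -tau(i) * V(:,0:i-1)' * V(:,i) */
                float s = 0.f;
                for (int p = 0; p < n; ++p)
                    s += V[p + j * ldv] * V[p + i * ldv];
                T[j + i * ldt] = -tau[i] * s;
            }
            for (int j = 0; j < i; ++j) {    /* T(0:i-1,i) := T(0:i-1,0:i-1) * T(0:i-1,i) */
                float s = 0.f;
                for (int p = j; p < i; ++p)  /* upper triangular: T(j,p) == 0 for p < j */
                    s += T[j + p * ldt] * T[p + i * ldt];
                T[j + i * ldt] = s;          /* safe in place: later rows read p > j only */
            }
            T[i + i * ldt] = tau[i];
        }
    }
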
*/ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ + i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + i__2 = i__ - 1; + i__3 = j - i__ + 1; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b29, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.f; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) +*/ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.f; +/* Skip any leading zeros. 
*/ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + goto L36; + } + } +L36: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of SLARFT */ + +} /* slarft_ */ + +/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v, + real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + + /* Local variables */ + static integer j; + static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, + v7, v8, v9, t10, v10, sum; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.f) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. 
*/ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* Code for general M */ + + slarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1.f - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 
5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. 
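
Each of the unrolled cases below evaluates, one row of C at a time, sum = C(j,1:n) * v followed by C(j,1:n) -= tau * sum * v', which is the generic update C * H = C - tau * (C * v) * v'. A compact loop form of the same update, a sketch only (the unrolled special cases hard-code the same arithmetic for n < 11; apply_reflector_right is a hypothetical name):

    /* Illustrative sketch only, not part of LAPACK. */
    static void apply_reflector_right(int m, int n, const float *v, float tau,
                                      float *c, int ldc)
    {
        if (tau == 0.f)
            return;
        for (int j = 0; j < m; ++j) {        /* one row of C at a time */
            float sum = 0.f;
            for (int i = 0; i < n; ++i)      /* sum = C(j,:) * v */
                sum += v[i] * c[j + i * ldc];
            for (int i = 0; i < n; ++i)      /* C(j,:) -= tau * sum * v' */
                c[j + i * ldc] -= tau * sum * v[i];
        }
    }
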
*/ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* Code for general N */ + + slarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1.f - *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + 
c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of SLARFX */ + +} /* slarfx_ */ + +/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; 
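
SLARTG, documented below, computes a plane (Givens) rotation; in exact arithmetic the answer is simply cs = f/r and sn = g/r with r = sqrt(f*f + g*g). A sketch of that unscaled computation follows (it uses hypotf/fabsf from <math.h>, and plane_rotation_sketch is a hypothetical name); the routine below instead rescales f and g by powers of two first, so the squared quantities can never overflow or underflow.

    /* Illustrative sketch only, not part of LAPACK. */
    static void plane_rotation_sketch(float f, float g,
                                      float *cs, float *sn, float *r)
    {
        if (g == 0.f) {                      /* no rotation needed */
            *cs = 1.f; *sn = 0.f; *r = f;
        } else if (f == 0.f) {               /* pure swap */
            *cs = 0.f; *sn = 1.f; *r = g;
        } else {
            float t = hypotf(f, g);
            *cs = f / t;
            *sn = g / t;
            *r = t;
            if (fabsf(f) > fabsf(g) && *cs < 0.f) {  /* sign convention: cs > 0 */
                *cs = -*cs; *sn = -*sn; *r = -*r;
            }
        }
    }
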
+ + /* Local variables */ + static integer i__; + static real f1, g1, eps, scale; + static integer count; + static real safmn2, safmx2; + extern doublereal slamch_(char *); + static real safmin; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLARTG generate a plane rotation so that + + [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a slower, more accurate version of the BLAS1 routine SROTG, + with the following other differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any + floating point operations (saves work in SBDSQR when + there are zeros on the diagonal). + + If F exceeds G in magnitude, CS will be positive. + + Arguments + ========= + + F (input) REAL + The first component of vector to be rotated. + + G (input) REAL + The second component of vector to be rotated. + + CS (output) REAL + The cosine of the rotation. + + SN (output) REAL + The sine of the rotation. + + R (output) REAL + The nonzero component of the rotated vector. + + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + + ===================================================================== + + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / + + IF( FIRST ) THEN +*/ + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; +/* + FIRST = .FALSE. 
+ END IF +*/ + if (*g == 0.f) { + *cs = 1.f; + *sn = 0.f; + *r__ = *f; + } else if (*f == 0.f) { + *cs = 0.f; + *sn = 1.f; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (dabs(*f) > dabs(*g) && *cs < 0.f) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of SLARTG */ + +} /* slartg_ */ + +/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax) +{ + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAS2 computes the singular values of the 2-by-2 matrix + [ F G ] + [ 0 H ]. + On return, SSMIN is the smaller singular value and SSMAX is the + larger singular value. + + Arguments + ========= + + F (input) REAL + The (1,1) element of the 2-by-2 matrix. + + G (input) REAL + The (1,2) element of the 2-by-2 matrix. + + H (input) REAL + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) REAL + The smaller singular value. + + SSMAX (output) REAL + The larger singular value. + + Further Details + =============== + + Barring over/underflow, all output quantities are correct to within + a few units in the last place (ulps), even in the absence of a guard + digit in addition/subtraction. + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows, or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. 
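
The two singular values of [ F G ; 0 H ] are pinned down by ssmin * ssmax = |F*H| and ssmin^2 + ssmax^2 = F^2 + G^2 + H^2, so the textbook computation is a single quadratic. A sketch of that direct formula follows (it squares the inputs, which is precisely the over/underflow hazard the code below is organized to avoid; needs sqrtf/fabsf from <math.h>, and svd2x2_naive is a hypothetical name):

    /* Illustrative sketch only, not part of LAPACK. */
    static void svd2x2_naive(float f, float g, float h,
                             float *ssmin, float *ssmax)
    {
        float t = f * f + g * g + h * h;     /* ssmin^2 + ssmax^2 */
        float p = f * h;                     /* ssmin * ssmax = |f*h| */
        float d = sqrtf(t * t - 4.f * p * p);
        *ssmax = sqrtf(0.5f * (t + d));
        *ssmin = (*ssmax > 0.f) ? fabsf(p) / *ssmax : 0.f;
    }
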
+ + ==================================================================== +*/ + + + fa = dabs(*f); + ga = dabs(*g); + ha = dabs(*h__); + fhmn = dmin(fa,ha); + fhmx = dmax(fa,ha); + if (fhmn == 0.f) { + *ssmin = 0.f; + if (fhmx == 0.f) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + r__1 = dmin(fhmx,ga) / dmax(fhmx,ga); + *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.f; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + r__1 = ga / fhmx; + au = r__1 * r__1; + c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.f) { + +/* + Avoid possible harmful underflow if exponent range + asymmetric (true SSMIN may not underflow even if + AU underflows) +*/ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.f; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + r__1 = as * au; +/* Computing 2nd power */ + r__2 = at * au; + c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) + ); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of SLAS2 */ + +} /* slas2_ */ + +/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static real mul, cto1; + static logical done; + static real ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern logical sisnan_(real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASCL multiplies the M by N real matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) REAL + CTO (input) REAL + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. 
+ + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.f || sisnan_(cfrom)) { + *info = -4; + } else if (sisnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. 
+*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.f; + } else if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of SLASCL */ + +} /* slascl_ */ + +/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, + real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, + integer *iwork, real *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, + iwk, lvl, ndb1, nlp1, nrp1; + static real beta; + static integer idxq, nlvl; + static real alpha; + static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real + *, real *, real *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *), slasdt_(integer * + , integer *, integer *, integer *, integer *, integer *, integer * + ); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + Using a divide and conquer approach, SLASD0 computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M + matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + The algorithm computes orthogonal matrices U and VT such that + B = U * S * VT. The singular values S are overwritten on D. + + A related subroutine, SLASDA, computes only the singular values, + and optionally, the singular vectors in compact form. + + Arguments + ========= + + N (input) INTEGER + On entry, the row dimension of the upper bidiagonal matrix. + This is also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N+1; + + D (input/output) REAL array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. + On exit D, if INFO = 0, contains its singular values. + + E (input) REAL array, dimension (M-1) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) REAL array, dimension at least (LDQ, N) + On exit, U contains the left singular vectors. + + LDU (input) INTEGER + On entry, leading dimension of U. + + VT (output) REAL array, dimension at least (LDVT, M) + On exit, VT' contains the right singular vectors. + + LDVT (input) INTEGER + On entry, leading dimension of VT. + + SMLSIZ (input) INTEGER + On entry, maximum size of the subproblems at the + bottom of the computation tree. + + IWORK (workspace) INTEGER array, dimension (8*N) + + WORK (workspace) REAL array, dimension (3*M**2+2*M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call SLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + For the nodes on bottom level of the tree, solve + their subproblems by SLASDQ. 
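+
+   In the node numbering produced by SLASDT, the bottom-level nodes are
+   NDB1 = (ND+1)/2 through ND; each of these leaf subproblems is solved
+   directly by SLASDQ in the loop below.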
+*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* + Find the first node LF and last node LL on the + current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of SLASD0 */ + +} /* slasd0_ */ + +/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real * + d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, + integer *ldvt, integer *idxq, integer *iwork, real *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, + idxc, idxp, ldvt2; + extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, integer *, + real *, integer *, real *, real *, integer *, real *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *), + slasd3_(integer *, integer *, integer *, integer *, real *, real + *, integer *, real *, real *, integer *, real *, integer *, real * + , integer *, real *, integer *, integer *, integer *, real *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; + static integer coltyp; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. + + A related subroutine SLASD7 handles the case in which the singular + values (and the singular vectors in factored form) are desired. + + SLASD1 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The left singular vectors of the original matrix are stored in U, and + the transpose of the right singular vectors are stored in VT, and the + singular values are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or when there are zeros in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLASD2. + + The second stage consists of calculating the updated + singular values. This is done by finding the square roots of the + roots of the secular equation via the routine SLASD4 (as called + by SLASD3). This routine also calculates the singular vectors of + the current problem. + + The final stage consists of computing the updated singular vectors + directly using the updated singular values. The singular vectors + for the current problem are multiplied with the singular vectors + from the overall problem. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) REAL array, dimension (NL+NR+1). + N = NL+NR+1 + On entry D(1:NL,1:NL) contains the singular values of the + upper block; and D(NL+2:N) contains the singular values of + the lower block. On exit D(1:N) contains the singular values + of the modified matrix. + + ALPHA (input/output) REAL + Contains the diagonal element associated with the added row. + + BETA (input/output) REAL + Contains the off-diagonal element associated with the added + row. + + U (input/output) REAL array, dimension (LDU,N) + On entry U(1:NL, 1:NL) contains the left singular vectors of + the upper block; U(NL+2:N, NL+2:N) contains the left singular + vectors of the lower block. On exit U contains the left + singular vectors of the bidiagonal matrix. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max( 1, N ). + + VT (input/output) REAL array, dimension (LDVT,M) + where M = N + SQRE. + On entry VT(1:NL+1, 1:NL+1)' contains the right singular + vectors of the upper block; VT(NL+2:M, NL+2:M)' contains + the right singular vectors of the lower block. On exit + VT' contains the right singular vectors of the + bidiagonal matrix. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= max( 1, M ). + + IDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. 
+ D( IDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension (4*N) + + WORK (workspace) REAL array, dimension (3*M**2+2*M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLASD2 and SLASD3. +*/ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* + Scale. + + Computing MAX +*/ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); + } +/* L10: */ + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. */ + + slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. 
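+
+   SLAMRG merges the K non-deflated singular values in D(1:K) with the
+   N-K deflated values in D(K+1:N), and returns in IDXQ the permutation
+   that puts all of D into ascending order.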
*/ + + n1 = k; + n2 = n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of SLASD1 */ + +} /* slasd1_ */ + +/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * + ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, + real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, + integer *idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static real c__; + static integer i__, j, m, n; + static real s; + static integer k2; + static real z1; + static integer ct, jp; + static real eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASD2 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + singular values are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + SLASD2 is called from SLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + D (input/output) REAL array, dimension (N) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) REAL array, dimension (N) + On exit Z contains the updating row vector in the secular + equation. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + U (input/output) REAL array, dimension (LDU,N) + On entry U contains the left singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL, NL), and (NL+2, NL+2), (N,N). 
+ On exit U contains the trailing (N-K) updated left singular + vectors (those which were deflated) in its last N-K columns. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + VT (input/output) REAL array, dimension (LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. + + DSIGMA (output) REAL array, dimension (N) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + U2 (output) REAL array, dimension (LDU2,N) + Contains a copy of the first K-1 left singular vectors which + will be used by SLASD3 in a matrix multiply (SGEMM) to solve + for the new left singular vectors. U2 is arranged into four + blocks. The first block contains a column with 1 at NL+1 and + zero everywhere else; the second block contains non-zero + entries only at and above NL; the third contains non-zero + entries only below NL+1; and the fourth is dense. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT2 (output) REAL array, dimension (LDVT2,N) + VT2' contains a copy of the first K right singular vectors + which will be used by SLASD3 in a matrix multiply (SGEMM) to + solve for the new right singular vectors. VT2 is arranged into + three blocks. The first block contains a row that corresponds + to the special 0 diagonal element in SIGMA; the second block + contains non-zeros only at and before NL +1; the third block + contains non-zeros only at and after NL +2. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= M. + + IDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXC (output) INTEGER array, dimension (N) + This will contain the permutation used to arrange the columns + of the deflated U matrix into three groups: the first group + contains non-zero entries only at and above NL, the second + contains non-zero entries only below NL+2, and the third is + dense. + + IDXQ (input/output) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + COLTYP (workspace/output) INTEGER array, dimension (N) + As workspace, this will contain a label which will indicate + which of the following types a column in the U2 matrix or a + row in the VT2 matrix is: + 1 : non-zero in the upper half only + 2 : non-zero in the lower half only + 3 : dense + 4 : deflated + + On exit, it is an array of dimension 4, with COLTYP(I) being + the dimension of the I-th type columns. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* + Generate the first part of the vector Z; and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. */ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* + DSIGMA, IDXC, IDXC, and the first column of U2 + are used as storage space. +*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); +/* Computing MAX */ + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 8.f * dmax(r__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. 
As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.f; + +/* + Apply back the Givens rotation to the left and right + singular vector matrices. +*/ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. */ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four groups of uniform structure (although one or more of these + groups may be empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* + Fill out the IDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's, starting from the + second column. This applies similarly to the rows of VT. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* + Sort the singular values and corresponding singular vectors into + DSIGMA, U2, and VT2 respectively. The singular values/vectors + which were not deflated go into the first K slots of DSIGMA, U2, + and VT2 respectively, while those which were deflated go into the + last N - K slots, except that the first column/row will be treated + separately. 
+*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = slapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.f; + s = 0.f; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (dabs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. */ + + i__1 = *k - 1; + scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* + Determine the first column of U2, the first row of VT2 and the + last row of VT. +*/ + + slaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.f; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* + The deflated singular values and their corresponding vectors go + into the back of D, U, and V respectively. +*/ + + if (n > *k) { + i__1 = n - *k; + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in SLASD3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of SLASD2 */ + +} /* slasd2_ */ + +/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * + ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, + integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * + info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static integer i__, j, m, n, jc; + static real rho; + static integer nlp1, nlp2, nrp1; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + static integer ctemp; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer ktemp; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *), slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLASD3 finds all the square roots of the roots of the secular + equation, as defined by the values in D and Z. It makes the + appropriate calls to SLASD4 and then updates the singular + vectors by matrix multiplication. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + SLASD3 is called from SLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (input) INTEGER + The size of the secular equation, 1 =< K = < N. + + D (output) REAL array, dimension(K) + On exit the square roots of the roots of the secular equation, + in ascending order. + + Q (workspace) REAL array, + dimension at least (LDQ,K). + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= K. + + DSIGMA (input/output) REAL array, dimension(K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + U (output) REAL array, dimension (LDU, N) + The last N - K columns of this matrix contain the deflated + left singular vectors. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. 
+ + U2 (input) REAL array, dimension (LDU2, N) + The first K columns of this matrix contain the non-deflated + left singular vectors for the split problem. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (output) REAL array, dimension (LDVT, M) + The last M - K columns of VT' contain the deflated + right singular vectors. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= N. + + VT2 (input/output) REAL array, dimension (LDVT2, N) + The first K columns of VT2' contain the non-deflated + right singular vectors for the split problem. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= N. + + IDXC (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of U (and rows of + VT) into three groups: the first group contains non-zero + entries only at and above (or before) NL +1; the second + contains non-zero entries only at and below (or after) NL+2; + and the third is dense. The first column of U and the row of + VT are treated separately, however. + + The rows of the singular vectors found by SLASD4 + must be likewise permuted before the matrix multiplies can + take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in U (or rows in VT), as described in IDXC. The fourth column + type is any column which has been deflated. + + Z (input/output) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = dabs(z__[1]); + scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.f) { + scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). 
+ This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DSIGMA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* + Compute left singular vectors of the modified diagonal matrix, + and store related information for the right singular vectors. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.f; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. 
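+
+   The updated left singular vectors are obtained as the product U2 * Q.
+   The SGEMM calls below exploit the block structure of U2 recorded in
+   CTOT: type-1 columns are nonzero only in the upper NL rows, type-2
+   columns only in the lower NR rows, and type-3 columns are dense.
+   Row NL+1 of the result is filled in separately from the first row of
+   Q by the SCOPY call.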
*/ + + if (*k == 2) { + sgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b29, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + sgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + sgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] + , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], + ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + sgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b29, &u[u_dim1 + 1], ldu); + } else { + slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + sgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. */ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. */ + + if (*k == 2) { + sgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] + , ldvt2, &c_b29, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + sgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], + ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + sgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + + 1], ldvt); + + return 0; + +/* End of SLASD3 */ + +} /* slasd3_ */ + +/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *sigma, real *work, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a, b, c__; + static integer j; + static real w, dd[3]; + static integer ii; + static real dw, zz[3]; + static integer ip1; + static real eta, phi, eps, tau, psi; + static integer iim1, iip1; + static real dphi, dpsi; + static integer iter; + static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; + static integer niter; + static real dtisq; + static logical swtch; + static real dtnsq; + extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static real delsq2; + extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, + real *, real *, real *); + static real dtnsq1; + static logical swtch3; + extern doublereal slamch_(char *); + static logical orgati; + static real erretm, dtipsq, rhoinv; + + +/* + -- LAPACK auxiliary 
routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the square root of the I-th updated + eigenvalue of a positive symmetric rank-one modification to + a positive diagonal matrix whose entries are given as the squares + of the corresponding entries in the array d, and that + + 0 <= D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) * diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) REAL array, dimension ( N ) + The original eigenvalues. It is assumed that they are in + order, 0 <= D(I) < D(J) for I < J. + + Z (input) REAL array, dimension (N) + The components of the updating vector. + + DELTA (output) REAL array, dimension (N) + If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + (singular) eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + SIGMA (output) REAL + The computed sigma_I, the I-th updated eigenvalue. + + WORK (workspace) REAL array, dimension (N) + If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th + component. If N = 1, then WORK( 1 ) = 1. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. 
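+
+   For reference, the value computed here is the I-th smallest root
+   SIGMA of the secular equation
+
+      1 + RHO * sum_{j=1,N} Z(j)**2 / ( D(j)**2 - SIGMA**2 ) = 0,
+
+   which the code evaluates (scaled by 1/RHO) as W = RHOINV + PSI + PHI,
+   using WORK(j)*DELTA(j) = D(j)**2 - SIGMA**2.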
+*/ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.f; + work[1] = 1.f; + return 0; + } + if (*n == 2) { + slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.f; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.f; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.f) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + } + +/* + It can be proved that + D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +*/ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + +/* + It can be proved that + D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +*/ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if 
(c__ < 0.f) { + c__ = dabs(c__); + } + if (c__ == 0.f) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. 
+*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.f) { + eta /= 2.f; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.f; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.f; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.f; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.f) { + +/* + d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + sg2lb = 0.f; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } else { + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } + +/* + TAU now is an estimation of SIGMA^2 - D( I )^2. The + following, however, is the corresponding estimation of + SIGMA - D( I ). +*/ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* + (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.f; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); + } else { + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); + } + +/* + TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The + following, however, is the corresponding estimation of + SIGMA - D( IP1 ). 
+*/ + + eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau, + dabs(r__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.f) { + swtch3 = TRUE_; + } + } else { + if (w > 0.f) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { 
+ goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; + } else { + eta = (sg2lb - tau) / 2.f; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } else { + if (w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (! 
swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; + } else { + eta = (sg2lb - tau) / 2.f; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + swtch = ! 
swtch; + } + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of SLASD4 */ + +} /* slasd4_ */ + +/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dsigma, real *work) +{ + /* System generated locals */ + real r__1; + + /* Local variables */ + static real b, c__, w, del, tau, delsq; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This subroutine computes the square root of the I-th eigenvalue + of a positive symmetric rank-one modification of a 2-by-2 diagonal + matrix + + diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + + The diagonal entries in the array D are assumed to satisfy + + 0 <= D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) REAL array, dimension (2) + The original eigenvalues. We assume 0 <= D(1) < D(2). + + Z (input) REAL array, dimension (2) + The components of the updating vector. + + DELTA (output) REAL array, dimension (2) + Contains (D(j) - sigma_I) in its j-th component. + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DSIGMA (output) REAL + The computed sigma_I, the I-th updated eigenvalue. + + WORK (workspace) REAL array, dimension (2) + WORK contains (D(j) + sigma_I) in its j-th component. 
+ + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] * + z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f; + if (w > 0.f) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* + B > ZERO, always + + The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +*/ + + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2.f + tau; + work[2] = d__[1] + tau + d__[2]; +/* + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) +*/ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + } else { + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2.f + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU +*/ + } +/* + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + } else { + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2.f + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } + return 0; + +/* End of SLASD5 */ + +} /* slasd5_ */ + +/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, + integer *idxq, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd7_(integer *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, real *, real 
*, + real *, real *, real *, integer *, integer *, integer *, integer + *, integer *, integer *, integer *, real *, integer *, real *, + real *, integer *), slasd8_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *, real *, real *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLASD6 computes the SVD of an updated upper bidiagonal matrix B + obtained by merging two smaller ones by appending a row. This + routine is used only for the problem which requires all singular + values and optionally singular vector matrices in factored form. + B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + A related subroutine, SLASD1, handles the case in which all singular + values and singular vectors of the bidiagonal matrix are desired. + + SLASD6 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The singular values of B can be computed using D1, D2, the first + components of all the right singular vectors of the lower block, and + the last components of all the right singular vectors of the upper + block. These components are stored and updated in VF and VL, + respectively, in SLASD6. Hence U and VT are not explicitly + referenced. + + The singular values are stored in D. The algorithm consists of two + stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or if there is a zero + in the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLASD7. + + The second stage consists of calculating the updated + singular values. This is done by finding the roots of the + secular equation via the routine SLASD4 (as called by SLASD8). + This routine also updates VF and VL and computes the distances + between the updated singular values and the old singular + values. + + SLASD6 is called from SLASDA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) REAL array, dimension (NL+NR+1). + On entry D(1:NL,1:NL) contains the singular values of the + upper block, and D(NL+2:N) contains the singular values + of the lower block. 
On exit D(1:N) contains the singular + values of the modified matrix. + + VF (input/output) REAL array, dimension (M) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VL (input/output) REAL array, dimension (M) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors of + the lower block. On exit, VL contains the last components of + all right singular vectors of the bidiagonal matrix. + + ALPHA (input/output) REAL + Contains the diagonal element associated with the added row. + + BETA (input/output) REAL + Contains the off-diagonal element associated with the added + row. + + IDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM and POLES, must be at least N. + + POLES (output) REAL array, dimension ( LDGNUM, 2 ) + On exit, POLES(1,*) is an array containing the new singular + values obtained from solving the secular equation, and + POLES(2,*) is an array containing the poles in the secular + equation. Not referenced if ICOMPQ = 0. + + DIFL (output) REAL array, dimension ( N ) + On exit, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (output) REAL array, + dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + On exit, DIFR(I, 1) is the distance between I-th updated + (undeflated) singular value and the I+1-th (undeflated) old + singular value. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + See SLASD8 for details on DIFL and DIFR. + + Z (output) REAL array, dimension ( M ) + The first elements of this array contain the components + of the deflation-adjusted updating row vector. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (output) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. 
+ + WORK (workspace) REAL array, dimension ( 4 * M ) + + IWORK (workspace) INTEGER array, dimension ( 3 * N ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD6", &i__1); + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLASD7 and SLASD8. +*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* + Scale. + + Computing MAX +*/ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); + } +/* L10: */ + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + scopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. 
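+   SLAMRG builds in IDXQ the permutation that restores ascending order
+   across the K undeflated values now in D(1:K) and the N-K deflated
+   values that SLASD7 left in D(K+1:N), as described for IDXQ above.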
*/ + + n1 = *k; + n2 = n - *k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of SLASD6 */ + +} /* slasd6_ */ + +/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, + real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, + integer *idx, integer *idxp, integer *idxq, integer *perm, integer * + givptr, integer *givcol, integer *ldgcol, real *givnum, integer * + ldgnum, real *c__, real *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, j, m, n, k2; + static real z1; + static integer jp; + static real eps, tau, tol; + static integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASD7 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. There + are two ways in which deflation can occur: when two or more singular + values are close together or if there is a tiny entry in the Z + vector. For each such occurrence the order of the related + secular equation problem is reduced by one. + + SLASD7 is called from SLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows: + = 0: Compute singular values only. + = 1: Compute singular vectors of upper + bidiagonal matrix in compact form. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, this is + the order of the related secular equation. 1 <= K <=N. + + D (input/output) REAL array, dimension ( N ) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) REAL array, dimension ( M ) + On exit Z contains the updating row vector in the secular + equation. + + ZW (workspace) REAL array, dimension ( M ) + Workspace for Z. + + VF (input/output) REAL array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. 
+ + VFW (workspace) REAL array, dimension ( M ) + Workspace for VF. + + VL (input/output) REAL array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors + of the lower block. On exit, VL contains the last components + of all right singular vectors of the bidiagonal matrix. + + VLW (workspace) REAL array, dimension ( M ) + Workspace for VL. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + DSIGMA (output) REAL array, dimension ( N ) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + IDX (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXP (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDXQ (input) INTEGER array, dimension ( N ) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first half of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each singular block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM, must be at least N. + + C (output) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* + Generate the first part of the vector Z and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.f; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.f; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.f; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); +/* Computing MAX */ + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 64.f * dmax(r__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. 
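+   K2 counts down from N+1, so the indices of deflated values collect at
+   the end of IDXP, matching the description of IDXP(K+1:N) above.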
*/ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.f; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* + Sort the singular values into DSIGMA. The singular values which + were not deflated go into the first K slots of DSIGMA, except + that DSIGMA(1) is treated separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* + The deflated singular values go back into the last N - K slots of + D. +*/ + + i__1 = n - *k; + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* + Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and + VL(M). +*/ + + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = slapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.f; + *s = 0.f; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (dabs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. 
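+   The entries that were gathered into the workspace copies ZW, VFW and
+   VLW during sorting and deflation are copied back; only Z(2:K) needs
+   to be restored, since Z(1) was set directly above.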
*/ + + i__1 = *k - 1; + scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of SLASD7 */ + +} /* slasd7_ */ + +/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * + z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, + real *dsigma, real *work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static integer i__, j; + static real dj, rho; + static integer iwk1, iwk2, iwk3; + static real temp; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static integer iwk2i, iwk3i; + extern doublereal snrm2_(integer *, real *, integer *); + static real diflj, difrj, dsigj; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *); + static real dsigjp; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLASD8 finds the square roots of the roots of the secular equation, + as defined by the values in DSIGMA and Z. It makes the appropriate + calls to SLASD4, and stores, for each element in D, the distance + to its two nearest poles (elements in DSIGMA). It also updates + the arrays VF and VL, the first and last components of all the + right singular vectors of the original bidiagonal matrix. + + SLASD8 is called from SLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form in the calling routine: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + K (input) INTEGER + The number of terms in the rational function to be solved + by SLASD4. K >= 1. + + D (output) REAL array, dimension ( K ) + On output, D contains the updated singular values. + + Z (input/output) REAL array, dimension ( K ) + On entry, the first K elements of this array contain the + components of the deflation-adjusted updating row vector. + On exit, Z is updated. + + VF (input/output) REAL array, dimension ( K ) + On entry, VF contains information passed through DBEDE8. + On exit, VF contains the first K components of the first + components of all right singular vectors of the bidiagonal + matrix. + + VL (input/output) REAL array, dimension ( K ) + On entry, VL contains information passed through DBEDE8. + On exit, VL contains the first K components of the last + components of all right singular vectors of the bidiagonal + matrix. + + DIFL (output) REAL array, dimension ( K ) + On exit, DIFL(I) = D(I) - DSIGMA(I). + + DIFR (output) REAL array, + dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and + dimension ( K ) if ICOMPQ = 0. + On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not + defined and will not be referenced. 
+ + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + LDDIFR (input) INTEGER + The leading dimension of DIFR, must be at least K. + + DSIGMA (input/output) REAL array, dimension ( K ) + On entry, the first K elements of this array contain the old + roots of the deflated updating problem. These are the poles + of the secular equation. + On exit, the elements of DSIGMA may be very slightly altered + in value. + + WORK (workspace) REAL array, dimension at least 3 * K + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = dabs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.f; + difr[(difr_dim1 << 1) + 1] = 1.f; + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + slaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); + +/* + Compute the updated singular values, the arrays DIFL, DIFR, + and the updated Z. +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. 
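+   SLASD4 signals failure to converge by returning a nonzero INFO
+   (INFO = 1), and that value is passed back unchanged to the caller,
+   consistent with the INFO description above.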
*/ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = snrm2_(k, &work[1], &c__1); + work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of SLASD8 */ + +} /* slasd8_ */ + +/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, + integer *k, real *difl, real *difr, real *z__, real *poles, integer * + givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, + real *c__, real *s, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, + nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + static real beta; + static integer idxq, nlvl; + static real alpha; + static integer inode, ndiml, ndimr, idxqi, itemp, sqrei; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd6_(integer *, integer *, integer *, integer *, + real *, real *, real *, real *, real *, integer *, integer *, + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, integer *, real *, real *, real *, integer *, + integer *); + static integer nwork1, nwork2; + extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( + char *, integer *, integer *, integer *, integer *, integer *, + real *, real *, real *, integer *, real *, integer *, real *, + integer *, real *, integer *), slasdt_(integer *, integer + *, integer *, integer *, integer *, integer *, integer *), + slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static integer smlszp; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + 
-- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + Using a divide and conquer approach, SLASDA computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + B with diagonal D and offdiagonal E, where M = N + SQRE. The + algorithm computes the singular values in the SVD B = U * S * VT. + The orthogonal matrices U and VT are optionally computed in + compact form. + + A related subroutine, SLASD0, computes the singular values and + the singular vectors in explicit form. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows + = 0: Compute singular values only. + = 1: Compute singular vectors of upper bidiagonal + matrix in compact form. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row dimension of the upper bidiagonal matrix. This is + also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N + 1. + + D (input/output) REAL array, dimension ( N ) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit D, if INFO = 0, contains its singular values. + + E (input) REAL array, dimension ( M-1 ) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) REAL array, + dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left + singular vector matrices of all subproblems at the bottom + level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, POLES, + GIVNUM, and Z. + + VT (output) REAL array, + dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right + singular vector matrices of all subproblems at the bottom + level. + + K (output) INTEGER array, dimension ( N ) + if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th + secular equation on the computation tree. + + DIFL (output) REAL array, dimension ( LDU, NLVL ), + where NLVL = floor(log_2 (N/SMLSIZ))). + + DIFR (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) + record distances between singular values on the I-th + level and singular values on the (I -1)-th level, and + DIFR(1:N, 2 * I ) contains the normalizing factors for + the right singular vector matrix. See SLASD8 for details. + + Z (output) REAL array, + dimension ( LDU, NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + The first K elements of Z(1, I) contain the components of + the deflation-adjusted updating row vector for subproblems + on the I-th level. + + POLES (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and + POLES(1, 2*I) contain the new and old singular values + involved in the secular equations on the I-th level. + + GIVPTR (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1, and not referenced if + ICOMPQ = 0. 
If ICOMPQ = 1, on exit, GIVPTR( I ) records + the number of Givens rotations performed on the I-th + problem on the computation tree. + + GIVCOL (output) INTEGER array, + dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations + of Givens rotations performed on the I-th level on the + computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) + if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records + permutations done on the I-th level of the computation tree. + + GIVNUM (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- + values of Givens rotations performed on the I-th level on + the computation tree. + + C (output) REAL array, + dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1 and the I-th subproblem is not square, on exit, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (output) REAL array, dimension ( N ) if + ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 + and the I-th subproblem is not square, on exit, S( I ) + contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) REAL array, dimension + (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). + + IWORK (workspace) INTEGER array, dimension (7*N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, a singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call SLASDQ to find the SVD. 
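+   When N <= SMLSIZ no computation tree is needed; the whole problem is
+   handed directly to SLASDQ, with or without singular vectors according
+   to ICOMPQ.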
*/ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + for the nodes on bottom level of the tree, solve + their subproblems by SLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + slaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + slaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); + slaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], + ldu); + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + slaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + slaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); + slaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], + ldu); + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. 
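+   Levels of the tree are processed from the deepest, LVL = NLVL, up to
+   the root, LVL = 1; at each node SLASD6 merges the two child
+   subproblems solved above, as described in the SLASD6 prologue.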
*/ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of SLASDA */ + +} /* slasda_ */ + +/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, + integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * + work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j; + static real r__, cs, sn; + static integer np1, isub; + static real smin; + static integer sqre1; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static integer iuplo; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *), slartg_(real *, + real *, real *, real *, real *); + static logical rotate; + extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASDQ computes the singular value decomposition (SVD) of a real + (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + E, accumulating the transformations if desired. Letting B denote + the input bidiagonal matrix, the algorithm computes orthogonal + matrices Q and P such that B = Q * S * P' (P' denotes the transpose + of P). The singular values S are overwritten on D. + + The input matrix U is changed to U * Q if desired. + The input matrix VT is changed to P' * VT if desired. + The input matrix C is changed to Q' * C if desired. 
+ + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3, for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + On entry, UPLO specifies whether the input bidiagonal matrix + is upper or lower bidiagonal, and wether it is square are + not. + UPLO = 'U' or 'u' B is upper bidiagonal. + UPLO = 'L' or 'l' B is lower bidiagonal. + + SQRE (input) INTEGER + = 0: then the input matrix is N-by-N. + = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and + (N+1)-by-N if UPLU = 'L'. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + N (input) INTEGER + On entry, N specifies the number of rows and columns + in the matrix. N must be at least 0. + + NCVT (input) INTEGER + On entry, NCVT specifies the number of columns of + the matrix VT. NCVT must be at least 0. + + NRU (input) INTEGER + On entry, NRU specifies the number of rows of + the matrix U. NRU must be at least 0. + + NCC (input) INTEGER + On entry, NCC specifies the number of columns of + the matrix C. NCC must be at least 0. + + D (input/output) REAL array, dimension (N) + On entry, D contains the diagonal entries of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in ascending order. + + E (input/output) REAL array. + dimension is (N-1) if SQRE = 0 and N if SQRE = 1. + On entry, the entries of E contain the offdiagonal entries + of the bidiagonal matrix whose SVD is desired. On normal + exit, E will contain 0. If the algorithm does not converge, + D and E will contain the diagonal and superdiagonal entries + of a bidiagonal matrix orthogonally equivalent to the one + given as input. + + VT (input/output) REAL array, dimension (LDVT, NCVT) + On entry, contains a matrix which on exit has been + premultiplied by P', dimension N-by-NCVT if SQRE = 0 + and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). + + LDVT (input) INTEGER + On entry, LDVT specifies the leading dimension of VT as + declared in the calling (sub) program. LDVT must be at + least 1. If NCVT is nonzero LDVT must also be at least N. + + U (input/output) REAL array, dimension (LDU, N) + On entry, contains a matrix which on exit has been + postmultiplied by Q, dimension NRU-by-N if SQRE = 0 + and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). + + LDU (input) INTEGER + On entry, LDU specifies the leading dimension of U as + declared in the calling (sub) program. LDU must be at + least max( 1, NRU ) . + + C (input/output) REAL array, dimension (LDC, NCC) + On entry, contains an N-by-NCC matrix which on exit + has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 + and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). + + LDC (input) INTEGER + On entry, LDC specifies the leading dimension of C as + declared in the calling (sub) program. LDC must be at + least 1. If NCC is nonzero, LDC must also be at least N. + + WORK (workspace) REAL array, dimension (4*N) + Workspace. Only referenced if one of NCVT, NRU, or NCC is + nonzero, and if N is at least 2. + + INFO (output) INTEGER + On exit, a value of 0 indicates a successful exit. + If INFO < 0, argument number -INFO is illegal. + If INFO > 0, the algorithm did not converge, and INFO + specifies how many superdiagonals did not converge. 
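+
+    A rough sketch, for illustration only (the variable names and the
+    sample values below are made up and are not part of the LAPACK
+    interface): computing just the singular values of a small 3-by-3
+    upper bidiagonal matrix, with no vector accumulation, could look
+    like this in the f2c calling convention used here:
+
+        integer n = 3, sqre = 0, izero = 0, ione = 1, info = 0;
+        real d[3] = {4.f, 3.f, 2.f};
+        real e[2] = {1.f, 1.f};
+        real dummy[1];
+        real work[12];
+        slasdq_("U", &sqre, &n, &izero, &izero, &izero, d, e,
+                dummy, &ione, dummy, &ione, dummy, &ione, work, &info);
+
+    With NCVT = NRU = NCC = 0 the arrays VT, U and C are not referenced,
+    so a one-element placeholder with leading dimension 1 is enough; on
+    a successful exit d holds the singular values in ascending order.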
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* + If matrix non-square upper bidiagonal, rotate to be lower + bidiagonal. The rotations are on the right. +*/ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.f; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left. +*/ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* + If matrix (N+1)-by-N lower bidiagonal, one additional + rotation is needed. +*/ + + if (sqre1 == 1) { + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + slasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* + Call SBDSQR to compute the SVD of the reduced real + N-by-N upper bidiagonal matrix. 
+*/ + + sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* + Sort the singular values into ascending order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. */ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of SLASDQ */ + +} /* slasdq_ */ + +/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, il, ir, maxn; + static real temp; + static integer nlvl, llst, ncrnt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + SLASDT creates a tree of subproblems for bidiagonal divide and + conquer. + + Arguments + ========= + + N (input) INTEGER + On entry, the number of diagonal elements of the + bidiagonal matrix. + + LVL (output) INTEGER + On exit, the number of levels on the computation tree. + + ND (output) INTEGER + On exit, the number of nodes on the tree. + + INODE (output) INTEGER array, dimension ( N ) + On exit, centers of subproblems. + + NDIML (output) INTEGER array, dimension ( N ) + On exit, row dimensions of left children. + + NDIMR (output) INTEGER array, dimension ( N ) + On exit, row dimensions of right children. + + MSUB (input) INTEGER + On entry, the maximum row dimension each subproblem at the + bottom of the tree can be of. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Find the number of levels on the tree. +*/ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((real) maxn / (real) (*msub + 1)) / log(2.f); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* + Constructing the tree at (NLVL+1)-st level. The number of + nodes created on this level is LLST * 2. 
+*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of SLASDT */ + +} /* slasdt_ */ + +/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha, + real *beta, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASET initializes an m-by-n matrix A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set; the strictly lower + triangular part of A is not changed. + = 'L': Lower triangular part is set; the strictly upper + triangular part of A is not changed. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + ALPHA (input) REAL + The constant to which the offdiagonal elements are to be set. + + BETA (input) REAL + The constant to which the diagonal elements are to be set. + + A (input/output) REAL array, dimension (LDA,N) + On exit, the leading m-by-n submatrix of A is set as follows: + + if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, + if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, + otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, + + and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the strictly upper triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the strictly lower triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. 
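+
+   For example (an illustrative call, with zero and one standing for
+   variables holding 0.f and 1.f), slaset_("A", &m, &n, &zero, &one,
+   a, &lda) leaves the leading m-by-n block of a with ones on the
+   diagonal and zeros everywhere else.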
*/ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of SLASET */ + +} /* slaset_ */ + +/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work, + integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + real r__1, r__2, r__3; + + /* Local variables */ + static integer i__; + static real eps; + extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) + ; + static real scale; + static integer iinfo; + static real sigmn, sigmx; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasq2_(integer *, real *, integer *); + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slasrt_(char *, integer * + , real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ1 computes the singular values of a real N-by-N bidiagonal + matrix with diagonal D and off-diagonal E. The singular values + are computed to high relative accuracy, in the absence of + denormalization, underflow and overflow. The algorithm was first + presented in + + "Accurate singular values and differential qd algorithms" by K. V. + Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + 1994, + + and the present implementation is described in "An implementation of + the dqds Algorithm (Positive Case)", LAPACK Working Note. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, D contains the diagonal elements of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in decreasing order. + + E (input/output) REAL array, dimension (N) + On entry, elements E(1:N-1) contain the off-diagonal elements + of the bidiagonal matrix whose SVD is desired. + On exit, E is overwritten. + + WORK (workspace) REAL array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + i__1 = -(*info); + xerbla_("SLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = dabs(d__[1]); + return 0; + } else if (*n == 2) { + slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. 
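+
+   (The loop below replaces D by its absolute values while scanning the
+   off-diagonal E for the entry of largest magnitude; if that maximum is
+   zero the matrix is already diagonal and only a descending sort of D
+   remains to be done.)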
*/ + + sigmx = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (r__1 = d__[i__], dabs(r__1)); +/* Computing MAX */ + r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); + sigmx = dmax(r__2,r__3); +/* L10: */ + } + d__[*n] = (r__1 = d__[*n], dabs(r__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.f) { + slasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = sigmx, r__2 = d__[i__]; + sigmx = dmax(r__1,r__2); +/* L20: */ + } + +/* + Copy D and E into WORK (in the Z format) and scale (squaring the + input data makes scaling by a power of the radix pointless). +*/ + + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + scale = sqrt(eps / safmin); + scopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. */ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + r__1 = work[i__]; + work[i__] = r__1 * r__1; +/* L30: */ + } + work[*n * 2] = 0.f; + + slasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of SLASQ1 */ + +} /* slasq1_ */ + +/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real r__1, r__2; + + /* Local variables */ + static real d__, e, g; + static integer k; + static real s, t; + static integer i0, i4, n0; + static real dn; + static integer pp; + static real dn1, dn2, dee, eps, tau, tol; + static integer ipn4; + static real tol2; + static logical ieee; + static integer nbig; + static real dmin__, emin, emax; + static integer kmin, ndiv, iter; + static real qmin, temp, qmax, zmax; + static integer splt; + static real dmin1, dmin2; + static integer nfail; + static real desig, trace, sigma; + static integer iinfo, ttype; + extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, integer *, integer *, integer * + , logical *, integer *, real *, real *, real *, real *, real *, + real *, real *); + static real deemin; + extern doublereal slamch_(char *); + static integer iwhila, iwhilb; + static real oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), slasrt_( + char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ2 computes all the eigenvalues of the symmetric positive + definite tridiagonal matrix associated with the qd array Z to high + relative accuracy are computed to high relative accuracy, in the + absence of denormalization, underflow and overflow. + + To see the relation of Z to the tridiagonal matrix, let L be a + unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) 
and + let U be an upper bidiagonal matrix with 1's above and diagonal + Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + symmetric tridiagonal to which it is similar. + + Note : SLASQ2 defines a logical variable, IEEE, which is true + on machines which follow ieee-754 floating-point standard in their + handling of infinities and NaNs, and false otherwise. This variable + is passed to SLASQ3. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + Z (input/output) REAL array, dimension ( 4*N ) + On entry Z holds the qd array. On exit, entries 1 to N hold + the eigenvalues in decreasing order, Z( 2*N+1 ) holds the + trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If + N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) + holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of + shifts that failed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if the i-th argument is a scalar and had an illegal + value, then INFO = -i, if the i-th argument is an + array and the j-entry had an illegal value, then + INFO = -(i*100+j) + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + Further Details + =============== + Local Variables: I0:N0 defines a current unreduced segment of Z. + The shifts are accumulated in SIGMA. Iteration count is in ITER. + Ping-pong is controlled by PP (alternates between 0 and 1). + + ===================================================================== + + + Test the input arguments. + (in case SLASQ2 is not called by SLASQ1) +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + tol = eps * 100.f; +/* Computing 2nd power */ + r__1 = tol; + tol2 = r__1 * r__1; + + if (*n < 0) { + *info = -1; + xerbla_("SLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.f) { + *info = -201; + xerbla_("SLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0.f || z__[3] < 0.f) { + *info = -2; + xerbla_("SLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5f; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. 
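+
+   (At this point the q's occupy the odd positions Z(1), Z(3), ... and
+   the e's the even positions Z(2), Z(4), ...; the loop below rejects
+   any negative entry and accumulates the sums of the q's and e's. The
+   rearrangement into the locality-friendly layout happens further
+   down.)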
*/ + + z__[*n * 2] = 0.f; + emin = z__[2]; + qmax = 0.f; + zmax = 0.f; + d__ = 0.f; + e = 0.f; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.f) { + *info = -(k + 200); + xerbla_("SLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.f) { + *info = -(k + 201); + xerbla_("SLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + r__1 = qmax, r__2 = z__[k]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[k + 1]; + emin = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = max(qmax,zmax), r__2 = z__[k + 1]; + zmax = dmax(r__1,r__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.f) { + *info = -((*n << 1) + 199); + xerbla_("SLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + r__1 = qmax, r__2 = z__[(*n << 1) - 1]; + qmax = dmax(r__1,r__2); + zmax = dmax(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.f) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + slasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.f) { + z__[(*n << 1) - 1] = 0.f; + return 0; + } + +/* + Check whether the machine is IEEE conformable. + + IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 + + [11/15/2008] The case IEEE=.TRUE. has a problem in single precision with + some the test matrices of type 16. The double precision code is fine. +*/ + + ieee = FALSE_; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.f; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.f; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.f; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.f; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.f; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - (pp << 1)]; + emin = dmin(r__1,r__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. 
*/ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4]; + qmax = dmax(r__1,r__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + +/* Initialise variables to pass to SLASQ3. */ + + ttype = 0; + dmin1 = 0.f; + dmin2 = 0.f; + dn = 0.f; + dn1 = 0.f; + dn2 = 0.f; + g = 0.f; + tau = 0.f; + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L170; + } + +/* + While array unfinished do + + E(N0) holds the value of SIGMA when submatrix in I0:N0 + splits from the rest of the array, but is negated. +*/ + + desig = 0.f; + if (n0 == *n) { + sigma = 0.f; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.f) { + *info = 1; + return 0; + } + +/* + Find last unreduced submatrix's top index I0, find QMAX and + EMIN. Find Gershgorin-type bound if Q's much greater than E's. +*/ + + emax = 0.f; + if (n0 > i0) { + emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1)); + } else { + emin = 0.f; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.f) { + goto L100; + } + if (qmin >= emax * 4.f) { +/* Computing MIN */ + r__1 = qmin, r__2 = z__[i4 - 3]; + qmin = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = emax, r__2 = z__[i4 - 5]; + emax = dmax(r__1,r__2); + } +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - 5]; + emin = dmin(r__1,r__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + pp = 0; + + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5f) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } + +/* + Put -(initial shift) into DMIN. + + Computing MAX +*/ + r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); + dmin__ = -dmax(r__1,r__2); + +/* + Now I0:N0 is unreduced. + PP = 0 for ping, PP = 1 for pong. + PP = 2 indicates that flipping was applied to the Z array and + and that the tests for deflation upon entry in SLASQ3 + should not be performed. +*/ + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L150; + } + +/* While submatrix unfinished take a good dqds step. */ + + slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. 
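+
+   (A split point is recorded by overwriting the negligible E entry with
+   -SIGMA; when the outer loop later reaches the segment that was split
+   off, it recovers the shift accumulated so far by negating that stored
+   value.)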
*/ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.f; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4 + 1]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - 1]; + emin = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = oldemn, r__2 = z__[i4]; + oldemn = dmin(r__1,r__2); + } +/* L130: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L140: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L150: + +/* L160: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L170: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L180: */ + } + +/* Sort and compute sum of eigenvalues. */ + + slasrt_("D", n, &z__[1], &iinfo); + + e = 0.f; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L190: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (real) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100.f / (real) iter; + return 0; + +/* End of SLASQ2 */ + +} /* slasq2_ */ + +/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, + integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * + dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * + tau) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real s, t; + static integer j4, nn; + static real eps, tol; + static integer n0in, ipn4; + static real tol2, temp; + extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer + *, integer *, real *, real *, real *, real *, real *, real *, + real *, integer *, real *), slasq5_(integer *, integer *, real *, + integer *, real *, real *, real *, real *, real *, real *, real *, + logical *), slasq6_(integer *, integer *, real *, integer *, + real *, real *, real *, real *, real *, real *); + extern doublereal slamch_(char *); + extern logical sisnan_(real *); + + +/* + -- LAPACK routine (version 3.2.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + In case of failure it changes shifts, and tries again until output + is positive. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input/output) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. + + PP (input/output) INTEGER + PP=0 for ping, PP=1 for pong. + PP=2 indicates that flipping was applied to the Z array + and that the initial tests for deflation should not be + performed. 
+ + DMIN (output) REAL + Minimum value of d. + + SIGMA (output) REAL + Sum of shifts used in current segment. + + DESIG (input/output) REAL + Lower order part of SIGMA + + QMAX (input) REAL + Maximum value of q. + + NFAIL (output) INTEGER + Number of times shift was too big. + + ITER (output) INTEGER + Number of iterations. + + NDIV (output) INTEGER + Number of divisions. + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). + + TTYPE (input/output) INTEGER + Shift type. + + DMIN1 (input/output) REAL + + DMIN2 (input/output) REAL + + DN (input/output) REAL + + DN1 (input/output) REAL + + DN2 (input/output) REAL + + G (input/output) REAL + + TAU (input/output) REAL + + These are passed as arguments in order to save their values + between calls to SLASQ3. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + n0in = *n0; + eps = slamch_("Precision"); + tol = eps * 100.f; +/* Computing 2nd power */ + r__1 = tol; + tol2 = r__1 * r__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + if (*pp == 2) { + *pp = 0; + } + +/* Reverse the qd-array, if warranted. 
*/ + + if (*dmin__ <= 0.f || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + r__1 = *dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] + , r__1 = min(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 = + min(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = *qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = max(r__1, + r__2), r__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = dmax(r__1,r__2); + *dmin__ = -0.f; + } + } + +/* Choose a shift. */ + + slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); + +/* Call dqds until DMIN > 0. */ + +L70: + + slasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0.f && *dmin1 > 0.f) { + +/* Success. */ + + goto L90; + + } else if (*dmin__ < 0.f && *dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + *dn1) && dabs(*dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; + *dmin__ = 0.f; + goto L90; + } else if (*dmin__ < 0.f) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (*ttype < -22) { + +/* Failed twice. Play it safe. */ + + *tau = 0.f; + } else if (*dmin1 > 0.f) { + +/* Late failure. Gives excellent shift. */ + + *tau = (*tau + *dmin__) * (1.f - eps * 2.f); + *ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + *tau *= .25f; + *ttype += -12; + } + goto L70; + } else if (sisnan_(dmin__)) { + +/* NaN. */ + + if (*tau == 0.f) { + goto L80; + } else { + *tau = 0.f; + goto L70; + } + } else { + +/* Possible underflow. Play it safe. */ + + goto L80; + } + +/* Risk of underflow. 
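+
+   (In that case control passes to label 80 below, which performs one
+   protected dqd sweep with zero shift via SLASQ6 and resets TAU to
+   zero.)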
*/ + +L80: + slasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + *tau = 0.f; + +L90: + if (*tau < *sigma) { + *desig += *tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of SLASQ3 */ + +} /* slasq3_ */ + +/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, + integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, + real *dn1, real *dn2, real *tau, integer *ttype, real *g) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real s, a2, b1, b2; + static integer i4, nn, np; + static real gam, gap1, gap2; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ4 computes an approximation TAU to the smallest eigenvalue + using values of d from the previous transform. + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + NOIN (input) INTEGER + The value of N0 at start of EIGTEST. + + DMIN (input) REAL + Minimum value of d. + + DMIN1 (input) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (input) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (input) REAL + d(N) + + DN1 (input) REAL + d(N-1) + + DN2 (input) REAL + d(N-2) + + TAU (output) REAL + This is the shift. + + TTYPE (output) INTEGER + Shift type. + + G (input/output) REAL + G is passed as an argument in order to save its value between + calls to SLASQ4. + + Further Details + =============== + CNST1 = 9/16 + + ===================================================================== + + + A negative DMIN forces the shift to take that absolute value + TTYPE records the type of shift. +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*dmin__ <= 0.f) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25f; + if (gap2 > 0.f && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0.f && gap1 > b1) { +/* Computing MAX */ + r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; + s = dmax(r__1,r__2); + *ttype = -2; + } else { + s = 0.f; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + r__1 = s, r__2 = a2 - (b1 + b2); + s = dmin(r__1,r__2); + } +/* Computing MAX */ + r__1 = s, r__2 = *dmin__ * .333f; + s = dmax(r__1,r__2); + *ttype = -3; + } + } else { + +/* Case 4. 
*/ + + *ttype = -4; + s = *dmin__ * .25f; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.f) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05f; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25f; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.f) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05f; + } + + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + *g += (1.f - *g) * .333f; + } else if (*ttype == -18) { + *g = .083250000000000005f; + } else { + *g = .25f; + } + s = *g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.f) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (dmax(b1,a2) * 100.f < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05f); +/* Computing 2nd power */ + r__1 = b2; + a2 = *dmin1 / (r__1 * r__1 + 1.f); + gap2 = *dmin2 * .5f - a2; + if (gap2 > 0.f && gap2 > b2 * a2) { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); + } else { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25f; + if (*dmin1 == *dn1) { + s = *dmin1 * .5f; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* + Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + + Cases 10 and 11. 
+*/ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.f) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100.f < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05f); +/* Computing 2nd power */ + r__1 = b2; + a2 = *dmin2 / (r__1 * r__1 + 1.f); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0.f && gap2 > b2 * a2) { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); + } else { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); + } + } else { + s = *dmin2 * .25f; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.f; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of SLASQ4 */ + +} /* slasq4_ */ + +/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp, + real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * + dnm1, real *dnm2, logical *ieee) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real d__; + static integer j4, j4p2; + static real emin, temp; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ5 computes one dqds transform in ping-pong form, one + version for IEEE machines another for non IEEE machines. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + TAU (input) REAL + This is the shift. + + DMIN (output) REAL + Minimum value of d. + + DMIN1 (output) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) REAL + d(N0), the last value of d. + + DNM1 (output) REAL + d(N0-1). + + DNM2 (output) REAL + d(N0-2). + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. 
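+
+   (The IEEE branch below omits the early return on a negative D that
+   protects the non IEEE branch; under IEEE arithmetic the sweep runs to
+   completion and the caller, SLASQ3, detects a failed shift afterwards
+   from DMIN or from a NaN.)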
*/ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = dmin(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + r__1 = z__[j4]; + emin = dmin(r__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = dmin(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + r__1 = z__[j4 - 1]; + emin = dmin(r__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = dmin(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.f) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.f) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.f) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.f) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of SLASQ5 */ + +} /* slasq5_ */ + +/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * + dnm2) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real d__; + static integer j4, j4p2; + static real emin, temp; + extern doublereal slamch_(char *); + static real safmin; + + +/* + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + SLASQ6 computes one dqd (shift equal to zero) transform in + ping-pong form, with protection against underflow and overflow. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) REAL + Minimum value of d. + + DMIN1 (output) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) REAL + d(N0), the last value of d. + + DNM1 (output) REAL + d(N0-1). + + DNM2 (output) REAL + d(N0-2). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = slamch_("Safe minimum"); + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.f; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.f) { + z__[j4 - 1] = 0.f; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.f; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); +/* L20: */ + } + } + +/* Unroll last two steps. 
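+
+   (The last two transform steps are written out separately because
+   their d values have to be returned individually as DNM2, DNM1 and DN
+   rather than being folded into the loop above.)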
*/ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.f; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.f; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of SLASQ6 */ + +} /* slasq6_ */ + +/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, info; + static real temp; + extern logical lsame_(char *, char *); + static real ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASR applies a sequence of plane rotations to a real matrix A, + from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form + + A := A*P**T + + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. + + When DIRECT = 'F' (Forward sequence), then + + P = P(z-1) * ... * P(2) * P(1) + + and when DIRECT = 'B' (Backward sequence), then + + P = P(1) * P(2) * ... * P(z-1) + + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). + + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. + + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form + + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. 
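+
+    As a concrete sketch (using the same notation as above), for
+    SIDE = 'L' with PIVOT = 'V' the k-th rotation reduces to the
+    two-row update carried out in the code below, applied to every
+    column i:
+
+        temp     = A(k+1,i)
+        A(k+1,i) = c(k)*temp - s(k)*A(k,i)
+        A(k,i)   = s(k)*temp + c(k)*A(k,i)
+
+    so P(k) only ever touches rows k and k+1 of A.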
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P**T + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). + + A (input/output) REAL array, dimension (LDA,N) + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! 
(lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("SLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + 
if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of SLASR */ + +} /* slasr_ */ + +/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, j; + static real d1, d2, d3; + static integer dir; + static real tmp; + static integer endd; + extern logical lsame_(char *, char *); + static integer stack[64] /* was [2][32] */; + static real dmnmx; + static integer start; + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer stkpnt; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + Sort the numbers in D in increasing order (if ID = 'I') or + in decreasing order (if ID = 'D' ). + + Use Quick Sort, reverting to Insertion sort on arrays of + size <= 20. Dimension of STACK limits N to about 2**32. + + Arguments + ========= + + ID (input) CHARACTER*1 + = 'I': sort D in increasing order; + = 'D': sort D in decreasing order. + + N (input) INTEGER + The length of the array D. + + D (input/output) REAL array, dimension (N) + On entry, the array to be sorted. + On exit, D has been sorted into increasing order + (D(1) <= ... <= D(N) ) or into decreasing order + (D(1) >= ... >= D(N) ), depending on ID. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input paramters. +*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, "D")) { + dir = 0; + } else if (lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* + Partition D( START:ENDD ) and stack parts, largest one first + + Choose partition entry as median of 3 +*/ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of SLASRT */ + +} /* slasrt_ */ + +/* Subroutine */ int slassq_(integer *n, real *x, integer *incx, real *scale, + real *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + real 
r__1; + + /* Local variables */ + static integer ix; + static real absxi; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASSQ returns the values scl and smsq such that + + ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + assumed to be non-negative and scl returns the value + + scl = max( scale, abs( x( i ) ) ). + + scale and sumsq must be supplied in SCALE and SUMSQ and + scl and smsq are overwritten on SCALE and SUMSQ respectively. + + The routine makes only one pass through the vector x. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) REAL array, dimension (N) + The vector for which a scaled sum of squares is computed. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) REAL + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with scl , the scaling factor + for the sum of squares. + + SUMSQ (input/output) REAL + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with smsq , the basic sum of + squares from which scl has been factored out. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + r__1 = *scale / absxi; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + r__1 = absxi / *scale; + *sumsq += r__1 * r__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of SLASSQ */ + +} /* slassq_ */ + +/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax, real *snr, real *csr, real *snl, real *csl) +{ + /* System generated locals */ + real r__1; + + /* Local variables */ + static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, + crt, slt, srt; + static integer pmax; + static real temp; + static logical swap; + static real tsign; + static logical gasmal; + extern doublereal slamch_(char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASV2 computes the singular value decomposition of a 2-by-2 + triangular matrix + [ F G ] + [ 0 H ]. + On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + right singular vectors for abs(SSMAX), giving the decomposition + + [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + Arguments + ========= + + F (input) REAL + The (1,1) element of the 2-by-2 matrix. + + G (input) REAL + The (1,2) element of the 2-by-2 matrix. + + H (input) REAL + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) REAL + abs(SSMIN) is the smaller singular value. 
+ + SSMAX (output) REAL + abs(SSMAX) is the larger singular value. + + SNL (output) REAL + CSL (output) REAL + The vector (CSL, SNL) is a unit left singular vector for the + singular value abs(SSMAX). + + SNR (output) REAL + CSR (output) REAL + The vector (CSR, SNR) is a unit right singular vector for the + singular value abs(SSMAX). + + Further Details + =============== + + Any input parameter may be aliased with any output parameter. + + Barring over/underflow and assuming a guard digit in subtraction, all + output quantities are correct to within a few units in the last + place (ulps). + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ===================================================================== +*/ + + + ft = *f; + fa = dabs(ft); + ht = *h__; + ha = dabs(*h__); + +/* + PMAX points to the maximum absolute element of matrix + PMAX = 1 if F largest in absolute values + PMAX = 2 if G largest in absolute values + PMAX = 3 if H largest in absolute values +*/ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = dabs(gt); + if (ga == 0.f) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.f; + crt = 1.f; + slt = 0.f; + srt = 0.f; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < slamch_("EPS")) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.f) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.f; + slt = ht / gt; + srt = 1.f; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.f; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2.f - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.f) { + r__ = dabs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5f; + +/* Note that 1 .le. A .le. 
1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.f) { + +/* Note that M is very tiny */ + + if (l == 0.f) { + t = r_sign(&c_b3178, &ft) * r_sign(&c_b15, >); + } else { + t = gt / r_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); + } + l = sqrt(t * t + 4.f); + crt = 2.f / l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = r_sign(&c_b15, csr) * r_sign(&c_b15, csl) * r_sign(&c_b15, f); + } + if (pmax == 2) { + tsign = r_sign(&c_b15, snr) * r_sign(&c_b15, csl) * r_sign(&c_b15, g); + } + if (pmax == 3) { + tsign = r_sign(&c_b15, snr) * r_sign(&c_b15, snl) * r_sign(&c_b15, + h__); + } + *ssmax = r_sign(ssmax, &tsign); + r__1 = tsign * r_sign(&c_b15, f) * r_sign(&c_b15, h__); + *ssmin = r_sign(ssmin, &r__1); + return 0; + +/* End of SLASV2 */ + +} /* slasv2_ */ + +/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1, + integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static real temp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of SLASWP */ + +} /* slaswp_ */ + +/* Subroutine */ int slasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * + ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real + *xnorm, integer *info) +{ + /* Initialized data */ + + static integer locu12[4] = { 3,4,1,2 }; + static integer locl21[4] = { 2,1,4,3 }; + static integer locu22[4] = { 4,3,2,1 }; + static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + + /* System generated locals */ + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, + x_offset; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + + /* Local variables */ + static integer i__, j, k; + static real x2[2], l21, u11, u12; + static integer ip, jp; + static real u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, tmp[4], + tau1, btmp[4], smin; + static integer ipiv; + static real temp; + static integer jpiv[4]; + static real xmax; + static integer ipsv, jpsv; + static logical bswap; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ); + static logical xswap; + extern doublereal slamch_(char *); + extern integer isamax_(integer *, real *, integer *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + + op(TL)*X + ISGN*X*op(TR) = SCALE*B, + + where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + -1. op(T) = T or T', where T' denotes the transpose of T. + + Arguments + ========= + + LTRANL (input) LOGICAL + On entry, LTRANL specifies the op(TL): + = .FALSE., op(TL) = TL, + = .TRUE., op(TL) = TL'. + + LTRANR (input) LOGICAL + On entry, LTRANR specifies the op(TR): + = .FALSE., op(TR) = TR, + = .TRUE., op(TR) = TR'. + + ISGN (input) INTEGER + On entry, ISGN specifies the sign of the equation + as described before. ISGN may only be 1 or -1. + + N1 (input) INTEGER + On entry, N1 specifies the order of matrix TL. + N1 may only be 0, 1 or 2. + + N2 (input) INTEGER + On entry, N2 specifies the order of matrix TR. + N2 may only be 0, 1 or 2. + + TL (input) REAL array, dimension (LDTL,2) + On entry, TL contains an N1 by N1 matrix. + + LDTL (input) INTEGER + The leading dimension of the matrix TL. LDTL >= max(1,N1). + + TR (input) REAL array, dimension (LDTR,2) + On entry, TR contains an N2 by N2 matrix. + + LDTR (input) INTEGER + The leading dimension of the matrix TR. LDTR >= max(1,N2). 
+ + B (input) REAL array, dimension (LDB,2) + On entry, the N1 by N2 matrix B contains the right-hand + side of the equation. + + LDB (input) INTEGER + The leading dimension of the matrix B. LDB >= max(1,N1). + + SCALE (output) REAL + On exit, SCALE contains the scale factor. SCALE is chosen + less than or equal to 1 to prevent the solution overflowing. + + X (output) REAL array, dimension (LDX,2) + On exit, X contains the N1 by N2 solution. + + LDX (input) INTEGER + The leading dimension of the matrix X. LDX >= max(1,N1). + + XNORM (output) REAL + On exit, XNORM is the infinity-norm of the solution. + + INFO (output) INTEGER + On exit, INFO is set to + 0: successful exit. + 1: TL and TR have too close eigenvalues, so TL or + TR is perturbed to get a nonsingular equation. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Do not check the input parameters for errors */ + + *info = 0; + +/* Quick return if possible */ + + if (*n1 == 0 || *n2 == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = slamch_("P"); + smlnum = slamch_("S") / eps; + sgn = (real) (*isgn); + + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + case 4: goto L50; + } + +/* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ + +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = dabs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + + *scale = 1.f; + gam = (r__1 = b[b_dim1 + 1], dabs(r__1)); + if (smlnum * gam > bet) { + *scale = 1.f / gam; + } + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); + return 0; + +/* + 1 by 2: + TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] + [TR21 TR22] +*/ + +L20: + +/* + Computing MAX + Computing MAX +*/ + r__7 = (r__1 = tl[tl_dim1 + 1], dabs(r__1)), r__8 = (r__2 = tr[tr_dim1 + + 1], dabs(r__2)), r__7 = max(r__7,r__8), r__8 = (r__3 = tr[( + tr_dim1 << 1) + 1], dabs(r__3)), r__7 = max(r__7,r__8), r__8 = ( + r__4 = tr[tr_dim1 + 2], dabs(r__4)), r__7 = max(r__7,r__8), r__8 = + (r__5 = tr[(tr_dim1 << 1) + 2], dabs(r__5)); + r__6 = eps * dmax(r__7,r__8); + smin = dmax(r__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; + +/* + 2 by 1: + op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] + [TL21 TL22] [X21] [X21] [B21] +*/ + +L30: +/* + Computing MAX + Computing MAX +*/ + r__7 = (r__1 = tr[tr_dim1 + 1], dabs(r__1)), r__8 = (r__2 = tl[tl_dim1 + + 1], dabs(r__2)), r__7 = max(r__7,r__8), r__8 = (r__3 = tl[( + tl_dim1 << 1) + 1], dabs(r__3)), r__7 = max(r__7,r__8), r__8 = ( + r__4 = tl[tl_dim1 + 2], dabs(r__4)), r__7 = max(r__7,r__8), r__8 = + (r__5 = tl[(tl_dim1 << 1) + 2], dabs(r__5)); + r__6 = eps * dmax(r__7,r__8); + smin = dmax(r__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] 
= tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + +/* + Solve 2 by 2 system using complete pivoting. + Set pivots less than SMIN to SMIN. +*/ + + ipiv = isamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (dabs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (dabs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.f; + if (smlnum * 2.f * dabs(btmp[1]) > dabs(u22) || smlnum * 2.f * dabs(btmp[ + 0]) > dabs(u11)) { +/* Computing MAX */ + r__1 = dabs(btmp[0]), r__2 = dabs(btmp[1]); + *scale = .5f / dmax(r__1,r__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << 1) + + 1], dabs(r__2)); + } else { + x[x_dim1 + 2] = x2[1]; +/* Computing MAX */ + r__3 = (r__1 = x[x_dim1 + 1], dabs(r__1)), r__4 = (r__2 = x[x_dim1 + + 2], dabs(r__2)); + *xnorm = dmax(r__3,r__4); + } + return 0; + +/* + 2 by 2: + op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] + [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] + + Solve equivalent 4 by 4 system using complete pivoting. + Set pivots less than SMIN to SMIN. 
+*/ + +L50: +/* Computing MAX */ + r__5 = (r__1 = tr[tr_dim1 + 1], dabs(r__1)), r__6 = (r__2 = tr[(tr_dim1 << + 1) + 1], dabs(r__2)), r__5 = max(r__5,r__6), r__6 = (r__3 = tr[ + tr_dim1 + 2], dabs(r__3)), r__5 = max(r__5,r__6), r__6 = (r__4 = + tr[(tr_dim1 << 1) + 2], dabs(r__4)); + smin = dmax(r__5,r__6); +/* Computing MAX */ + r__5 = smin, r__6 = (r__1 = tl[tl_dim1 + 1], dabs(r__1)), r__5 = max(r__5, + r__6), r__6 = (r__2 = tl[(tl_dim1 << 1) + 1], dabs(r__2)), r__5 = + max(r__5,r__6), r__6 = (r__3 = tl[tl_dim1 + 2], dabs(r__3)), r__5 + = max(r__5,r__6), r__6 = (r__4 = tl[(tl_dim1 << 1) + 2], dabs( + r__4)); + smin = dmax(r__5,r__6); +/* Computing MAX */ + r__1 = eps * smin; + smin = dmax(r__1,smlnum); + btmp[0] = 0.f; + scopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + +/* Perform elimination */ + + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.f; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((r__1 = t16[ip + (jp << 2) - 5], dabs(r__1)) >= xmax) { + xmax = (r__1 = t16[ip + (jp << 2) - 5], dabs(r__1)); + ipsv = ip; + jpsv = jp; + } +/* L60: */ + } +/* L70: */ + } + if (ipsv != i__) { + sswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + sswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], + &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((r__1 = t16[i__ + (i__ << 2) - 5], dabs(r__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( + k << 2) - 5]; +/* L80: */ + } +/* L90: */ + } +/* L100: */ + } + if (dabs(t16[15]) < smin) { + t16[15] = smin; + } + *scale = 1.f; + if (smlnum * 8.f * dabs(btmp[0]) > dabs(t16[0]) || smlnum * 8.f * dabs( + btmp[1]) > dabs(t16[5]) || smlnum * 8.f * dabs(btmp[2]) > dabs( + t16[10]) || smlnum * 8.f * dabs(btmp[3]) > dabs(t16[15])) { +/* Computing MAX */ + r__1 = dabs(btmp[0]), r__2 = dabs(btmp[1]), r__1 = max(r__1,r__2), + r__2 = dabs(btmp[2]), r__1 = max(r__1,r__2), r__2 = dabs(btmp[ + 3]); + *scale = .125f / dmax(r__1,r__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1.f / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j 
<< 2) - 5] * tmp[j - 1]; +/* L110: */ + } +/* L120: */ + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } +/* L130: */ + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; +/* Computing MAX */ + r__1 = dabs(tmp[0]) + dabs(tmp[2]), r__2 = dabs(tmp[1]) + dabs(tmp[3]); + *xnorm = dmax(r__1,r__2); + return 0; + +/* End of SLASY2 */ + +} /* slasy2_ */ + +/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, + integer *lda, real *e, real *tau, real *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, iw; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static real alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), saxpy_( + integer *, real *, real *, integer *, real *, integer *), ssymv_( + char *, integer *, real *, real *, integer *, real *, integer *, + real *, real *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLATRD reduces NB rows and columns of a real symmetric matrix A to + symmetric tridiagonal form by an orthogonal similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', SLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', SLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by SSYTRD. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors. 
+ See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= (1,N). + + E (output) REAL array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) REAL array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a symmetric rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + sgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + sgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1; + slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.f; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + ssymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) + * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1] + , &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + ssymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + 
sgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] + , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of SLATRD */ + +} /* slatrd_ */ + +/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[ + i__ + a_dim1], lda); + } else { + sscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of SLAUU2 */ + +} /* slauu2_ */ + +/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), slauu2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. 
+ On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + slauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of SLAUUM */ + +} /* slauum_ */ + +/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORG2R generates an m by n real matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by SGEQRF in the first k columns of its array + argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. 
+ + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.f; +/* L10: */ + } + a[j + j * a_dim1] = 1.f; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.f; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of SORG2R */ + +} /* sorg2r_ */ + +/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k, + real *a, integer *lda, real *tau, real *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), sorgqr_( + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORGBR generates one of the real orthogonal matrices Q or P**T + determined by SGEBRD when reducing a real matrix A to bidiagonal + form: A = Q * B * P**T. Q and P**T are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + is of order N: + if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m + rows of P**T, where n >= m >= k; + if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as + an N-by-N matrix. 
+ + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**T is + required, as defined in the transformation applied by SGEBRD: + = 'Q': generate Q; + = 'P': generate P**T. + + M (input) INTEGER + The number of rows of the matrix Q or P**T to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**T to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by SGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by SGEBRD. + K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by SGEBRD. + On exit, the M-by-N matrix Q or P**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**T, as + returned by SGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.f; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to SGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.f; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.f; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.f; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + sorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to SGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + a[a_dim1 + 1] = 1.f; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.f; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.f; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + sorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORGBR */ + +} /* sorgbr_ */ + +/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORGHR generates a real orthogonal matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + SGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). 
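+
+   A minimal calling sketch (illustrative only; the local names n, ilo,
+   ihi, a, lda, tau, work, lwork and info are assumed to come from a
+   prior SGEHRD reduction to upper Hessenberg form; Q overwrites the
+   reflectors stored in A):
+
+       sgehrd_(&n, &ilo, &ihi, a, &lda, tau, work, &lwork, &info);
+       sorghr_(&n, &ilo, &ihi, a, &lda, tau, work, &lwork, &info);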
+ + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of SGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by SGEHRD. + On exit, the N-by-N orthogonal matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) REAL array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEHRD. + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! 
lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L50: */ + } + a[j + j * a_dim1] = 1.f; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L70: */ + } + a[j + j * a_dim1] = 1.f; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORGHR */ + +} /* sorghr_ */ + +/* Subroutine */ int sorgl2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORGL2 generates an m by n real matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by SGELQF in the first k rows of its array argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. 
+ + WORK (workspace) REAL array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.f; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.f; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.f; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of SORGL2 */ + +} /* sorgl2_ */ + +/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORGLQ generates an M-by-N real matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. 
+ + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by SGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? 
i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (real) iws; + return 0; + +/* End of SORGLQ */ + +} /* sorglq_ */ + +/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORGQR generates an M-by-N real matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by SGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). 
+ For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + slarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (real) iws; + return 0; + +/* End of SORGQR */ + +} /* sorgqr_ */ + +/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORM2L overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQLF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORM2L */ + +} /* sorm2l_ */ + +/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORM2R overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. 
+ + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORM2R */ + +} /* sorm2r_ */ + +/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran, applyq; + static char transt[1]; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'T': P**T * C C * P**T + + Here Q and P**T are the orthogonal matrices determined by SGEBRD when + reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + P**T are defined as products of elementary reflectors H(i) and G(i) + respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the orthogonal matrix Q or P**T that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**T; + = 'P': apply P or P**T. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**T, P or P**T from the Left; + = 'R': apply Q, Q**T, P or P**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'T': Transpose, apply Q**T or P**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by SGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by SGEBRD. + K >= 0. 
+ + A (input) REAL array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by SGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) REAL array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by SGEBRD in the array argument TAUQ or TAUP. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q + or P*C or P**T*C or C*P or C*P**T. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to SGEBRD with nq >= k */ + + sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to SGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to SGEBRD with nq > k */ + + sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to SGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + sormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMBR */ + +} /* sormbr_ */ + +/* Subroutine */ int sormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * + c__, integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* 
Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMHR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by SGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of SGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by SGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEHRD. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! 
lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "SORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SORMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.f; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + sormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1] = (real) lwkopt; + return 0; + +/* End of SORMHR */ + +} /* sormhr_ */ + +/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORML2 overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. 
+ + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORML2 */ + +} /* sorml2_ */ + +/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorml2_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMLQ overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. 
+ On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMLQ */ + +} /* sormlq_ */ + +/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMQL overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. 
+ If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQLF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. + + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1] = (real) lwkopt; + + if (*lwork < nw && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMQL */ + +} /* sormql_ */ + +/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMQR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. 
+ + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
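   Editorial sketch, not part of the vendored LAPACK sources: the LWORK = -1
   workspace query documented above is shared by SORMLQ, SORMQL and SORMQR.
   A minimal caller, assuming the f2c typedefs (integer, real) from the
   accompanying f2c.h, <stdlib.h> for malloc/free, and column-major input;
   the helper name apply_qt_left and its error handling are illustrative only:

       #include <stdlib.h>

       static int apply_qt_left(integer m, integer n, integer k,
                                real *a, integer lda, real *tau,
                                real *c, integer ldc)
       {
           integer info = 0, lwork = -1;
           real query = 0.f, *work;

           // Query pass: WORK(1) returns the optimal LWORK, nothing is applied.
           sormqr_("L", "T", &m, &n, &k, a, &lda, tau, c, &ldc,
                   &query, &lwork, &info);
           if (info != 0) return (int) info;

           lwork = (integer) query;
           work = (real *) malloc(sizeof(real) * (size_t) lwork);
           if (work == NULL) return -1;

           // Real pass: C := Q**T * C, with Q taken from the SGEQRF factor in A.
           sormqr_("L", "T", &m, &n, &k, a, &lda, tau, c, &ldc,
                   work, &lwork, &info);
           free(work);
           return (int) info;
       }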
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMQR */ + +} /* sormqr_ */ + +/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sormql_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMTR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by SSYTRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from SSYTRD; + = 'L': Lower triangle of A contains elementary reflectors + from SSYTRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by SSYTRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SSYTRD. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.f; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to SSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + sormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to SSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMTR */ + +} /* sormtr_ */ + +/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer j; + static real ajj; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern logical sisnan_(real *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. 
+ = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.f || sisnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & + a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.f || sisnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. 
*/ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of SPOTF2 */ + +} /* spotf2_ */ + +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), spotf2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + spotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b15, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of SPOTRF */ + +} /* spotrf_ */ + +/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slauum_( + char *, integer *, real *, integer *, integer *), strtri_( + char *, char *, integer *, real *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by SPOTRF. 
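   Editorial sketch, not part of the vendored sources: inverting an SPD matrix
   in place typically chains SPOTRF and SPOTRI on the same column-major buffer.
   The 2-by-2 matrix below is only an illustration; f2c typedefs (integer, real)
   from the accompanying f2c.h are assumed:

       static real a[4] = { 4.f, 1.f, 1.f, 3.f };   // 2-by-2 SPD, column-major
       integer n = 2, lda = 2, info = 0;

       spotrf_("U", &n, a, &lda, &info);            // A = U**T * U
       if (info == 0)
           spotri_("U", &n, a, &lda, &info);        // upper triangle of inv(A)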
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + SPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + slauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of SPOTRI */ + +} /* spotri_ */ + +/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by SPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. 
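   Editorial sketch, not part of the vendored sources: the usual pattern is one
   SPOTRF factorization followed by SPOTRS for the right-hand sides, on
   column-major arrays with the f2c typedefs (integer, real) from f2c.h.
   The 2-by-2 system below is illustrative only:

       static real a[4] = { 4.f, 1.f, 1.f, 3.f };   // SPD matrix, column-major
       static real b[2] = { 1.f, 2.f };             // right-hand side
       integer n = 2, nrhs = 1, lda = 2, ldb = 2, info = 0;

       spotrf_("L", &n, a, &lda, &info);            // A = L * L**T
       if (info == 0)
           spotrs_("L", &n, &nrhs, a, &lda, b, &ldb, &info);   // b := x
       // info > 0 from SPOTRF means the leading minor of that order is not
       // positive definite and no solve should be attempted.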
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of SPOTRS */ + +} /* spotrs_ */ + +/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static integer i__, j, k, m; + static real p; + static integer ii, lgn; + static real eps, tiny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer lwmin, start; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slaed0_(integer *, integer *, integer *, real *, real + *, real *, integer *, real *, integer *, real *, integer *, + integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer finish; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *), slaset_(char *, integer *, integer *, + real *, real *, real *, integer *); + static integer liwmin, icompz; + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), + slasrt_(char *, integer *, real *, integer *); + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + static integer storez, strtrw; + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band real symmetric matrix can also be + found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original dense symmetric + matrix also. On entry, Z contains the orthogonal + matrix used to reduce the original matrix to + tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) REAL array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LWORK must be at least + ( 1 + 3*N + 2*N*lg N + 3*N**2 ), + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1 then LWORK must be at least + ( 1 + 4*N + N**2 ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be max(1,2*(N-1)). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LIWORK must be at least + ( 6 + 6*N + 5*N*lg N ). + If COMPZ = 'I' and N > 1 then LIWORK must be at least + ( 3 + 5*N ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. 
+ + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + work[1] = (real) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.f; + } + return 0; + } + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures SSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. + + If COMPZ = 'N', use SSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + goto L50; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + + ssteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + + } else { + +/* + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. 
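   Editorial sketch, not part of the vendored sources: instead of evaluating
   the LWORK/LIWORK formulas above by hand, a caller can issue the
   LWORK = -1, LIWORK = -1 query and then allocate.  The helper name
   tridiag_eig_dc is illustrative; f2c typedefs (integer, real) from f2c.h
   and <stdlib.h> are assumed, with d, e, z being the caller's tridiagonal
   data and eigenvector buffer (ldz >= n):

       static int tridiag_eig_dc(integer n, real *d, real *e,
                                 real *z, integer ldz)
       {
           integer info = 0, lwork = -1, liwork = -1, iquery = 0;
           real wquery = 0.f;
           real *work;
           integer *iwork;

           // Query pass: required sizes come back in wquery and iquery.
           sstedc_("I", &n, d, e, z, &ldz, &wquery, &lwork,
                   &iquery, &liwork, &info);
           if (info != 0) return (int) info;

           lwork = (integer) wquery;
           liwork = iquery;
           work = (real *) malloc(sizeof(real) * (size_t) lwork);
           iwork = (integer *) malloc(sizeof(integer) * (size_t) liwork);
           if (work == NULL || iwork == NULL) {
               free(work);
               free(iwork);
               return -1;
           }

           // Real pass: eigenvalues end up in d (ascending), eigenvectors in z.
           sstedc_("I", &n, d, e, z, &ldz, work, &lwork,
                   iwork, &liwork, &info);
           free(work);
           free(iwork);
           return (int) info;
       }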
+*/ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + goto L50; + } + + eps = slamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. +*/ + + finish = start; +L20: + if (finish < *n) { + tiny = eps * sqrt((r__1 = d__[finish], dabs(r__1))) * sqrt(( + r__2 = d__[finish + 1], dabs(r__2))); + if ((r__1 = e[finish], dabs(r__1)) > tiny) { + ++finish; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } + +/* Scale back. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + if (icompz == 1) { + +/* + Since QR won't update a Z matrix which is larger than + the length of D, we must solve the sub-problem in a + workspace and then multiply back into Z. +*/ + + ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info); + slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + sgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], n, & + work[1], &m, &c_b29, &z__[start * z_dim1 + 1], + ldz); + } else if (icompz == 2) { + ssteqr_("I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info); + } else { + ssterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } + } + + start = finish + 1; + goto L10; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
+*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * + z_dim1 + 1], &c__1); + } +/* L40: */ + } + } + } + } + +L50: + work[1] = (real) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of SSTEDC */ + +} /* sstedc_ */ + +/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static real b, c__, f, g; + static integer i__, j, k, l, m; + static real p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static real rt1, rt2, eps; + static integer lsv; + static real tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + extern logical lsame_(char *, char *); + static real anorm; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); + static integer lendm1, lendp1; + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); + static integer iscale; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ), slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static real ssfmin; + static integer nmaxit, icompz; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band symmetric matrix can also be found + if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to + tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + symmetric matrix. On entry, Z must contain the + orthogonal matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. 
+ + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) REAL array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) REAL array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is orthogonally similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.f; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. 
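   Editorial sketch, not part of the vendored sources: SSTEQR takes no
   workspace query; for COMPZ = 'I' the fixed max(1, 2*N-2) real workspace
   documented above suffices.  The helper name tridiag_eig_qr is illustrative;
   f2c typedefs from f2c.h, <stdlib.h>, and ldz >= n are assumed:

       static int tridiag_eig_qr(integer n, real *d, real *e,
                                 real *z, integer ldz)
       {
           integer info = 0;
           size_t lwork = (n > 1) ? (size_t) (2 * n - 2) : 1;
           real *work = (real *) malloc(sizeof(real) * lwork);
           if (work == NULL) return -1;

           // Eigenvalues overwrite d in ascending order; z gets the eigenvectors.
           ssteqr_("I", &n, d, e, z, &ldz, work, &info);
           free(work);
           return (int) info;
       }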
+*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { + goto L30; + } + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.f) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b15); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b15); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. 
*/ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of SSTEQR */ + +} /* ssteqr_ */ + +/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3; + + /* Local variables */ + static real c__; + static integer i__, l, m; + static real p, r__, s; + static integer l1; + static real bb, rt1, rt2, eps, rte; + static integer lsv; + static real eps2, oldc; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + static real gamma, alpha, sigma, anorm; + extern doublereal slapy2_(real *, real *); + static integer iscale; + static real oldgam; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + static real ssfmin; + static integer nmaxit; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSTERF computes all eigenvalues of a symmetric tridiagonal matrix + using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed to find all of the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("SSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. 
*/ + + nmaxit = *n * 30; + sigma = 0.f; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * + sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + r__1 = e[i__]; + e[i__] = r__1 * r__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m + 1], dabs(r__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* + If remaining matrix is 2 by 2, use SLAE2 to compute its + eigenvalues. +*/ + + if (m == l + 1) { + rte = sqrt(e[l]); + slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + + c__ = 1.f; + s = 0.f; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.f) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m - 1], dabs(r__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* + If remaining matrix is 2 by 2, use SLAE2 to compute its + eigenvalues. +*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + + c__ = 1.f; + s = 0.f; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.f) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. 
*/ + +L170: + slasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of SSTERF */ + +} /* ssterf_ */ + +/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static real eps; + static integer inde; + static real anrm, rmin, rmax; + static integer lopt; + static real sigma; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; + static logical lower, wantz; + static integer indwk2, llwrk2, iscale; + extern doublereal slamch_(char *); + static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer indtau; + extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, + real *, integer *, real *, integer *, integer *, integer *, + integer *), slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + static integer indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + extern doublereal slansy_(char *, char *, integer *, real *, integer *, + real *); + static integer llwork; + static real smlnum; + static logical lquery; + extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), ssytrd_(char *, + integer *, real *, integer *, real *, real *, real *, real *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSYEVD computes all eigenvalues and, optionally, eigenvectors of a + real symmetric matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Because of large use of BLAS of level 3, SSYEVD needs N**2 more + workspace than SSYEVX. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA, N) + On entry, the symmetric matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. 
+ On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) REAL array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) REAL array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. + If JOBZ = 'V' and N > 1, LWORK must be at least + 1 + 6*N + 2*N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK and IWORK + arrays, returns these values as the first entries of the WORK + and IWORK arrays, and no error message related to LWORK or + LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK and + IWORK arrays, returns these values as the first entries of + the WORK and IWORK arrays, and no error message related to + LWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + Modified description of INFO. Sven, 16 Feb 05. + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + ilaenv_(&c__1, "SSYTRD", uplo, n, + &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + liopt = liwmin; + } + work[1] = (real) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! 
lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + slascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* + For eigenvalues only, call SSTERF. For eigenvectors, first call + SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call SORMTR to multiply it by the + Householder transformations stored in A. +*/ + + if (! wantz) { + ssterf_(n, &w[1], &work[inde], info); + } else { + sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + r__1 = 1.f / sigma; + sscal_(n, &r__1, &w[1], &c__1); + } + + work[1] = (real) lopt; + iwork[1] = liopt; + + return 0; + +/* End of SSYEVD */ + +} /* ssyevd_ */ + +/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real taui; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + static real alpha; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), ssymv_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *), + xerbla_(char *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + form T by an orthogonal similarity transformation: Q' * A * Q = T. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.f) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + ssyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.f) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + ssyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of SSYTD2 */ + +} /* ssytd2_ */ + +/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, real *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, + real *, real *, real *, integer *), ssyr2k_(char *, char * + , integer *, integer *, real *, real *, integer *, real *, + integer *, real *, real *, integer *), xerbla_( + char *, integer *); + 
extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, + integer *, real *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SSYTRD reduces a real symmetric matrix A to real symmetric + tridiagonal form T by an orthogonal similarity transformation: + Q**T * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYTRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], + lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+ib:n,i+ib:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (real) lwkopt; + return 0; + +/* End of SSYTRD */ + +} /* ssytrd_ */ + +/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *mm, integer *m, real *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static integer i__, j, k; + static real x[4] /* was [2][2] */; + static integer j1, j2, n2, ii, ki, ip, is; + static real wi, wr, rec, ulp, beta, emax; + static logical pair, allv; + static integer ierr; + static real unfl, ovfl, smin; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static logical over; + static real vmax; + static integer jnxt; + static real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real remax; + static logical leftv; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static logical bothv; + static real vcrit; + static logical somev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static real xnorm; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), 
slaln2_(logical *, integer *, integer *, real + *, real *, real *, integer *, real *, real *, real *, integer *, + real *, real *, real *, integer *, real *, real *, integer *), + slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern integer isamax_(integer *, real *, integer *); + static logical rightv; + static real smlnum; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + STREVC computes some or all of the right and/or left eigenvectors of + a real upper quasi-triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, (y**H)*T = w*(y**H) + + where y**H denotes the conjugate transpose of y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal blocks of T. + + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the orthogonal factor that reduces a matrix + A to Schur form T, then Q*X and Q*Y are the matrices of right and + left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + backtransformed by the matrices in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + as indicated by the logical array SELECT. + + SELECT (input/output) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If w(j) is a real eigenvalue, the corresponding real + eigenvector is computed if SELECT(j) is .TRUE.. + If w(j) and w(j+1) are the real and imaginary parts of a + complex eigenvalue, the corresponding complex eigenvector is + computed if either SELECT(j) or SELECT(j+1) is .TRUE., and + on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to + .FALSE.. + Not referenced if HOWMNY = 'A' or 'B'. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input) REAL array, dimension (LDT,N) + The upper quasi-triangular matrix T in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) REAL array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by SHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part, and the second the imaginary part. + Not referenced if SIDE = 'R'. + + LDVL (input) INTEGER + The leading dimension of the array VL. 
LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. + + VR (input/output) REAL array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by SHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part and the second the imaginary part. + Not referenced if SIDE = 'L'. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B', LDVR >= N. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. + If HOWMNY = 'A' or 'B', M is set to N. + Each selected real eigenvector occupies one column and each + selected complex eigenvector occupies two columns. + + WORK (workspace) REAL array, dimension (3*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* + Set M to the number of columns required to store the selected + eigenvectors, standardize the array SELECT if necessary, and + test MM. +*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.f) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. 
*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); + bignum = (1.f - ulp) / smlnum; + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + work[1] = 0.f; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1)); +/* L20: */ + } +/* L30: */ + } + +/* + Index IP is used to specify the real or complex eigenvalue: + IP = 0, real eigenvalue, + 1, first of conjugate complex pair: (wr,wi) + -1, second of conjugate complex pair: (wr,wi) +*/ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. */ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.f) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.f; + if (ip != 0) { + wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2))); + } +/* Computing MAX */ + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.f; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* + Solve the upper quasi-triangular system: + (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +*/ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* + Scale X(1,1) to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + slaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. 
+*/ + + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1)); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.f; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1)); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* + Complex right eigenvector. + + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] +*/ + + if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[ + ki + (ki - 1) * t_dim1], dabs(r__2))) { + work[ki - 1 + *n] = 1.f; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.f; + } + work[ki + *n] = 0.f; + work[ki - 1 + n2] = 0.f; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +*/ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + slaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X to avoid overflow when updating + the right-hand side. 
+*/ + + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + rec = 1.f / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + r__1 = -x[3]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.f; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L100: */ + } + + remax = 1.f / emax; + sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.f; + vr[k + is * vr_dim1] = 0.f; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.f; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L120: */ + } + remax = 1.f / emax; + sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.f) { + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.f; + if (ip != 0) { + wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2))); + } +/* Computing MAX */ + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. 
*/ + + work[ki + *n] = 1.f; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ + + vmax = 1.f; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. +*/ + + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + r__2 = (r__1 = work[j + *n], dabs(r__1)); + vmax = dmax(r__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. + + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ + + slaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = + max(r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1)); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1)); + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* + Complex left eigenvector. 
+ + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ + + if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = + t[ki + 1 + ki * t_dim1], dabs(r__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.f; + } else { + work[ki + *n] = 1.f; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.f; + work[ki + n2] = 0.f; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ + + vmax = 1.f; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ + + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + r__1 = -wi; + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + n2], dabs(r__2)), r__3 = max( + r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side elements. 
+ + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + r__1 = -wi; + slaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1, + r__2), r__2 = dabs(x[1]), r__1 = max(r__1, + r__2), r__2 = dabs(x[3]), r__1 = max(r__1, + r__2); + vmax = dmax(r__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.f; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (is + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L220: */ + } + remax = 1.f / emax; + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; + vl[k + (is + 1) * vl_dim1] = 0.f; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.f; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (ki + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L240: */ + } + remax = 1.f / emax; + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of STREVC */ + 
+} /* strevc_ */ + +/* Subroutine */ int strexc_(char *compq, integer *n, real *t, integer *ldt, + real *q, integer *ldq, integer *ifst, integer *ilst, real *work, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + + /* Local variables */ + static integer nbf, nbl, here; + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *), slaexc_( + logical *, integer *, real *, integer *, real *, integer *, + integer *, integer *, integer *, real *, integer *); + static integer nbnext; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + STREXC reorders the real Schur factorization of a real matrix + A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + moved to row ILST. + + The real Schur form T is reordered by an orthogonal similarity + transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + is updated by postmultiplying it with Z. + + T must be in Schur canonical form (as returned by SHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) REAL array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + Schur canonical form. + On exit, the reordered upper quasi-triangular matrix, again + in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + orthogonal transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input/output) INTEGER + ILST (input/output) INTEGER + Specify the reordering of the diagonal blocks of T. + The block with row index IFST is moved to row ILST, by a + sequence of transpositions between adjacent blocks. + On exit, if IFST pointed on entry to the second row of a + 2-by-2 block, it is changed to point to the first row; ILST + always points to the first row of the block in its final + position (which may differ from its input value by +1 or -1). + 1 <= IFST <= N; 1 <= ILST <= N. + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + = 1: two adjacent blocks were too close to swap (the problem + is very ill-conditioned); T may have been partially + reordered, and ILST points to the first row of the + current position of the block being moved. + + ===================================================================== + + + Decode and test the input arguments. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! wantq && ! 
lsame_(compq, "N")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* + Determine the first row of specified block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.f) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.f) { + nbf = 2; + } + } + +/* + Determine the first row of the final block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.f) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.f) { + nbl = 2; + } + } + + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap block with next one below */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.f) { + nbnext = 2; + } + } + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & + nbf, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.f) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.f) { + nbnext = 2; + } + } + i__1 = here + 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + ++here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + 2 + (here + 1) * t_dim1] == 0.f) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2 by 2 Block did split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here + 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + + } else { + + here = *ifst; +L20: + +/* Swap block with next one above */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.f) { + nbnext = 2; + } + } + i__1 = here - nbnext; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &nbf, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2 by 2 block breaks into two 1 
by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.f) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.f) { + nbnext = 2; + } + } + i__1 = here - nbnext; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &nbnext, &c__1, &work[1], info); + --here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + (here - 1) * t_dim1] == 0.f) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + i__1 = here - 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__2, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2 by 2 Block did split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here - 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + + return 0; + +/* End of STREXC */ + +} /* strexc_ */ + +/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static real ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *), + xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + STRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.f; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.f; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of STRTI2 */ + +} /* strti2_ */ + +/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), strti2_(char *, char * + , integer *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + STRTRI computes the inverse of a real upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. 
If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + strti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b15, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b151, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? 
j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b15, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b151, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of STRTRI */ + +} /* strtri_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch new file mode 100644 index 0000000000..2e82d986e6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -17359,5 +17359,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -17371,5 +17372,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -17415,5 +17417,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -17431,5 +17434,6 @@ + ! Skip any leading zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.c new file mode 100644 index 0000000000..0f11f2e725 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.c @@ -0,0 +1,29996 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. 
+*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static doublecomplex c_b56 = {0.,0.}; +static doublecomplex c_b57 = {1.,0.}; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; +static integer c__65 = 65; +static integer c__9 = 9; +static integer c__6 = 6; +static doublereal c_b328 = 0.; +static doublereal c_b1034 = 1.; +static integer c__12 = 12; +static integer c__49 = 49; +static doublereal c_b1276 = -1.; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static logical c_true = TRUE_; +static doublereal c_b2435 = .5; + +/* Subroutine */ int zgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublecomplex *v, + integer *ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static integer ii; + extern logical lsame_(char *, char *); + static logical leftv; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), xerbla_(char *, integer *), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEBAK forms the right or left eigenvectors of a complex general + matrix by backward transformation on the computed eigenvectors of the + balanced matrix output by ZGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to ZGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by ZGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) DOUBLE PRECISION array, dimension (N) + Details of the permutation and scaling factors, as returned + by ZGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) COMPLEX*16 array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by ZHSEIN or ZTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! 
lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + zdscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + zdscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of ZGEBAK */ + +} /* zgebak_ */ + +/* Subroutine */ int zgebal_(char *job, integer *n, doublecomplex *a, integer + *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__, f, g; + static integer i__, j, k, l, m; + static doublereal r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + ZGEBAL balances a general complex matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. 
+ + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) DOUBLE PRECISION array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine CBAL. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + zswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + zswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. 
*/ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + i__2 = j + i__ * a_dim1; + if (a[i__2].r != 0. || d_imag(&a[j + i__ * a_dim1]) != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + i__3 = i__ + j * a_dim1; + if (a[i__3].r != 0. || d_imag(&a[i__ + j * a_dim1]) != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + i__3 = j + i__ * a_dim1; + c__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ * + a_dim1]), abs(d__2)); + i__3 = i__ + j * a_dim1; + r__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + j * + a_dim1]), abs(d__2)); +L150: + ; + } + ica = izamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = z_abs(&a[ica + i__ * a_dim1]); + i__2 = *n - k + 1; + ira = izamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = z_abs(&a[i__ + (ira + k - 1) * a_dim1]); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("ZGEBAL", &i__2); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + +L170: + g = c__ / 2.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. 
/ f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + zdscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + zdscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of ZGEBAL */ + +} /* zgebal_ */ + +/* Subroutine */ int zgebd2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEBD2 reduces a complex general m by n matrix A to upper or lower + real bidiagonal form B by a unitary transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX*16 array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace) COMPLEX*16 array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, v and u are complex vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("ZGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + d_cnjg(&z__1, &tauq[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + z__1, &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + zlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, 
&work[1]); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + } else { + i__2 = i__; + taup[i__2].r = 0., taup[i__2].i = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + d_cnjg(&z__1, &tauq[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &z__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & + work[1]); + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + } else { + i__2 = i__; + tauq[i__2].r = 0., tauq[i__2].i = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of ZGEBD2 */ + +} /* zgebd2_ */ + +/* Subroutine */ int zgebrd_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, nb, nx; + static doublereal ws; + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgebd2_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), + xerbla_(char *, integer *), zlabrd_(integer *, integer *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEBRD reduces a general complex M-by-N matrix A to upper or lower + bidiagonal form B by a unitary transformation: Q**H * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX*16 array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + d__1 = (doublereal) lwkopt; + work[1].r = d__1, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("ZGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+ib-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + zlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+ib:m,i+ib:n), using + an update of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & + z__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + + nb + 1], &ldwrky, &c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], + lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &z__1, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.; + i__4 = j + (j + 1) * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.; + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1].r = ws, work[1].i = 0.; + return 0; + +/* End of ZGEBRD */ + +} /* zgebrd_ */ + +/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, k, ihi; + static doublereal scl; + static integer ilo; + static doublereal dum[1], eps; + static doublecomplex tmp; + static integer ibal; + static char side[1]; + static doublereal anrm; + static integer ierr, itau, iwrk, nout; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + static logical scalea; + + static doublereal cscale; + extern /* Subroutine */ int zgebak_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + integer *), zgebal_(char *, integer *, + doublecomplex *, integer *, integer *, integer *, doublereal *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + extern /* Subroutine */ int 
zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + static doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static doublereal smlnum; + static integer hswork, irwork; + extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrevc_(char *, char *, logical *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, doublecomplex *, + doublereal *, integer *); + static logical lquery, wantvr; + extern /* Subroutine */ int zunghr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + W contains the computed eigenvalues. + + VL (output) COMPLEX*16 array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + u(j) = VL(:,j), the j-th column of VL. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) COMPLEX*16 array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + v(j) = VR(:,j), the j-th column of VR. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+ + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,2*N). + For good performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements and i+1:N of W contain eigenvalues which have + converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to real + workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by ZHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n << 1; + if (wantvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + zhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + zhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } else { + zhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } + hswork = (integer) work[1].r; +/* Computing MAX */ + i__1 = max(maxwrk,hswork); + maxwrk = max(i__1,minwrk); + } + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. 
/ smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (CWorkspace: none) + (RWorkspace: need N) +*/ + + ibal = 1; + zgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: none) +*/ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + zgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + zlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate unitary matrix in VL + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + zunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + zlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + zlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate unitary matrix in VR + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + zunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from ZHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (CWorkspace: need 2*N) + (RWorkspace: need 2*N) +*/ + + irwork = ibal + *n; + ztrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], + &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + zgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], + ldvl, &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1. 
/ dznrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + zdscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vl_dim1; +/* Computing 2nd power */ + d__1 = vl[i__3].r; +/* Computing 2nd power */ + d__2 = d_imag(&vl[k + i__ * vl_dim1]); + rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &rwork[irwork], &c__1); + d_cnjg(&z__2, &vl[k + i__ * vl_dim1]); + d__1 = sqrt(rwork[irwork + k - 1]); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + tmp.r = z__1.r, tmp.i = z__1.i; + zscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = k + i__ * vl_dim1; + i__3 = k + i__ * vl_dim1; + d__1 = vl[i__3].r; + z__1.r = d__1, z__1.i = 0.; + vl[i__2].r = z__1.r, vl[i__2].i = z__1.i; +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + zgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], + ldvr, &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1. / dznrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + zdscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vr_dim1; +/* Computing 2nd power */ + d__1 = vr[i__3].r; +/* Computing 2nd power */ + d__2 = d_imag(&vr[k + i__ * vr_dim1]); + rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &rwork[irwork], &c__1); + d_cnjg(&z__2, &vr[k + i__ * vr_dim1]); + d__1 = sqrt(rwork[irwork + k - 1]); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + tmp.r = z__1.r, tmp.i = z__1.i; + zscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = k + i__ * vr_dim1; + i__3 = k + i__ * vr_dim1; + d__1 = vr[i__3].r; + z__1.r = d__1, z__1.i = 0.; + vr[i__2].r = z__1.r, vr[i__2].i = z__1.i; +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] + , &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, + &ierr); + } + } + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + return 0; + +/* End of ZGEEV */ + +} /* zgeev_ */ + +/* Subroutine */ int zgehd2_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
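+
+    A minimal calling sketch, for illustration only, with hypothetical
+    variable names (q, ldq, ierr are placeholders): the reflectors left
+    in A and TAU here use the same representation consumed by ZUNGHR
+    (as done after ZGEHRD in ZGEEV above), so an explicit unitary Q can
+    be formed schematically as
+
+       zlacpy_("L", &n, &n, a, &lda, q, &ldq);
+       zunghr_(&n, &ilo, &ihi, q, &ldq, tau, work, &lwork, &ierr);
+
+    after which Q holds H(ilo) H(ilo+1) . . . H(ihi-1) explicitly.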
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ + i__]); + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + zlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; +/* L10: */ + } + + return 0; + +/* End of ZGEHD2 */ + +} /* zgehd2_ */ + +/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j; + static doublecomplex t[4160] /* was [65][64] */; + static integer ib; + static doublecomplex ei; + static integer nb, nh, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), zgehd2_(integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *), zlahr2_(integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + ZGEHRD reduces a complex general matrix A to upper Hessenberg form H by + an unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) 
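+
+    A minimal workspace-query sketch, for illustration only, assuming the
+    f2c-style prototype declared in this file and hypothetical variable
+    names (n, ilo, ihi, a, lda, tau, wkopt, work, info are placeholders):
+
+       integer lwork = -1, info;
+       doublecomplex wkopt;
+       zgehrd_(&n, &ilo, &ihi, a, &lda, tau, &wkopt, &lwork, &info);
+       lwork = (integer) wkopt.r;        optimal LWORK returned in WORK(1)
+       work  = (allocate lwork doublecomplex elements)
+       zgehrd_(&n, &ilo, &ihi, a, &lda, tau, work, &lwork, &info);
+
+    This follows the LWORK = -1 convention documented above.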
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0., tau[i__2].i = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0., tau[i__2].i = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code) + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + zlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. 
V(i+ib,ib-1) must be set + to 1 +*/ + + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + ei.r = a[i__3].r, ei.i = a[i__3].i; + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + i__3 = *ihi - i__ - ib + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & + z__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, + &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda); + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = ei.r, a[i__3].i = ei.i; + +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, & + i__3, &c_b57, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], & + ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + z__1.r = -1., z__1.i = -0.; + zaxpy_(&i__, &z__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j + + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & + i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + zgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1].r = (doublereal) iws, work[1].i = 0.; + + return 0; + +/* End of ZGEHRD */ + +} /* zgehrd_ */ + +/* Subroutine */ int zgelq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + ZGELQ2 computes an LQ factorization of a complex m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . 
H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] + ); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); +/* L10: */ + } + return 0; + +/* End of ZGELQ2 */ + +} /* zgelq2_ */ + +/* Subroutine */ int zgelqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgelq2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGELQF computes an LQ factorization of a complex M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. 
+ On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + zgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + zlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + zlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGELQF */ + +} /* zgelqf_ */ + +/* Subroutine */ int zgelsd_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer ie, il, mm; + static doublereal eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static doublereal sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), zgebrd_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + static doublereal bignum; + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + doublecomplex *, doublereal *, integer *, integer *), + zlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublecomplex *, integer *, integer *), zgeqrf_(integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer liwork, minwrk, maxwrk; + static doublereal smlnum; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, 
integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + static integer lrwork; + static logical lquery; + static integer nrwork, smlsiz; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder tranformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution matrix X. + If m >= n and RANK = n, the residual sum-of-squares for + the solution in the i-th column is given by the sum of + squares of the modulus of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M,N). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) DOUBLE PRECISION + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. 
+ + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 2*N + N*NRHS + if M is greater than or equal to N or + 2*M + M*NRHS + if M is less than N, the code will execute correctly. + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the array WORK and the + minimum sizes of the arrays RWORK and IWORK, and returns + these values as the first entries of the WORK, RWORK and + IWORK arrays, and no error message related to LWORK is issued + by XERBLA. + + RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) + LRWORK >= + 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + if M is greater than or equal to N or + 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. + + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), + where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) 
+*/ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + liwork = 1; + lrwork = 1; + if (minmn > 0) { + smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, + (ftnlen)6, (ftnlen)1); + mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + + 1)) / log(2.)) + 1; + nlvl = max(i__1,0); + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* + Path 1a - overdetermined, with many more rows than + columns. +*/ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX + Computing 2nd power +*/ + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *n * 10 + (*n << 1) * smlsiz + (*n << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, + "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, + "ZUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "ZUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* + Computing MAX + Computing 2nd power +*/ + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *m * 10 + (*m << 1) * smlsiz + (*m << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. +*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * + ilaenv_(&c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; + maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. The workspace + calculation should use queries for all routines eventually. 
+ Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), + i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4) + ; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - underdetermined. */ + + maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, + "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; + minwrk = max(i__1,i__2); + } + } + minwrk = min(minwrk,maxwrk); + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + iwork[1] = liwork; + rwork[1] = (doublereal) lrwork; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELSD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = PRECISION; + sfmin = SAFEMINIMUM; + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, &rwork[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b328, &c_b328, &s[1], &c__1) + ; + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = zlange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + zlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure B(M+1:N,:) = 0 */ + + if (*m < *n) { + i__1 = *n - *m; + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (RWorkspace: need N) + (CWorkspace: need N, prefer N*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). 
+ (RWorkspace: need N) + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + } + } + + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + ie = 1; + nrwork = ie + *n; + +/* + Bidiagonalize R in A. + (RWorkspace: need N) + (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (CWorkspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwork], & + ldwork); + itauq = il + ldwork * *m; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize L in WORK(IL). + (RWorkspace: need M) + (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. 
*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize A. + (RWorkspace: need M) + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + zlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + zlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + iwork[1] = liwork; + rwork[1] = (doublereal) lrwork; + return 0; + +/* End of ZGELSD */ + +} /* zgelsd_ */ + +/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, k; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + ZGEQR2 computes a QR factorization of a complex m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
+ + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + } +/* L10: */ + } + return 0; + +/* End of ZGEQR2 */ + +} /* zgeqr2_ */ + +/* Subroutine */ int zgeqrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgeqr2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- 
LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGEQRF computes a QR factorization of a complex M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. 
+*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + zgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + zlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" + , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & + work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, + &work[ib + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGEQRF */ + +} /* zgeqrf_ */ + +/* Subroutine */ int zgesdd_(char *jobz, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static doublereal dum[1], eps; + static integer iru, ivt, iscl; + static doublereal anrm; + static integer idum[1], ierr, itau, irvt; + extern logical lsame_(char *, char *); + static integer chunk, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer wrkbl, itaup, itauq; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int zlacp2_(char *, integer *, integer *, + doublereal *, integer *, doublecomplex *, integer *); + static integer mnthr1, mnthr2; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), xerbla_(char *, integer *), + zgebrd_(integer *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int 
zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal *) + , zlarcm_(integer *, integer *, doublereal *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + static integer ldwrkl; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer + *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + static integer ldwkvt; + static doublereal smlnum; + static logical wntqas; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zunglq_(integer *, integer *, integer * + , doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + static integer nrwork; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + 8-15-00: Improve consistency of WS calculations (eca) + + + Purpose + ======= + + ZGESDD computes the singular value decomposition (SVD) of a complex + M-by-N matrix A, optionally computing the left and/or right singular + vectors, by using divide-and-conquer method. The SVD is written + + A = U * SIGMA * conjugate-transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + V is an N-by-N unitary matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**H, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. 
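+
+    For illustration: with JOBZ = 'S' and K = min(M,N), the returned
+    factors satisfy A = U * diag(S) * VT up to rounding error, where U
+    is M-by-K, diag(S) is K-by-K and VT is K-by-N; VT already holds
+    V**H, so no further conjugate transposition is needed.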
+ + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**H are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**H are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + in the array A and all rows of V**H are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**H are overwritten + in the array A; + = 'N': no columns of U or rows of V**H are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**H (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) COMPLEX*16 array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + unitary matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) COMPLEX*16 array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N unitary matrix V**H; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**H (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). + if JOBZ = 'O', + LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + if JOBZ = 'S' or 'A', + LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + For good performance, LWORK should generally be larger. + + If LWORK = -1, a workspace query is assumed. The optimal + size for the WORK array is calculated and stored in WORK(1), + and no other work except argument checking is performed. + + RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) + If JOBZ = 'N', LRWORK >= 5*min(M,N). + Otherwise, + LRWORK >= min(M,N)*max(5*min(M,N)+7,2*max(M,N)+2*min(M,N)+1) + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The updating process of DBDSDC did not converge. 
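+
+    Example (an illustrative sketch; the surrounding declarations and
+    the names m, n, a, lda, s, u, ldu, vt, ldvt, rwork and iwork are
+    hypothetical).  A caller typically performs a workspace query with
+    LWORK = -1 and then repeats the call with the returned size:
+
+        doublecomplex wkopt, *work;
+        integer lwork = -1, info;
+        zgesdd_("S", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
+                &wkopt, &lwork, rwork, iwork, &info);
+        lwork = (integer) wkopt.r;
+        work = (doublecomplex *) malloc(lwork * sizeof(doublecomplex));
+        zgesdd_("S", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt,
+                work, &lwork, rwork, iwork, &info);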
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr1 = (integer) (minmn * 17. / 9.); + mnthr2 = (integer) (minmn * 5. / 3.); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to + real workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*N*N + 7*N + BDSPAN = MAX(7*N+4, 3*N+2+SMLSIZ*(SMLSIZ+8)) +*/ + + if (*m >= mnthr1) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *n * 3; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "ZUNGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + (*n << 1) + *m; + } + } else if (*m >= mnthr2) { + +/* Path 5 (M much larger than N, but not as much as MNTHR1) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = 
maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6 (M at least N, but not much larger) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } else { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*M*M + 7*M + BDSPAN = MAX(7*M+4, 3*M+2+SMLSIZ*(SMLSIZ+8)) +*/ + + if (*n >= mnthr1) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "ZUNGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + (*m << 1) + *n; + } + } else if (*n >= mnthr2) { + +/* Path 5t (N much larger than M, but not as much as MNTHR1) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + 
i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6t (N greater than M, but not much larger) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } + maxwrk = max(maxwrk,minwrk); + } + if (*info == 0) { + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + if (*lwork < minwrk && *lwork != -1) { + *info = -13; + } + } + +/* Quick returns */ + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESDD", &i__1); + return 0; + } + if (*lwork == -1) { + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. 
If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr1) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAN) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ='O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { + +/* WORK(IR) is M by N */ + + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK( IR ), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of R in WORK(IRU) and computing right singular vectors + of R in WORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of R + (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by the right singular vectors of R + (CWorkspace: need N*N+3*N, prefer 
M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (CWorkspace: need 2*N*N, prefer N*N+M*N) + (RWorkspace: 0) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + zgemm_("N", "N", &chunk, n, n, &c_b57, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b56, &work[ir], & + ldwrkr); + zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b56, &c_b56, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + zgemm_("N", "N", m, n, n, &c_b57, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b56, &u[u_offset], ldu); + + } 
else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (CWorkspace: need N+M, prefer N+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out below it */ + + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b56, &c_b56, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + zgemm_("N", "N", m, n, n, &c_b57, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b56, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else if (*m >= mnthr2) { + +/* + MNTHR2 <= M < MNTHR1 + + Path 5 (M much larger than N, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + ZUNGBR and matrix multiplication to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, 
&rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK(IU) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in WORK(IU), copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + zlacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else if (wntqs) { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need N*N+2*M*N) +*/ + + nrwork = irvt; + 
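+
+/*
+          Note: ZLACRM forms the product of a complex matrix and a real
+          matrix, so this call computes A := U * RWORK(IRU) directly,
+          using RWORK(NRWORK) as 2*M*N real scratch space.
+*/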
zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: 0) + (Rworkspace: need 3*N*N) +*/ + + nrwork = irvt; + zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else { + +/* + M .LT. 
MNTHR2 + + Path 6 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition + Use ZUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK( IU ) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of A, copying + to A + (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) + (Rworkspace: need 0) +*/ + + zlaset_("F", m, n, &c_b56, &c_b56, &work[iu], &ldwrku); + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + zlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlaset_("F", m, n, &c_b56, &c_b56, &u[u_offset], ldu); + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + zlaset_("F", m, m, &c_b56, &c_b56, &u[u_offset], ldu); + if (*m > *n) { + i__2 = *m - *n; + i__1 = *m - *n; + zlaset_("F", &i__2, &i__1, &c_b56, &c_b57, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 2*N+M, prefer 2*N+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. 
If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr1) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b56, &c_b56, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need 3*M, prefer 2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAN) +*/ + + dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + ldwkvt = *m; + +/* WORK(IVT) is M by M */ + + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { + +/* WORK(IL) M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + +/* WORK(IL) is M by CHUNK */ + + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b56, &c_b56, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix 
WORK(IVT) + Overwrite WORK(IVT) by the right singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by Q + in A, storing result in WORK(IL) and copying to A + (CWorkspace: need 2*M*M, prefer M*M+M*N)) + (RWorkspace: 0) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zgemm_("N", "N", m, &blk, m, &c_b57, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b56, &work[il], & + ldwrkl); + zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Copy VT to WORK(IL), multiply right singular vectors of L + in WORK(IL) by Q in A, storing result in VT + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + zlacpy_("F", m, m, &vt[vt_offset], ldvt, 
&work[il], &ldwrkl); + zgemm_("N", "N", m, n, m, &c_b57, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b56, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 9t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (CWorkspace: need M+N, prefer M+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); + +/* Produce L in A, zeroing out above it */ + + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + zgemm_("N", "N", m, n, m, &c_b57, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b56, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else if (*n >= mnthr2) { + +/* + MNTHR2 <= N < MNTHR1 + + Path 5t (N much larger than M, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + ZUNGBR and matrix multiplication to compute singular vectors +*/ + + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + + if (wntqn) { + +/* 
+ Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); + + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRVT) + storing the result in WORK(IVT), copying to U + (Cworkspace: need 0) + (Rworkspace: need 2*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); + +/* + Multiply RWORK(IRVT) by P**H in A, storing the + result in WORK(IVT), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 2*M*M, prefer 2*M*N) +*/ + + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + nrwork = iru; + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } + + } 
else { + +/* + N .LT. MNTHR2 + + Path 6t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition + Use ZUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAN) +*/ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + zlaset_("F", m, n, &c_b56, &c_b56, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3) { + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of A, + copying to A + (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + zlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +*/ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlaset_("F", m, n, &c_b56, &c_b56, &vt[vt_offset], ldvt); + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set all of VT to identity matrix */ + + zlaset_("F", n, n, &c_b56, &c_b57, &vt[vt_offset], ldvt); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 2*M+N, prefer 2*M+N*NB) + (RWorkspace: M*M) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = 
(doublereal) maxwrk, work[1].i = 0.; + + return 0; + +/* End of ZGESDD */ + +} /* zgesdd_ */ + +/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *), zgetrf_( + integer *, integer *, doublecomplex *, integer *, integer *, + integer *), zgetrs_(char *, integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGESV computes the solution to a complex system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. 
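+   ZGETRS is reached only when ZGETRF returned INFO = 0; if a zero pivot
+   was reported (INFO > 0) the factorization is still returned in A, but B
+   is left unchanged because no solution can be computed.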
*/ + + zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of ZGESV */ + +} /* zgesv_ */ + +/* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, jp; + static doublereal sfmin; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgeru_(integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zswap_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = SAFEMINIMUM; + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. 
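+   If the pivot is at least SFMIN in magnitude, the column below it is
+   scaled once by the reciprocal of the pivot (one complex division plus a
+   ZSCAL); otherwise each entry is divided by the pivot individually so
+   that a possibly overflowing reciprocal is never formed.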
*/ + + if (j < *m) { + if (z_abs(&a[j + j * a_dim1]) >= sfmin) { + i__2 = *m - j; + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); + zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ + j * a_dim1; + z_div(&z__1, &a[j + i__ + j * a_dim1], &a[j + j * + a_dim1]); + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; + +/* End of ZGETF2 */ + +} /* zgetf2_ */ + +/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, jb, nb, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + zgetf2_(integer *, integer *, doublecomplex *, integer *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. 
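+
+    Outline of the blocked code below: a block size NB is chosen via
+    ILAENV; for each block column the panel is factored with ZGETF2, the
+    resulting row interchanges are applied to the columns on either side
+    of the panel, the corresponding block row of U is obtained with ZTRSM,
+    and the trailing submatrix is updated with a single ZGEMM.  When
+    blocking would not help (NB <= 1 or NB >= min(M,N)) the unblocked
+    ZGETF2 is used for the whole matrix.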
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + zgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + zgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b57, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b57, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of ZGETRF */ + +} /* zgetrf_ */ + +/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *); + static logical notran; + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZGETRS solves a system of linear equations + A * X = B, A**T * X = B, or A**H * X = B + with a general N-by-N matrix A using the LU factorization computed + by ZGETRF. 
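+
+    For illustration only (not part of the original routine description),
+    a typical call sequence from C with the f2c-style types used in this
+    file might look as follows, where a, b, ipiv, n, nrhs, lda and ldb are
+    supplied by the caller:
+
+        integer info;
+        zgetrf_(&n, &n, a, &lda, ipiv, &info);
+        if (info == 0)
+            zgetrs_("No transpose", &n, &nrhs, a, &lda, ipiv,
+                    b, &ldb, &info);
+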
+ + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A**T * X = B (Transpose) + = 'C': A**H * X = B (Conjugate transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by ZGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from ZGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b57, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A**T * X = B or A**H * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b57, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b57, &a[a_offset], + lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. 
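+   (For the transposed and conjugate-transposed systems the order is the
+   reverse of the no-transpose branch above: the two triangular solves are
+   performed first, and the row interchanges are applied to the solution
+   afterwards in reverse pivot order, as indicated by the -1 increment in
+   the ZLASWP call below.)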
*/ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of ZGETRS */ + +} /* zgetrs_ */ + +/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static doublereal eps; + static integer inde; + static doublereal anrm; + static integer imax; + static doublereal rmin, rmax; + static integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; + extern logical lsame_(char *, char *); + static integer iinfo, lwmin, liopt; + static logical lower; + static integer llrwk, lropt; + static logical wantz; + static integer indwk2, llwrk2; + + static integer iscale; + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + static integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + static integer indrwk, indwrk, liwmin; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + static integer lrwmin, llwork; + static doublereal smlnum; + static logical lquery; + extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a + complex Hermitian matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA, N) + On entry, the Hermitian matrix A. 
If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. + If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If N <= 1, LRWORK must be at least 1. + If JOBZ = 'N' and N > 1, LRWORK must be at least N. + If JOBZ = 'V' and N > 1, LRWORK must be at least + 1 + 5*N + 2*N**2. + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + Modified description of INFO. Sven, 16 Feb 05. + ===================================================================== + + + Test the input parameters. 
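+
+    For illustration only (not part of the original description), the
+    usual workspace-query idiom from C looks roughly as follows, assuming
+    a, w, n and lda are provided by the caller and that WORK, RWORK and
+    IWORK are then allocated to the sizes returned by the query:
+
+        integer lwork = -1, lrwork = -1, liwork = -1, info;
+        doublecomplex wkopt;
+        doublereal rwkopt;
+        integer iwkopt;
+        zheevd_("V", "U", &n, a, &lda, w, &wkopt, &lwork,
+                &rwkopt, &lrwork, &iwkopt, &liwork, &info);
+
+    The optimal sizes are returned in wkopt.r, rwkopt and iwkopt; a second
+    call with the allocated arrays and the actual LWORK, LRWORK and LIWORK
+    values then performs the computation.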
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = SAFEMINIMUM; + eps = PRECISION; + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b1034, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* + For eigenvalues only, call DSTERF. For eigenvectors, first call + ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call ZUNMTR to multiply it to the + Householder transformations represented as Householder vectors in + A. +*/ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. 
/ sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + return 0; + +/* End of ZHEEVD */ + +} /* zheevd_ */ + +/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__; + static doublecomplex taui; + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static doublecomplex alpha; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZHETD2 reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 
+ + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) 
{ + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b56, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) 
{ + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b56, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + i__2 = *n - i__; + zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of ZHETD2 */ + +} /* zhetd2_ */ + +/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + doublecomplex *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZHETRD reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q**H * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. 
If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b1034, &a[a_offset], lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+nb:n,i+nb:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1034, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZHETRD */ + +} /* zhetrd_ */ + +/* Subroutine */ int zhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2]; + doublereal d__1, d__2, d__3; + doublecomplex z__1; + char ch__1[2]; + + /* Local variables */ + static doublecomplex hl[2401] /* was [49][49] */; + static integer kbot, nmin; + extern logical lsame_(char *, char *); + static logical initz; + static doublecomplex workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaqr0_(logical *, logical *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), xerbla_(char *, integer * + ); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *); + static logical lquery; + + +/* + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + ZHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. 
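+
+    For orientation, a minimal calling sketch (illustrative only; the
+    preceding balancing/reduction steps and the declarations of n, ilo,
+    ihi, h, ldh, w, z, ldz are assumed to be in place).  A workspace
+    query with LWORK = -1 is done first, then the real call:
+
+        integer lwork = -1, info;
+        doublecomplex wkopt;
+        zhseqr_("S", "V", &n, &ilo, &ihi, h, &ldh, w, z, &ldz,
+                &wkopt, &lwork, &info);
+        lwork = (integer) wkopt.r;
+            ... allocate work with at least lwork entries ...
+        zhseqr_("S", "V", &n, &ilo, &ihi, h, &ldh, w, z, &ldz,
+                work, &lwork, &info);
+
+    The first call only checks the arguments and places the optimal
+    workspace size in WORK(1); the second computes T (in H) and Q*Z.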
+ + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL, and then passed to ZGEHRD + when the matrix output by ZGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', H contains the upper + triangular matrix T from the Schur decomposition (the + Schur form). If INFO = 0 and JOB = 'E', the contents of + H are unspecified on exit. (The output value of H when + INFO.GT.0 is given under the description of INFO below.) + + Unlike earlier versions of ZHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the unitary matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the unitary matrix generated by ZUNGHR + after the call to ZGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then ZHSEQR does a workspace query. + In this case, ZHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, ZHSEQR failed to compute all of + the eigenvalues. 
Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'ZHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The ZLAHQR vs ZLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + ZLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== NL allocates some local workspace to help small matrices + . through a rare ZLAHQR failure. NL .GT. NTINY = 11 is + . 
required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== + + ==== Decode and check the input parameters. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + d__1 = (doublereal) max(1,*n); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -10; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -12; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + xerbla_("ZHSEQR", &i__1); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + zlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== + Computing MAX +*/ + d__2 = work[1].r, d__3 = (doublereal) max(1,*n); + d__1 = max(d__2,d__3); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + + } else { + +/* ==== copy eigenvalues isolated by ZGEBAL ==== */ + + if (*ilo > 1) { + i__1 = *ilo - 1; + i__2 = *ldh + 1; + zcopy_(&i__1, &h__[h_offset], &i__2, &w[1], &c__1); + } + if (*ihi < *n) { + i__1 = *n - *ihi; + i__2 = *ldh + 1; + zcopy_(&i__1, &h__[*ihi + 1 + (*ihi + 1) * h_dim1], &i__2, &w[* + ihi + 1], &c__1); + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + zlaset_("A", n, n, &c_b56, &c_b57, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* + ==== ZLAHQR/ZLAQR0 crossover point ==== + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "ZHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); + +/* ==== ZLAQR0 for big matrices; ZLAHQR for small ones ==== */ + + if (*n > nmin) { + zlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { + +/* ==== Small matrix ==== */ + + zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* + ==== A rare ZLAHQR failure! ZLAQR0 sometimes succeeds + . when ZLAHQR fails. ==== +*/ + + kbot = *info; + + if (*n >= 49) { + +/* + ==== Larger matrices have enough subdiagonal scratch + . space to call ZLAQR0 directly. 
==== +*/ + + zlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, &work[ + 1], lwork, info); + + } else { + +/* + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from ZLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling ZLAQR0. ==== +*/ + + zlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + i__1 = *n + 1 + *n * 49 - 50; + hl[i__1].r = 0., hl[i__1].i = 0.; + i__1 = 49 - *n; + zlaset_("A", &c__49, &i__1, &c_b56, &c_b56, &hl[(*n + 1) * + 49 - 49], &c__49); + zlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + w[1], ilo, ihi, &z__[z_offset], ldz, workl, & + c__49, info); + if (wantt || *info != 0) { + zlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__2 = *n - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &h__[h_dim1 + 3], ldh); + } + +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== + + Computing MAX +*/ + d__2 = (doublereal) max(1,*n), d__3 = work[1].r; + d__1 = max(d__2,d__3); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + } + +/* ==== End of ZHSEQR ==== */ + + return 0; +} /* zhseqr_ */ + +/* Subroutine */ int zlabrd_(integer *m, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, + doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * + ldx, doublecomplex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *), zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLABRD reduces the first NB rows and columns of a complex general + m by n matrix A to upper or lower real bidiagonal form by a unitary + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by ZGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. 
+ If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the unitary matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) COMPLEX*16 array dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + X (output) COMPLEX*16 array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= max(1,M). + + Y (output) COMPLEX*16 array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
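+
+    For orientation, the caller (ZGEBRD, not shown in this excerpt)
+    applies this block update with two ZGEMM calls.  A sketch for the
+    first block in the m >= n case, written in the f2c style of this
+    file (neg_one = {-1,0} and one = {1,0} are assumed constants, i__2
+    and i__3 scratch integers, and ZGEMM comes from the accompanying
+    BLAS):
+
+        i__2 = *m - *nb;
+        i__3 = *n - *nb;
+        zgemm_("No transpose", "Conjugate transpose", &i__2, &i__3, nb,
+               &neg_one, &a[*nb + 1 + a_dim1], lda, &y[*nb + 1 + y_dim1],
+               ldy, &one, &a[*nb + 1 + (*nb + 1) * a_dim1], lda);
+        zgemm_("No transpose", "No transpose", &i__2, &i__3, nb,
+               &neg_one, &x[*nb + 1 + x_dim1], ldx,
+               &a[(*nb + 1) * a_dim1 + 1], lda,
+               &one, &a[*nb + 1 + (*nb + 1) * a_dim1], lda);
+
+    which subtracts V*Y' and then X*U' from the trailing
+    (m-nb)-by-(n-nb) corner A(nb+1:m,nb+1:n).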
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + ( + i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & + c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + zlacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b57, &a[i__ + + (i__ + 1) * a_dim1], lda); + zlacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, + &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + 
i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b56, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + + i__ * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *m) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + 
zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + +/* Update A(i+1:m,i) */ + + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * + a_dim1], &c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], & + c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + zgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ + 1) + * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } else { + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + return 0; + +/* End of ZLABRD */ + +} /* zlabrd_ */ + +/* Subroutine */ int zlacgv_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1; + + /* Local variables */ + static integer i__, ioff; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLACGV conjugates a complex vector of length N. + + Arguments + ========= + + N (input) INTEGER + The length of the vector X. N >= 0. + + X (input/output) COMPLEX*16 array, dimension + (1+(N-1)*abs(INCX)) + On entry, the vector of length N to be conjugated. + On exit, X is overwritten with conjg(X). + + INCX (input) INTEGER + The spacing between successive elements of X. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &x[i__]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + d_cnjg(&z__1, &x[ioff]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; + +/* End of ZLACGV */ + +} /* zlacgv_ */ + +/* Subroutine */ int zlacp2_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLACP2 copies all or part of a real two-dimensional matrix A to a + complex matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX*16 array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of ZLACP2 */ + +} /* zlacp2_ */ + +/* Subroutine */ int zlacpy_(char *uplo, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX*16 array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). 
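+
+    Usage sketch: ZHSEQR above copies an n-by-n matrix into its fixed
+    49-by-49 scratch array HL with
+
+        zlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49);
+
+    Passing "U" or "L" instead restricts the copy to the upper or lower
+    trapezoid; any other value copies the whole matrix.  ZLACP2 above
+    performs the same copy from a DOUBLE PRECISION source.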
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of ZLACPY */ + +} /* zlacpy_ */ + +/* Subroutine */ int zlacrm_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, + integer *ldc, doublereal *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLACRM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by N and complex; B is N by N and real; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA, N) + A contains the M by N matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) DOUBLE PRECISION array, dimension (LDB, N) + B contains the N by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,N). + + C (input) COMPLEX*16 array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,N). + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + dgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = d_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + dgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + d__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + z__1.r = d__1, z__1.i = rwork[i__5]; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of ZLACRM */ + +} /* zlacrm_ */ + +/* Double Complex */ VOID zladiv_(doublecomplex * ret_val, doublecomplex *x, + doublecomplex *y) +{ + /* System generated locals */ + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + static doublereal zi, zr; + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLADIV := X / Y, where X and Y are complex. The computation of X / Y + will not overflow on an intermediary step unless the results + overflows. + + Arguments + ========= + + X (input) COMPLEX*16 + Y (input) COMPLEX*16 + The complex scalars X and Y. 
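+
+    Note that f2c returns COMPLEX*16 function results through a hidden
+    first argument, so the C calling sequence is (sketch; x, y and q are
+    doublecomplex variables):
+
+        zladiv_(&q, &x, &y);
+
+    which sets q := x / y, deferring the scaled division to DLADIV so
+    that intermediate quantities do not overflow unless the result does.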
+ + ===================================================================== +*/ + + + d__1 = x->r; + d__2 = d_imag(x); + d__3 = y->r; + d__4 = d_imag(y); + dladiv_(&d__1, &d__2, &d__3, &d__4, &zr, &zi); + z__1.r = zr, z__1.i = zi; + ret_val->r = z__1.r, ret_val->i = z__1.i; + + return ; + +/* End of ZLADIV */ + +} /* zladiv_ */ + +/* Subroutine */ int zlaed0_(integer *qsiz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, + integer *ldqs, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + static doublereal temp; + static integer curr, iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxq, iwrem, iqptr, tlvls; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed7_(integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublecomplex *, doublereal *, integer *, integer *) + ; + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + Using the divide and conquer method, ZLAED0 computes all eigenvalues + of a symmetric tridiagonal matrix which is one diagonal block of + those from reducing a dense or band Hermitian matrix and + corresponding eigenvectors of the dense or band matrix. + + Arguments + ========= + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, Q must contain an QSIZ x N matrix whose columns + unitarily orthonormal. It is a part of the unitary matrix + that reduces the full dense Hermitian matrix to a + (reducible) symmetric tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). 
+ + IWORK (workspace) INTEGER array, + the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + RWORK (workspace) DOUBLE PRECISION array, + dimension (1 + 3*N + 2*N*lg N + 3*N**2) + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N) + Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. + LDQS >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + ===================================================================== + + Warning: N could be as big as QSIZ! + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) + $ THEN +*/ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "ZLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. 
+*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + ll = iq - 1 + iwork[iqptr + curr]; + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info); + zlacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. ZLAED7 handles the case + when the eigenvectors of a full or band Hermitian matrix (which + was reduced to tridiagonal form) are desired. + + I am free to use Q as a valuable working space until Loop 150. +*/ + + zlaed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. 
+*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + zcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ + } + dcopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + + return 0; + +/* End of ZLAED0 */ + +} /* zlaed0_ */ + +/* Subroutine */ int zlaed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, + doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, + integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * + work, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp; + extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *), + zlaed8_(integer *, integer *, integer *, doublecomplex *, integer + *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal * + ); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense or banded + Hermitian matrix that has been reduced to tridiagonal form. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. 
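+
+    For reference, the updated eigenvalues of the second stage are the
+    roots LAMBDA of the secular equation formed from the deflated data
+    (K values D(j) with weights Z(j) produced by the first stage):
+
+        1 + RHO * SUM  Z(j)**2 / ( D(j) - LAMBDA )  =  0,   j = 1, ..., K
+
+    which DLAED4 solves one root at a time.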
+ + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) DOUBLE PRECISION + Contains the subdiagonal element used to create the rank-1 + modification. + + INDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, + ie. D( INDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension (4*N) + + RWORK (workspace) DOUBLE PRECISION array, + dimension (3*N+2*QSIZ*N) + + WORK (workspace) COMPLEX*16 array, dimension (QSIZ*N) + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN +*/ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + zlacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } + +/* Prepare the INDXQ sorting premutation. 
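+
+   (DLAMRG merges two already sorted sublists of D into a single
+   ascending index list.  A small illustrative example with both
+   sublists ascending, i.e. strides +1 and +1:
+
+       n1 = 2, n2 = 2, a = { 1.0, 4.0, 2.0, 3.0 }
+       gives  index = { 1, 3, 4, 2 },  so a(index(i)) is ascending.
+
+   In the call below the strides are +1 and -1; a stride of -1 tells
+   DLAMRG to take that sublist in descending order.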
*/ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + + return 0; + +/* End of ZLAED7 */ + +} /* zlaed7_ */ + +/* Subroutine */ int zlaed8_(integer *k, integer *n, integer *qsiz, + doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, + integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * + q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, + integer *indxq, integer *perm, integer *givptr, integer *givcol, + doublereal *givnum, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *), zdrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + ; + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + ZLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + Contains the number of non-deflated eigenvalues. + This is the order of the related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the dense or band matrix to tridiagonal form. + QSIZ >= N if ICOMPQ = 1. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. On exit, D contains the trailing (N-K) updated + eigenvalues (those which were deflated) sorted into increasing + order. + + RHO (input/output) DOUBLE PRECISION + Contains the off diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. 
RHO is modified during the computation to + the value required by DLAED3. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. MIN(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On input this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). The contents of Z are + destroyed during the updating process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + Contains a copy of the first K eigenvalues which will be used + by DLAED3 to form the secular equation. + + Q2 (output) COMPLEX*16 array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + Contains a copy of the first K eigenvectors which will be used + by DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max( 1, N ). + + W (output) DOUBLE PRECISION array, dimension (N) + This will hold the first k values of the final + deflation-altered z-vector and will be passed to DLAED3. + + INDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output INDXP(1:K) + points to the nondeflated D-values and INDXP(K+1:N) + points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + INDXQ (input) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that elements in + the second half of this permutation must first have CUTPNT + added to their values in order to be accurate. + + PERM (output) INTEGER array, dimension (N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (output) INTEGER + Contains the number of Givens rotations which took place in + this subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED8", &i__1); + return 0; + } + +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). 
+*/ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b1276, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + -- except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L50: */ + } + zlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; + } + } else { + jlam = j; + goto L70; + } +/* L60: */ + } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. 
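+
+   In the notation of the code below: with tau = sqrt(z(j)**2 +
+   z(jlam)**2), c = z(j)/tau and s = -z(jlam)/tau, the Givens rotation
+   sends the pair (z(jlam), z(j)) to (0, tau) and simultaneously
+   replaces the two eigenvalues by
+       d(jlam) <- c**2*d(jlam) + s**2*d(j),
+       d(j)    <- s**2*d(jlam) + c**2*d(j),
+   after which the entry whose z-component is now zero can be deflated.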
*/ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L70; +L90: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); +/* L110: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + zlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq); + } + + return 0; + +/* End of ZLAED8 */ + +} /* zlaed8_ */ + +/* Subroutine */ int zlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7; + + /* Local variables */ + static integer i__, j, k, l, m; + static doublereal s; + static doublecomplex t, u, v[2], x, y; + static integer i1, i2; + static doublecomplex t1; + static doublereal t2; + static doublecomplex v2; + static doublereal aa, ab, ba, bb, h10; + static doublecomplex h11; + static doublereal h21; + static doublecomplex h22, sc; + static integer nh, nz; + static doublereal sx; + static integer jhi; + static doublecomplex h11s; + static integer jlo, its; + static doublereal ulp; + static doublecomplex sum; + static doublereal tst; + static doublecomplex temp; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static doublereal rtemp; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + static doublereal safmin, safmax; + extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) 
-- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + ZLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows and + columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). + ZLAHQR works primarily with the Hessenberg submatrix in rows + and columns ILO to IHI, but applies transformations to all of + H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO is zero and if WANTT is .TRUE., then H + is upper triangular in rows and columns ILO:IHI. If INFO + is zero and if WANTT is .FALSE., then the contents of H + are unspecified on exit. The output state of H in case + INF is positive is below under the description of INFO. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues ILO to IHI are stored in the + corresponding elements of W. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with W(i) = H(i,i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by CHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of W contain + those eigenvalues which have been successfully + computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix + rows and columns ILO thorugh INFO of the final, + output value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of ZLAHQR from LAPACK version 3.0. 
+ It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). + + ========================================================= +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + i__2 = j + 2 + j * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; + i__2 = j + 3 + j * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + i__1 = *ihi + (*ihi - 2) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } +/* ==== ensure that subdiagonal entries are real ==== */ + if (*wantt) { + jlo = 1; + jhi = *n; + } else { + jlo = *ilo; + jhi = *ihi; + } + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + if (d_imag(&h__[i__ + (i__ - 1) * h_dim1]) != 0.) { +/* + ==== The following redundant normalization + . avoids problems with both gradual and + . sudden underflow in ABS(H(I,I-1)) ==== +*/ + i__2 = i__ + (i__ - 1) * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + d__3 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[i__ + + (i__ - 1) * h_dim1]), abs(d__2)); + z__1.r = h__[i__2].r / d__3, z__1.i = h__[i__2].i / d__3; + sc.r = z__1.r, sc.i = z__1.i; + d_cnjg(&z__2, &sc); + d__1 = z_abs(&sc); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + sc.r = z__1.r, sc.i = z__1.i; + i__2 = i__ + (i__ - 1) * h_dim1; + d__1 = z_abs(&h__[i__ + (i__ - 1) * h_dim1]); + h__[i__2].r = d__1, h__[i__2].i = 0.; + i__2 = jhi - i__ + 1; + zscal_(&i__2, &sc, &h__[i__ + i__ * h_dim1], ldh); +/* Computing MIN */ + i__3 = jhi, i__4 = i__ + 1; + i__2 = min(i__3,i__4) - jlo + 1; + d_cnjg(&z__1, &sc); + zscal_(&i__2, &z__1, &h__[jlo + i__ * h_dim1], &c__1); + if (*wantz) { + i__2 = *ihiz - *iloz + 1; + d_cnjg(&z__1, &sc); + zscal_(&i__2, &z__1, &z__[*iloz + i__ * z_dim1], &c__1); + } + } +/* L20: */ + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* Set machine-dependent constants for the stopping criterion. */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L30: + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + l = *ilo; + for (its = 0; its <= 30; ++its) { + +/* Look for a single small subdiagonal element. 
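+
+   The scan below treats H(K,K-1) as negligible either when
+   abs(H(K,K-1)) <= SMLNUM or, following the Ahues & Tisseur criterion,
+   when
+       ba*(ab/s) <= max( SMLNUM, ULP*(bb*(aa/s)) ),
+   where aa and bb are the larger and smaller of abs(H(K,K)) and
+   abs(H(K-1,K-1)-H(K,K)), ab and ba are the larger and smaller of
+   abs(H(K,K-1)) and abs(H(K-1,K)), and s = aa + ab.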
*/ + + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + i__2 = k + (k - 1) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[k + (k + - 1) * h_dim1]), abs(d__2)) <= smlnum) { + goto L50; + } + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + tst = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[k - 1 + + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__3].r, + abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( + d__4))); + if (tst == 0.) { + if (k - 2 >= *ilo) { + i__2 = k - 1 + (k - 2) * h_dim1; + tst += (d__1 = h__[i__2].r, abs(d__1)); + } + if (k + 1 <= *ihi) { + i__2 = k + 1 + k * h_dim1; + tst += (d__1 = h__[i__2].r, abs(d__1)); + } + } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some examples. ==== +*/ + i__2 = k + (k - 1) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) <= ulp * tst) { +/* Computing MAX */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + d__5 = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + (k - 1) * h_dim1]), abs(d__2)), d__6 = (d__3 = + h__[i__3].r, abs(d__3)) + (d__4 = d_imag(&h__[k - 1 + + k * h_dim1]), abs(d__4)); + ab = max(d__5,d__6); +/* Computing MIN */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + d__5 = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + (k - 1) * h_dim1]), abs(d__2)), d__6 = (d__3 = + h__[i__3].r, abs(d__3)) + (d__4 = d_imag(&h__[k - 1 + + k * h_dim1]), abs(d__4)); + ba = min(d__5,d__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + z__2.r = h__[i__2].r - h__[i__3].r, z__2.i = h__[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MAX */ + i__4 = k + k * h_dim1; + d__5 = (d__1 = h__[i__4].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + k * h_dim1]), abs(d__2)), d__6 = (d__3 = z__1.r, + abs(d__3)) + (d__4 = d_imag(&z__1), abs(d__4)); + aa = max(d__5,d__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + z__2.r = h__[i__2].r - h__[i__3].r, z__2.i = h__[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MIN */ + i__4 = k + k * h_dim1; + d__5 = (d__1 = h__[i__4].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + k * h_dim1]), abs(d__2)), d__6 = (d__3 = z__1.r, + abs(d__3)) + (d__4 = d_imag(&z__1), abs(d__4)); + bb = min(d__5,d__6); + s = aa + ab; +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1,d__2)) { + goto L50; + } + } +/* L40: */ + } +L50: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + i__1 = l + (l - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + +/* Exit from loop if a submatrix of order 1 has split off. */ + + if (l >= i__) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10) { + +/* Exceptional shift. */ + + i__1 = l + 1 + l * h_dim1; + s = (d__1 = h__[i__1].r, abs(d__1)) * .75; + i__1 = l + l * h_dim1; + z__1.r = s + h__[i__1].r, z__1.i = h__[i__1].i; + t.r = z__1.r, t.i = z__1.i; + } else if (its == 20) { + +/* Exceptional shift. */ + + i__1 = i__ + (i__ - 1) * h_dim1; + s = (d__1 = h__[i__1].r, abs(d__1)) * .75; + i__1 = i__ + i__ * h_dim1; + z__1.r = s + h__[i__1].r, z__1.i = h__[i__1].i; + t.r = z__1.r, t.i = z__1.i; + } else { + +/* Wilkinson's shift. 
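+
+   The shift T computed below is the eigenvalue of the trailing
+   2-by-2 block that is closer to H(I,I):  with
+       x = ( H(I-1,I-1) - H(I,I) ) / 2,   u**2 = H(I-1,I)*H(I,I-1),
+   the shift is
+       T = H(I,I) - u**2 / ( x + y ),   y = +-sqrt( x**2 + u**2 ),
+   the sign of y being chosen so that x and y do not cancel; the
+   scaling by S in the code only guards against overflow.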
*/ + + i__1 = i__ + i__ * h_dim1; + t.r = h__[i__1].r, t.i = h__[i__1].i; + z_sqrt(&z__2, &h__[i__ - 1 + i__ * h_dim1]); + z_sqrt(&z__3, &h__[i__ + (i__ - 1) * h_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * + z__3.i + z__2.i * z__3.r; + u.r = z__1.r, u.i = z__1.i; + s = (d__1 = u.r, abs(d__1)) + (d__2 = d_imag(&u), abs(d__2)); + if (s != 0.) { + i__1 = i__ - 1 + (i__ - 1) * h_dim1; + z__2.r = h__[i__1].r - t.r, z__2.i = h__[i__1].i - t.i; + z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; + x.r = z__1.r, x.i = z__1.i; + sx = (d__1 = x.r, abs(d__1)) + (d__2 = d_imag(&x), abs(d__2)); +/* Computing MAX */ + d__3 = s, d__4 = (d__1 = x.r, abs(d__1)) + (d__2 = d_imag(&x), + abs(d__2)); + s = max(d__3,d__4); + z__5.r = x.r / s, z__5.i = x.i / s; + pow_zi(&z__4, &z__5, &c__2); + z__7.r = u.r / s, z__7.i = u.i / s; + pow_zi(&z__6, &z__7, &c__2); + z__3.r = z__4.r + z__6.r, z__3.i = z__4.i + z__6.i; + z_sqrt(&z__2, &z__3); + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + y.r = z__1.r, y.i = z__1.i; + if (sx > 0.) { + z__1.r = x.r / sx, z__1.i = x.i / sx; + z__2.r = x.r / sx, z__2.i = x.i / sx; + if (z__1.r * y.r + d_imag(&z__2) * d_imag(&y) < 0.) { + z__3.r = -y.r, z__3.i = -y.i; + y.r = z__3.r, y.i = z__3.i; + } + } + z__4.r = x.r + y.r, z__4.i = x.i + y.i; + zladiv_(&z__3, &u, &z__4); + z__2.r = u.r * z__3.r - u.i * z__3.i, z__2.i = u.r * z__3.i + + u.i * z__3.r; + z__1.r = t.r - z__2.r, z__1.i = t.i - z__2.i; + t.r = z__1.r, t.i = z__1.i; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__1 = l + 1; + for (m = i__ - 1; m >= i__1; --m) { + +/* + Determine the effect of starting the single-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + i__2 = m + m * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; + z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; + h11s.r = z__1.r, h11s.i = z__1.i; + i__2 = m + 1 + m * h_dim1; + h21 = h__[i__2].r; + s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + + abs(h21); + z__1.r = h11s.r / s, z__1.i = h11s.i / s; + h11s.r = z__1.r, h11s.i = z__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.; + i__2 = m + (m - 1) * h_dim1; + h10 = h__[i__2].r; + if (abs(h10) * abs(h21) <= ulp * (((d__1 = h11s.r, abs(d__1)) + ( + d__2 = d_imag(&h11s), abs(d__2))) * ((d__3 = h11.r, abs( + d__3)) + (d__4 = d_imag(&h11), abs(d__4)) + ((d__5 = + h22.r, abs(d__5)) + (d__6 = d_imag(&h22), abs(d__6)))))) { + goto L70; + } +/* L60: */ + } + i__1 = l + l * h_dim1; + h11.r = h__[i__1].r, h11.i = h__[i__1].i; + i__1 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__1].r, h22.i = h__[i__1].i; + z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; + h11s.r = z__1.r, h11s.i = z__1.i; + i__1 = l + 1 + l * h_dim1; + h21 = h__[i__1].r; + s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + + abs(h21); + z__1.r = h11s.r / s, z__1.i = h11s.i / s; + h11s.r = z__1.r, h11s.i = z__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.; +L70: + +/* Single-shift QR step */ + + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. 
+ + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. + + V(2) is always real before the call to ZLARFG, and hence + after the call T2 ( = T1*V(2) ) is also real. +*/ + + if (k > m) { + zcopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + zlarfg_(&c__2, v, &v[1], &c__1, &t1); + if (k > m) { + i__2 = k + (k - 1) * h_dim1; + h__[i__2].r = v[0].r, h__[i__2].i = v[0].i; + i__2 = k + 1 + (k - 1) * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; + } + v2.r = v[1].r, v2.i = v[1].i; + z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i + t1.i * + v2.r; + t2 = z__1.r; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__2 = i2; + for (j = k; j <= i__2; ++j) { + d_cnjg(&z__3, &t1); + i__3 = k + j * h_dim1; + z__2.r = z__3.r * h__[i__3].r - z__3.i * h__[i__3].i, z__2.i = + z__3.r * h__[i__3].i + z__3.i * h__[i__3].r; + i__4 = k + 1 + j * h_dim1; + z__4.r = t2 * h__[i__4].r, z__4.i = t2 * h__[i__4].i; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + sum.r = z__1.r, sum.i = z__1.i; + i__3 = k + j * h_dim1; + i__4 = k + j * h_dim1; + z__1.r = h__[i__4].r - sum.r, z__1.i = h__[i__4].i - sum.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + i__3 = k + 1 + j * h_dim1; + i__4 = k + 1 + j * h_dim1; + z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = sum.r * v2.i + + sum.i * v2.r; + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - z__2.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; +/* L80: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+2,I). + + Computing MIN +*/ + i__3 = k + 2; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + i__3 = j + k * h_dim1; + z__2.r = t1.r * h__[i__3].r - t1.i * h__[i__3].i, z__2.i = + t1.r * h__[i__3].i + t1.i * h__[i__3].r; + i__4 = j + (k + 1) * h_dim1; + z__3.r = t2 * h__[i__4].r, z__3.i = t2 * h__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__3 = j + k * h_dim1; + i__4 = j + k * h_dim1; + z__1.r = h__[i__4].r - sum.r, z__1.i = h__[i__4].i - sum.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + i__3 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + d_cnjg(&z__3, &v2); + z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * + z__3.i + sum.i * z__3.r; + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - z__2.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; +/* L90: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + i__3 = j + k * z_dim1; + z__2.r = t1.r * z__[i__3].r - t1.i * z__[i__3].i, z__2.i = + t1.r * z__[i__3].i + t1.i * z__[i__3].r; + i__4 = j + (k + 1) * z_dim1; + z__3.r = t2 * z__[i__4].r, z__3.i = t2 * z__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__3 = j + k * z_dim1; + i__4 = j + k * z_dim1; + z__1.r = z__[i__4].r - sum.r, z__1.i = z__[i__4].i - + sum.i; + z__[i__3].r = z__1.r, z__[i__3].i = z__1.i; + i__3 = j + (k + 1) * z_dim1; + i__4 = j + (k + 1) * z_dim1; + d_cnjg(&z__3, &v2); + z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * + z__3.i + sum.i * z__3.r; + z__1.r = z__[i__4].r - z__2.r, z__1.i = z__[i__4].i - + z__2.i; + z__[i__3].r = z__1.r, z__[i__3].i = z__1.i; +/* L100: */ + } + } + + if (k == m && m > l) { + +/* + If the QR step was started at row M > L because two + 
consecutive small subdiagonals were found, then extra + scaling must be performed to ensure that H(M,M-1) remains + real. +*/ + + z__1.r = 1. - t1.r, z__1.i = 0. - t1.i; + temp.r = z__1.r, temp.i = z__1.i; + d__1 = z_abs(&temp); + z__1.r = temp.r / d__1, z__1.i = temp.i / d__1; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = m + 1 + m * h_dim1; + i__3 = m + 1 + m * h_dim1; + d_cnjg(&z__2, &temp); + z__1.r = h__[i__3].r * z__2.r - h__[i__3].i * z__2.i, z__1.i = + h__[i__3].r * z__2.i + h__[i__3].i * z__2.r; + h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; + if (m + 2 <= i__) { + i__2 = m + 2 + (m + 1) * h_dim1; + i__3 = m + 2 + (m + 1) * h_dim1; + z__1.r = h__[i__3].r * temp.r - h__[i__3].i * temp.i, + z__1.i = h__[i__3].r * temp.i + h__[i__3].i * + temp.r; + h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; + } + i__2 = i__; + for (j = m; j <= i__2; ++j) { + if (j != m + 1) { + if (i2 > j) { + i__3 = i2 - j; + zscal_(&i__3, &temp, &h__[j + (j + 1) * h_dim1], + ldh); + } + i__3 = j - i1; + d_cnjg(&z__1, &temp); + zscal_(&i__3, &z__1, &h__[i1 + j * h_dim1], &c__1); + if (*wantz) { + d_cnjg(&z__1, &temp); + zscal_(&nz, &z__1, &z__[*iloz + j * z_dim1], & + c__1); + } + } +/* L110: */ + } + } +/* L120: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__1 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__1].r, temp.i = h__[i__1].i; + if (d_imag(&temp) != 0.) { + rtemp = z_abs(&temp); + i__1 = i__ + (i__ - 1) * h_dim1; + h__[i__1].r = rtemp, h__[i__1].i = 0.; + z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; + temp.r = z__1.r, temp.i = z__1.i; + if (i2 > i__) { + i__1 = i2 - i__; + d_cnjg(&z__1, &temp); + zscal_(&i__1, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__1 = i__ - i1; + zscal_(&i__1, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (*wantz) { + zscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); + } + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + i__1 = i__; + i__2 = i__ + i__ * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + +/* return to start of the main loop with new value of I. 
*/ + + i__ = l - 1; + goto L30; + +L150: + return 0; + +/* End of ZLAHQR */ + +} /* zlahqr_ */ + +/* Subroutine */ int zlahr2_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex ei; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemm_(char *, char *, integer *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *), ztrmm_(char *, char *, char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), ztrmv_(char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlacgv_(integer *, + doublecomplex *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- + + + Purpose + ======= + + ZLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an unitary similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by ZGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + K < N. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) COMPLEX*16 array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) COMPLEX*16 array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. 
+ + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(K+1:N,I) + + Update I-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + i__2 = *n - *k; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b57, &a[*k + 1 + + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + zcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + ztrmv_("Lower", "Conjugate transpose", "UNIT", &i__2, &a[*k + 1 + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b57, + &t[*nb * t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + ztrmv_("Upper", "Conjugate transpose", "NON-UNIT", &i__2, &t[ + t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b57, &a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w 
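+
+   Taken together, the five updates
+       w := V1'*b1,   w := w + V2'*b2,   w := T'*w,
+       b2 := b2 - V2*w,   b1 := b1 - V1*w
+   apply b := (I - V*T'*V') * b to the current column, as announced at
+   the top of this block.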
*/ + + i__2 = i__ - 1; + ztrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zaxpy_(&i__2, &z__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; + a[i__2].r = ei.r, a[i__2].i = ei.i; + } + +/* + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + zlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; + i__2 = *k + i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(K+1:N,I) */ + + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + zgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b57, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &y[* + k + 1 + i__ * y_dim1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &t[ + i__ * t_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &y[*k + 1 + y_dim1], ldy, + &t[i__ * t_dim1 + 1], &c__1, &c_b57, &y[*k + 1 + i__ * y_dim1] + , &c__1); + i__2 = *n - *k; + zscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + +/* Compute T(1:I,I) */ + + i__2 = i__ - 1; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zscal_(&i__2, &z__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + ztrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + +/* L10: */ + } + i__1 = *k + *nb + *nb * a_dim1; + a[i__1].r = ei.r, a[i__1].i = ei.i; + +/* Compute Y(1:K,1:NB) */ + + zlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + ztrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b57, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + zgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b57, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b57, &y[y_offset], ldy); + } + ztrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b57, &t[ + t_offset], ldt, &y[y_offset], ldy); + + return 0; + +/* End of ZLAHR2 */ + +} /* zlahr2_ */ + +/* Subroutine */ int zlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, + doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, + integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, + doublereal *poles, doublereal *difl, doublereal *difr, doublereal * + z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, + integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, + givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, + bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1, jcol; + static doublereal temp; + static integer jrow; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static doublereal diflj, difrj, dsigj; + extern /* Subroutine 
*/ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), zdrot_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), xerbla_(char *, integer *); + static doublereal dsigjp; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex * + , integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. 
+ + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + RWORK (workspace) DOUBLE PRECISION array, dimension + ( K*(1+NRHS) + 2*NRHS ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --rwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. 
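+        These are the factors (1L)-(3L) listed in Purpose: the recorded
+        Givens rotations, the row permutation from deflation and
+        sorting, and the left singular vector factor of the remaining
+        subproblem, applied to B in that order.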
+ + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + zcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + zcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. +*/ + + if (*k == 1) { + zcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + zdscal_(nrhs, &c_b1276, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + rwork[j] = 0.; + } else { + rwork[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj + / (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + rwork[i__] = 0.; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + rwork[i__] = 0.; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + rwork[1] = -1.; + temp = dnrm2_(k, &rwork[1], &c__1); + +/* + Since B and BX are complex, the following call to DGEMV + is performed in two steps (real and imaginary parts). + + CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * bx_dim1; + rwork[i__] = bx[i__4].r; +/* L50: */ + } +/* L60: */ + } + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = d_imag(&bx[jrow + jcol * bx_dim1]); +/* L70: */ + } +/* L80: */ + } + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * b_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L90: */ + } + zlascl_("G", &c__0, &c__0, &temp, &c_b1034, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L100: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + zlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. 
+*/ + + if (*k == 1) { + zcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + rwork[j] = 0.; + } else { + rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + rwork[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L110: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + rwork[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L120: */ + } + +/* + Since B and BX are complex, the following call to DGEMV + is performed in two steps (real and imaginary parts). + + CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * b_dim1; + rwork[i__] = b[i__4].r; +/* L130: */ + } +/* L140: */ + } + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = d_imag(&b[jrow + jcol * b_dim1]); +/* L150: */ + } +/* L160: */ + } + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * bx_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + bx[i__3].r = z__1.r, bx[i__3].i = z__1.i; +/* L170: */ + } +/* L180: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + zcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + zdrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + zlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + zcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + zcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + zcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L190: */ + } + +/* Step (4R): apply back the Givens rotations performed. 
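+
+   To undo rotations that were originally applied as G(1), G(2), ..., G(P),
+   the transposes are applied in reverse order, and for a real plane
+   rotation the transpose simply negates S.  A rough sketch (apply_rot is a
+   hypothetical stand-in for the ZDROT call below, g[i].c and g[i].s for
+   the GIVNUM pair, r1[i] and r2[i] for the GIVCOL pair):
+
+      // reverse order, sine negated: (G(1) ... G(P))**T = G(P)**T ... G(1)**T
+      for (i = p; i >= 1; --i)
+          apply_rot(r1[i], r2[i], g[i].c, -g[i].s);
+
+   This is why the loop below runs I = GIVPTR, ..., 1 and negates GIVNUM.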
*/ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L200: */ + } + } + + return 0; + +/* End of ZLALS0 */ + +} /* zlals0_ */ + +/* Subroutine */ int zlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, + integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * + k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, + i__2, i__3, i__4, i__5, i__6; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer jreal, inode, ndiml, ndimr; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlals0_(integer *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasdt_(integer *, integer *, integer * + , integer *, integer *, integer *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by ZLALSA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. 
+ + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). + + BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. + + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + RWORK (workspace) DOUBLE PRECISION array, dimension at least + MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ). + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
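+
+   As a concrete illustration of the sizes above (the numbers are purely
+   illustrative), the number of levels in the computation tree follows the
+   same formula ZLALSD uses:
+
+      // depth of the divide-and-conquer tree
+      int nlvl = (int)(log((double)n / (double)(smlsiz + 1)) / log(2.0)) + 1;
+      // e.g. n = 1000, smlsiz = 25:  1000/26 ~ 38.5,  log2 ~ 5.3,  nlvl = 6
+
+   so DIFL needs N*NLVL entries, DIFR and POLES 2*N*NLVL entries, and so on.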
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 170. +*/ + + if (*icompq == 1) { + goto L170; + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). 
+ + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L10: */ + } +/* L20: */ + } + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[1], &nl); + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L30: */ + } +/* L40: */ + } + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[nl * *nrhs + + 1], &nl); + jreal = 0; + jimag = nl * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L70: */ + } +/* L80: */ + } + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[1], &nr); + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L90: */ + } +/* L100: */ + } + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[nr * *nrhs + + 1], &nr); + jreal = 0; + jimag = nr * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + +/* L130: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + zcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L140: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. 
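+
+   A short sketch of the node numbering used below (illustrative only): the
+   subproblems form a complete binary tree whose level LVL, counting the
+   root as level 1, holds the nodes numbered 2**(LVL-1) through 2**LVL - 1.
+
+      // first (lf) and last (ll) node index on a given level
+      int lf = (lvl == 1) ? 1 : 1 << (lvl - 1);
+      int ll = 2 * lf - 1;
+
+   The left-factor pass below walks the levels bottom up (LVL = NLVL, ..., 1)
+   while the right-factor pass starting at label 170 walks them top down, so
+   the counter J steps through the per-subproblem data in opposite orders.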
+*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + zlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L150: */ + } +/* L160: */ + } + goto L330; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L170: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + zlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L180: */ + } +/* L190: */ + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to DGEMM is + performed in two steps (real and imaginary parts). 
+ + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L200: */ + } +/* L210: */ + } + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ + 1], &nlp1); + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L220: */ + } +/* L230: */ + } + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ + nlp1 * *nrhs + 1], &nlp1); + jreal = 0; + jimag = nlp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L240: */ + } +/* L250: */ + } + +/* + Since B and BX are complex, the following call to DGEMM is + performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L260: */ + } +/* L270: */ + } + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ + 1], &nrp1); + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L280: */ + } +/* L290: */ + } + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ + nrp1 * *nrhs + 1], &nrp1); + jreal = 0; + jimag = nrp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L300: */ + } +/* L310: */ + } + +/* L320: */ + } + +L330: + + return 0; + +/* End of ZLALSA */ + +} /* zlalsa_ */ + +/* Subroutine */ int zlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, + doublereal *rcond, integer *rank, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr; + static doublereal rcnd; + static integer jcol, 
irwb, perm, nsub, nlvl, sqre, bxst, jrow, irwu, + jimag; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer jreal, irwib, poles, sizei, irwrb, nsize; + extern /* Subroutine */ int zdrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + ; + static integer irwvt, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int zlalsa_(integer *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *), zlascl_(char *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublecomplex *, integer *, integer *), dlasrt_(char *, + integer *, doublereal *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer givnum, givptr, nrwork, irwwrk, smlszp; + + +/* + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + ZLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. 
+ It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) COMPLEX*16 array, dimension at least + (N * NRHS). + + RWORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ), + where + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute a singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALSD", &i__1); + return 0; + } + + eps = EPSILON; + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) 
{ + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + } else { + *rank = 1; + zlascl_("G", &c__0, &c__0, &d__[1], &c_b1034, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + zdrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + rwork[(i__ << 1) - 1] = cs; + rwork[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = rwork[(j << 1) - 1]; + sn = rwork[j * 2]; + zdrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ + * b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + zlaset_("A", n, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + return 0; + } + + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + irwu = 1; + irwvt = irwu + *n * *n; + irwwrk = irwvt + *n * *n; + irwrb = irwwrk; + irwib = irwrb + *n * *nrhs; + irwb = irwib + *n * *nrhs; + dlaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwu], n); + dlaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwvt], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, + &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to DLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). 
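+
+   In outline (an illustrative sketch, not the literal code): once DLASDQ
+   has produced the full SVD  A = U * diag(d) * VT  of the bidiagonal
+   matrix, the minimum norm least squares solution is assembled as
+
+      // b := U**T * b                  (two real GEMMs below)
+      // for each singular value d[i]:
+      //     if (d[i] <= tol)  zero row i of b            (contributes nothing)
+      //     else              scale row i of b by 1.0 / d[i]  (counted in RANK)
+      // b := V * b                     (two more real GEMMs, using RWORK(IRWVT))
+
+   with tol = RCND * max_i |d[i]|, the thresholding that determines RANK.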
+*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L40: */ + } +/* L50: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L60: */ + } +/* L70: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, &rwork[irwb], + n, &c_b328, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[i__ + b_dim1], + ldb); + } else { + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & + b[i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L100: */ + } + +/* + Since B is complex, the following call to DGEMM is performed + in two steps (real and imaginary parts). That is for V * B + (in the real version of the code V' is stored in WORK). + + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) +*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L110: */ + } +/* L120: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L130: */ + } +/* L140: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L150: */ + } +/* L160: */ + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. 
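+
+   The book-keeping below carves the flat RWORK and IWORK arrays into the
+   per-level panels that DLASDA and ZLALSA expect.  A minimal sketch of the
+   pattern (1-based offsets; panel sizes match the DLASDA argument list):
+
+      // running 1-based offsets into the flat real workspace
+      int u    = 1;                       // SMLSIZ*N      left singular vectors
+      int vt   = u + smlsiz * n;          // (SMLSIZ+1)*N  right singular vectors
+      int difl = vt + (smlsiz + 1) * n;   // NLVL*N
+      int difr = difl + nlvl * n;         // 2*NLVL*N
+      // ... continuing with Z, C, S, POLES and GIVNUM, after which NRWORK
+      // marks the first element that is left over as genuine scratch space.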
*/ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + nrwork = givnum + (nlvl << 1) * *n; + bx = 1; + + irwrb = nrwork; + irwib = irwrb + *smlsiz * *nrhs; + irwb = irwib + *smlsiz * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L170: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + zcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + zcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. */ + + dlaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[vt + + st1], n); + dlaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[u + + st1], n); + dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & + e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & + rwork[nrwork], &c__1, &rwork[nrwork], info) + ; + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to DLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). 
+*/ + + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L180: */ + } +/* L190: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], + &nsize); + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L200: */ + } +/* L210: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], + &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L220: */ + } +/* L230: */ + } + + zlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1], + &rwork[difl + st1], &rwork[difr + st1], &rwork[z__ + + st1], &rwork[poles + st1], &iwork[givptr + st1], & + iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], & + rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1] + , &rwork[z__ + st1], &rwork[poles + st1], &iwork[ + givptr + st1], &iwork[givcol + st1], n, &iwork[perm + + st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[ + s + st1], &rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L240: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. +*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &work[bx + i__ - 1], n); + } else { + ++(*rank); + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & + work[bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L250: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + zcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). 
+ + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) +*/ + + j = bxst - *n - 1; + jreal = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jreal; + i__4 = j + jrow; + rwork[jreal] = work[i__4].r; +/* L260: */ + } +/* L270: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], &nsize); + j = bxst - *n - 1; + jimag = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jimag; + rwork[jimag] = d_imag(&work[j + jrow]); +/* L280: */ + } +/* L290: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L300: */ + } +/* L310: */ + } + } else { + zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], & + rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[ + nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } +/* L320: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of ZLALSD */ + +} /* zlalsd_ */ + +doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2; + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex matrix A. + + Description + =========== + + ZLANGE returns the value + + ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in ZLANGE as described + above. 
+ + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + ZLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + ZLANGE is set to zero. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += z_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += z_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANGE */ + +} /* zlange_ */ + +doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Local variables */ + static integer i__, j; + static doublereal sum, absa, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLANHE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex hermitian matrix A. + + Description + =========== + + ZLANHE returns the value + + ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). 
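+
+   Because A is hermitian, its column sums and row sums coincide, so
+   norm1(A) = normI(A) and both can be accumulated while touching only the
+   stored triangle.  A rough sketch for the upper-triangular case
+   (placeholder names; cabs and creal stand in for the f2c z_abs and the
+   .r field):
+
+      // column sums of |A| from the upper triangle of a hermitian matrix
+      for (j = 0; j < n; ++j) colsum[j] = 0.0;
+      for (j = 0; j < n; ++j) {
+          for (i = 0; i < j; ++i) {
+              double absa = cabs(a[i + j * lda]);   // |a_ij| = |a_ji|
+              colsum[j] += absa;                    // contribution to column j
+              colsum[i] += absa;                    // mirrored contribution to column i
+          }
+          colsum[j] += fabs(creal(a[j + j * lda])); // diagonal is real
+      }
+      // value = max_j colsum[j] = norm1(A) = normI(A)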
Note that max(abs(A(i,j))) is not a consistent matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in ZLANHE as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + hermitian matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, ZLANHE is + set to zero. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The hermitian matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. Note that the imaginary parts of the diagonal + elements need not be set and are assumed to be zero. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L10: */ + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); + value = max(d__2,d__3); +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); + value = max(d__2,d__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is hermitian). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (d__1 = a[i__2].r, abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (d__1 = a[i__2].r, abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). 
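+
+   ZLASSQ maintains the pair (scale, sumsq) so that scale**2 * sumsq equals
+   the running sum of squares, which avoids overflow and underflow.  A
+   minimal sketch of one update step (real data, placeholder names):
+
+      // fold |xi| into the running (scale, sumsq) pair
+      if (xi != 0.0) {
+          double absxi = fabs(xi);
+          if (scale < absxi) {
+              sumsq = 1.0 + sumsq * (scale / absxi) * (scale / absxi);
+              scale = absxi;
+          } else {
+              sumsq += (absxi / scale) * (absxi / scale);
+          }
+      }
+
+   Since A is hermitian, the strictly upper (or lower) triangle is summed
+   once and the result doubled, the real diagonal entries are folded in the
+   same way, and finally normF(A) = scale * sqrt(sumsq).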
*/ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + zlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.) { + i__2 = i__ + i__ * a_dim1; + absa = (d__1 = a[i__2].r, abs(d__1)); + if (scale < absa) { +/* Computing 2nd power */ + d__1 = scale / absa; + sum = sum * (d__1 * d__1) + 1.; + scale = absa; + } else { +/* Computing 2nd power */ + d__1 = absa / scale; + sum += d__1 * d__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANHE */ + +} /* zlanhe_ */ + +/* Subroutine */ int zlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + doublecomplex z__1, z__2, z__3, z__4, z__5; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static doublecomplex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static doublecomplex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static doublecomplex swap; + static integer ktop; + static doublecomplex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int zlaqr3_(logical *, logical *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + integer *, integer *, doublecomplex *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer * + , doublecomplex *, integer *), zlaqr4_(logical *, logical *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), zlaqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static doublecomplex rtdisc; + static integer nwupbd; + static logical sorted; + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Purpose + ======= + + ZLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to ZGEBAL, and then passed to ZGEHRD when the + matrix output by ZGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX*16 array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX*16 array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then ZLAQR0 does a workspace query. + In this case, ZLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAQR0 failed to compute all of + the eigenvalues. 
Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use ZLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + zlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) 
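+   .     The value returned by ILAENV (ISPEC = 13) is clamped
+   .     below by 2 and above by both IHI-ILO+1 and (N-1)/3 in
+   .     the statements that follow; for instance, with N = 100,
+   .     ILO = 1 and IHI = 100 the window can be at most
+   .     (100-1)/3 = 33 columns wide.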
==== +*/ + + nwr = ilaenv_(&c__13, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to ZLAQR3 ==== +*/ + + i__1 = nwr + 1; + zlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(ZLAQR5, ZLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* ==== ZLAHQR/ZLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0. && h__[i__3].i == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . 
more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + kwtop + (kwtop - 1) * h_dim1]), abs(d__2)) > ( + d__3 = h__[i__3].r, abs(d__3)) + (d__4 = d_imag(& + h__[kwtop - 1 + (kwtop - 2) * h_dim1]), abs(d__4)) + ) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + zlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if ZLAQR3 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . ZLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[i__ + (i__ - 1) * h_dim1]), abs( + d__2))) * .75; + z__1.r = h__[i__4].r + d__3, z__1.i = h__[i__4].i; + w[i__3].r = z__1.r, w[i__3].i = z__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use ZLAQR4 or + . ZLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) 
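+   .     The scratch copy made below occupies rows N-NS+1
+   .     through N of columns 1 through NS of H; these entries
+   .     lie strictly below the subdiagonal whenever
+   .     N-NS+1 .GT. NS+1, that is N .GT. 2*NS, which follows
+   .     from NS .LE. (N+6)/9 for the matrix sizes handled
+   .     here (N .GT. NTINY = 11).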
==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + zlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + zlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &work[1], lwork, &inf); + } else { + zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) ==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = + d_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), abs(d__2)) + ((d__3 = h__[i__3] + .r, abs(d__3)) + (d__4 = d_imag(&h__[kbot + + (kbot - 1) * h_dim1]), abs(d__4))) + (( + d__5 = h__[i__4].r, abs(d__5)) + (d__6 = + d_imag(&h__[kbot - 1 + kbot * h_dim1]), + abs(d__6))) + ((d__7 = h__[i__5].r, abs( + d__7)) + (d__8 = d_imag(&h__[kbot + kbot * + h_dim1]), abs(d__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + aa.r = z__1.r, aa.i = z__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + cc.r = z__1.r, cc.i = z__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + bb.r = z__1.r, bb.i = z__1.i; + i__2 = kbot + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + dd.r = z__1.r, dd.i = z__1.i; + z__2.r = aa.r + dd.r, z__2.i = aa.i + dd.i; + z__1.r = z__2.r / 2., z__1.i = z__2.i / 2.; + tr2.r = z__1.r, tr2.i = z__1.i; + z__3.r = aa.r - tr2.r, z__3.i = aa.i - tr2.i; + z__4.r = dd.r - tr2.r, z__4.i = dd.i - tr2.i; + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, + z__2.i = z__3.r * z__4.i + z__3.i * + z__4.r; + z__5.r = bb.r * cc.r - bb.i * cc.i, z__5.i = bb.r + * cc.i + bb.i * cc.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + det.r = z__1.r, det.i = z__1.i; + z__2.r = -det.r, z__2.i = -det.i; + z_sqrt(&z__1, &z__2); + rtdisc.r = z__1.r, rtdisc.i = z__1.i; + i__2 = kbot - 1; + z__2.r = tr2.r + rtdisc.r, z__2.i = tr2.i + + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + i__2 = kbot; + z__2.r = tr2.r - rtdisc.r, z__2.i = tr2.i - + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((d__1 = w[i__4].r, abs(d__1)) + (d__2 = + d_imag(&w[i__]), abs(d__2)) < (d__3 = + w[i__5].r, abs(d__3)) + (d__4 = + d_imag(&w[i__ + 1]), abs(d__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. 
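+   .     (Whichever of the two shifts lies closer to
+   .     H(KBOT,KBOT) is kept and duplicated below, so the
+   .     sweep still receives an even number of shifts.)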
==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + z__2.r = w[i__2].r - h__[i__3].r, z__2.i = w[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + z__4.r = w[i__4].r - h__[i__5].r, z__4.i = w[i__4].i - + h__[i__5].i; + z__3.r = z__4.r, z__3.i = z__4.i; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), + abs(d__2)) < (d__3 = z__3.r, abs(d__3)) + (d__4 = + d_imag(&z__3), abs(d__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + zlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR0 ==== */ + + return 0; +} /* zlaqr0_ */ + +/* Subroutine */ int zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, + doublecomplex *s1, doublecomplex *s2, doublecomplex *v) +{ + /* System generated locals */ + integer h_dim1, h_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Local variables */ + static doublereal s; + static doublecomplex h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - s1*I)*(H - s2*I) + + scaling to avoid overflows and most underflows. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) COMPLEX*16 array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. 
LDH.GE.N + + S1 (input) COMPLEX*16 + S2 S1 and S2 are the shifts defining K in (*) above. + + V (output) COMPLEX*16 array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + i__1 = h_dim1 + 1; + z__2.r = h__[i__1].r - s2->r, z__2.i = h__[i__1].i - s2->i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__2 = h_dim1 + 2; + s = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs(d__2)) + ( + (d__3 = h__[i__2].r, abs(d__3)) + (d__4 = d_imag(&h__[h_dim1 + + 2]), abs(d__4))); + if (s == 0.) { + v[1].r = 0., v[1].i = 0.; + v[2].r = 0., v[2].i = 0.; + } else { + i__1 = h_dim1 + 2; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h21s.r = z__1.r, h21s.i = z__1.i; + i__1 = (h_dim1 << 1) + 1; + z__2.r = h21s.r * h__[i__1].r - h21s.i * h__[i__1].i, z__2.i = + h21s.r * h__[i__1].i + h21s.i * h__[i__1].r; + i__2 = h_dim1 + 1; + z__4.r = h__[i__2].r - s1->r, z__4.i = h__[i__2].i - s1->i; + i__3 = h_dim1 + 1; + z__6.r = h__[i__3].r - s2->r, z__6.i = h__[i__3].i - s2->i; + z__5.r = z__6.r / s, z__5.i = z__6.i / s; + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, z__3.i = z__4.r * + z__5.i + z__4.i * z__5.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + v[1].r = z__1.r, v[1].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + z__4.r = h__[i__1].r + h__[i__2].r, z__4.i = h__[i__1].i + h__[ + i__2].i; + z__3.r = z__4.r - s1->r, z__3.i = z__4.i - s1->i; + z__2.r = z__3.r - s2->r, z__2.i = z__3.i - s2->i; + z__1.r = h21s.r * z__2.r - h21s.i * z__2.i, z__1.i = h21s.r * + z__2.i + h21s.i * z__2.r; + v[2].r = z__1.r, v[2].i = z__1.i; + } + } else { + i__1 = h_dim1 + 1; + z__2.r = h__[i__1].r - s2->r, z__2.i = h__[i__1].i - s2->i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__2 = h_dim1 + 2; + i__3 = h_dim1 + 3; + s = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs(d__2)) + ( + (d__3 = h__[i__2].r, abs(d__3)) + (d__4 = d_imag(&h__[h_dim1 + + 2]), abs(d__4))) + ((d__5 = h__[i__3].r, abs(d__5)) + (d__6 + = d_imag(&h__[h_dim1 + 3]), abs(d__6))); + if (s == 0.) 
{ + v[1].r = 0., v[1].i = 0.; + v[2].r = 0., v[2].i = 0.; + v[3].r = 0., v[3].i = 0.; + } else { + i__1 = h_dim1 + 2; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h21s.r = z__1.r, h21s.i = z__1.i; + i__1 = h_dim1 + 3; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h31s.r = z__1.r, h31s.i = z__1.i; + i__1 = h_dim1 + 1; + z__4.r = h__[i__1].r - s1->r, z__4.i = h__[i__1].i - s1->i; + i__2 = h_dim1 + 1; + z__6.r = h__[i__2].r - s2->r, z__6.i = h__[i__2].i - s2->i; + z__5.r = z__6.r / s, z__5.i = z__6.i / s; + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, z__3.i = z__4.r * + z__5.i + z__4.i * z__5.r; + i__3 = (h_dim1 << 1) + 1; + z__7.r = h__[i__3].r * h21s.r - h__[i__3].i * h21s.i, z__7.i = + h__[i__3].r * h21s.i + h__[i__3].i * h21s.r; + z__2.r = z__3.r + z__7.r, z__2.i = z__3.i + z__7.i; + i__4 = h_dim1 * 3 + 1; + z__8.r = h__[i__4].r * h31s.r - h__[i__4].i * h31s.i, z__8.i = + h__[i__4].r * h31s.i + h__[i__4].i * h31s.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + v[1].r = z__1.r, v[1].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + z__5.r = h__[i__1].r + h__[i__2].r, z__5.i = h__[i__1].i + h__[ + i__2].i; + z__4.r = z__5.r - s1->r, z__4.i = z__5.i - s1->i; + z__3.r = z__4.r - s2->r, z__3.i = z__4.i - s2->i; + z__2.r = h21s.r * z__3.r - h21s.i * z__3.i, z__2.i = h21s.r * + z__3.i + h21s.i * z__3.r; + i__3 = h_dim1 * 3 + 2; + z__6.r = h__[i__3].r * h31s.r - h__[i__3].i * h31s.i, z__6.i = + h__[i__3].r * h31s.i + h__[i__3].i * h31s.r; + z__1.r = z__2.r + z__6.r, z__1.i = z__2.i + z__6.i; + v[2].r = z__1.r, v[2].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = h_dim1 * 3 + 3; + z__5.r = h__[i__1].r + h__[i__2].r, z__5.i = h__[i__1].i + h__[ + i__2].i; + z__4.r = z__5.r - s1->r, z__4.i = z__5.i - s1->i; + z__3.r = z__4.r - s2->r, z__3.i = z__4.i - s2->i; + z__2.r = h31s.r * z__3.r - h31s.i * z__3.i, z__2.i = h31s.r * + z__3.i + h31s.i * z__3.r; + i__3 = (h_dim1 << 1) + 3; + z__6.r = h21s.r * h__[i__3].r - h21s.i * h__[i__3].i, z__6.i = + h21s.r * h__[i__3].i + h21s.i * h__[i__3].r; + z__1.r = z__2.r + z__6.r, z__1.i = z__2.i + z__6.i; + v[3].r = z__1.r, v[3].i = z__1.i; + } + } + return 0; +} /* zlaqr1_ */ + +/* Subroutine */ int zlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j; + static doublecomplex s; + static integer jw; + static doublereal foo; + static integer kln; + static doublecomplex tau; + static integer knt; + static doublereal ulp; + static integer lwk1, lwk2; + static doublecomplex beta; + static integer kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + static integer infqr; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, 
doublecomplex *, + integer *); + static integer kwtop; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + static doublereal safmin, safmax; + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlahqr_(logical *, + logical *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int ztrexc_(char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + integer *); + static integer lwkopt; + extern /* Subroutine */ int zunmhr_(char *, char *, integer *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + This subroutine is identical to ZLAQR3 except that it avoids + recursion by calling ZLAHQR instead of ZLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. 
+ On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX*16 array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX*16 array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX*16 array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX*16 array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX*16 array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; ZLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. 
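+   .     The estimate formed below combines workspace queries
+   .     to ZGEHRD and ZUNMHR on the JW-by-JW deflation window,
+   .     giving LWKOPT = JW + MAX(LWK1,LWK2); a window of order
+   .     two or less needs only LWKOPT = 1.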
==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to ZGEHRD ==== */ + + i__1 = jw - 1; + zgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to ZUNMHR ==== */ + + i__1 = jw - 1; + zunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1., work[1].i = 0.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0., s.i = 0.; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + d__5 = smlnum, d__6 = ulp * ((d__1 = h__[i__1].r, abs(d__1)) + (d__2 = + d_imag(&h__[kwtop + kwtop * h_dim1]), abs(d__2))); + if ((d__3 = s.r, abs(d__3)) + (d__4 = d_imag(&s), abs(d__4)) <= max( + d__5,d__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + } + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + zlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + zcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + zlaset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + zlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], + &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[*ns + *ns * + t_dim1]), abs(d__2)); + if (foo == 0.) { + foo = (d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (((d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2))) * (( + d__3 = v[i__2].r, abs(d__3)) + (d__4 = d_imag(&v[*ns * v_dim1 + + 1]), abs(d__4))) <= max(d__5,d__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (ZTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0., s.i = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. ==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[j + j * + t_dim1]), abs(d__2)) > (d__3 = t[i__4].r, abs(d__3)) + + (d__4 = d_imag(&t[ifst + ifst * t_dim1]), abs(d__4)) + ) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0. && s.i == 0.) { + if (*ns > 1 && (s.r != 0. 
|| s.i != 0.)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + zcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &work[i__]); + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + zlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1., work[1].i = 0.; + + i__1 = jw - 2; + i__2 = jw - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + d_cnjg(&z__1, &tau); + zlarf_("L", ns, &jw, &work[1], &c__1, &z__1, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + zgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + d_cnjg(&z__2, &v[v_dim1 + 1]); + z__1.r = s.r * z__2.r - s.i * z__2.i, z__1.i = s.r * z__2.i + s.i + * z__2.r; + h__[i__1].r = z__1.r, h__[i__1].i = z__1.i; + } + zlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + zcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + i__1 = *lwork - jw; + zunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + zgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + zlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR2 ==== */ + + return 0; +} /* zlaqr2_ */ + +/* Subroutine */ int zlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j; + static doublecomplex s; + static integer jw; + static doublereal foo; + static integer kln; + static doublecomplex tau; + static integer knt; + static doublereal ulp; + static integer lwk1, lwk2, lwk3; + static doublecomplex beta; + static integer kcol, info, nmin, ifst, ilst, ltop, krow; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + static integer infqr; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer kwtop; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *), + zlaqr4_(logical *, logical *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal safmax; + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlahqr_(logical *, + logical *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int ztrexc_(char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + integer *); + static integer lwkopt; + extern /* Subroutine */ int zunmhr_(char *, char *, integer *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX*16 array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX*16 array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. 
+ + T (workspace) COMPLEX*16 array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX*16 array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX*16 array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; ZLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to ZGEHRD ==== */ + + i__1 = jw - 1; + zgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to ZUNMHR ==== */ + + i__1 = jw - 1; + zunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Workspace query call to ZLAQR4 ==== */ + + zlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[1], + &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer) work[1].r; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1., work[1].i = 0.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0., s.i = 0.; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + d__5 = smlnum, d__6 = ulp * ((d__1 = h__[i__1].r, abs(d__1)) + (d__2 = + d_imag(&h__[kwtop + kwtop * h_dim1]), abs(d__2))); + if ((d__3 = s.r, abs(d__3)) + (d__4 = d_imag(&s), abs(d__4)) <= max( + d__5,d__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + } + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) ==== +*/ + + zlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + zcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + zlaset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "ZLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + zlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], lwork, & + infqr); + } else { + zlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[*ns + *ns * + t_dim1]), abs(d__2)); + if (foo == 0.) { + foo = (d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (((d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2))) * (( + d__3 = v[i__2].r, abs(d__3)) + (d__4 = d_imag(&v[*ns * v_dim1 + + 1]), abs(d__4))) <= max(d__5,d__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (ZTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0., s.i = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
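+   .     The loop below is a selection sort: for each position
+   .     it locates the remaining diagonal entry of largest
+   .     absolute value and moves it into place with ZTREXC, so
+   .     the eigenvalues end up ordered by decreasing magnitude
+   .     while the Schur form is preserved.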
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[j + j * + t_dim1]), abs(d__2)) > (d__3 = t[i__4].r, abs(d__3)) + + (d__4 = d_imag(&t[ifst + ifst * t_dim1]), abs(d__4)) + ) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0. && s.i == 0.) { + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + zcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &work[i__]); + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + zlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1., work[1].i = 0.; + + i__1 = jw - 2; + i__2 = jw - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + d_cnjg(&z__1, &tau); + zlarf_("L", ns, &jw, &work[1], &c__1, &z__1, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + zgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + d_cnjg(&z__2, &v[v_dim1 + 1]); + z__1.r = s.r * z__2.r - s.i * z__2.i, z__1.i = s.r * z__2.i + s.i + * z__2.r; + h__[i__1].r = z__1.r, h__[i__1].i = z__1.i; + } + zlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + zcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + i__1 = *lwork - jw; + zunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + zgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + zlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR3 ==== */ + + return 0; +} /* zlaqr3_ */ + +/* Subroutine */ int zlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + doublecomplex z__1, z__2, z__3, z__4, z__5; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static doublecomplex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static doublecomplex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static doublecomplex swap; + static integer ktop; + static doublecomplex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int zlaqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + integer *, integer *, doublecomplex *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer * + , doublecomplex *, integer *), zlaqr5_(logical *, logical *, + integer *, integer *, integer *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static doublecomplex rtdisc; + static integer nwupbd; + static logical sorted; + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + 
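+
+/*
+   Usage sketch (not part of the original LAPACK sources): the
+   LWORK = -1 workspace-query convention documented for ZLAQR0 above,
+   and for ZLAQR4 below, is normally driven from the caller in two
+   passes.  The variables n, h, ldh, w, z and ldz are hypothetical
+   caller-side locals; c_true and c__1 are the file-scope constants
+   already used by the routines in this file.
+
+       doublecomplex wkopt, *work;
+       integer lwork, info;
+
+       lwork = -1;
+       zlaqr4_(&c_true, &c_true, &n, &c__1, &n, h, &ldh, w,
+               &c__1, &n, z, &ldz, &wkopt, &lwork, &info);
+       lwork = (integer) wkopt.r;
+       work = (doublecomplex *) malloc(lwork * sizeof(doublecomplex));
+       zlaqr4_(&c_true, &c_true, &n, &c__1, &n, h, &ldh, w,
+               &c__1, &n, z, &ldz, work, &lwork, &info);
+       free(work);
+
+   The first call only checks the arguments and returns the optimal
+   workspace size in WORK(1); the second call does the actual work.
+*/
+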
static integer lwkopt;
+
+
+/*
+ -- LAPACK auxiliary routine (version 3.2) --
+ Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
+ November 2006
+
+
+ This subroutine implements one level of recursion for ZLAQR0.
+ It is a complete implementation of the small bulge multi-shift
+ QR algorithm. It may be called by ZLAQR0 and, for large enough
+ deflation window size, it may be called by ZLAQR3. This
+ subroutine is identical to ZLAQR0 except that it calls ZLAQR2
+ instead of ZLAQR3.
+
+ Purpose
+ =======
+
+ ZLAQR4 computes the eigenvalues of a Hessenberg matrix H
+ and, optionally, the matrices T and Z from the Schur decomposition
+ H = Z T Z**H, where T is an upper triangular matrix (the
+ Schur form), and Z is the unitary matrix of Schur vectors.
+
+ Optionally Z may be postmultiplied into an input unitary
+ matrix Q so that this routine can give the Schur factorization
+ of a matrix A which has been reduced to the Hessenberg form H
+ by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H.
+
+ Arguments
+ =========
+
+ WANTT (input) LOGICAL
+ = .TRUE. : the full Schur form T is required;
+ = .FALSE.: only eigenvalues are required.
+
+ WANTZ (input) LOGICAL
+ = .TRUE. : the matrix of Schur vectors Z is required;
+ = .FALSE.: Schur vectors are not required.
+
+ N (input) INTEGER
+ The order of the matrix H. N .GE. 0.
+
+ ILO (input) INTEGER
+ IHI (input) INTEGER
+ It is assumed that H is already upper triangular in rows
+ and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1,
+ H(ILO,ILO-1) is zero. ILO and IHI are normally set by a
+ previous call to ZGEBAL, and then passed to ZGEHRD when the
+ matrix output by ZGEBAL is reduced to Hessenberg form.
+ Otherwise, ILO and IHI should be set to 1 and N,
+ respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
+ If N = 0, then ILO = 1 and IHI = 0.
+
+ H (input/output) COMPLEX*16 array, dimension (LDH,N)
+ On entry, the upper Hessenberg matrix H.
+ On exit, if INFO = 0 and WANTT is .TRUE., then H
+ contains the upper triangular matrix T from the Schur
+ decomposition (the Schur form). If INFO = 0 and WANTT is
+ .FALSE., then the contents of H are unspecified on exit.
+ (The output value of H when INFO.GT.0 is given under the
+ description of INFO below.)
+
+ This subroutine may explicitly set H(i,j) = 0 for i.GT.j and
+ j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N.
+
+ LDH (input) INTEGER
+ The leading dimension of the array H. LDH .GE. max(1,N).
+
+ W (output) COMPLEX*16 array, dimension (N)
+ The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored
+ in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are
+ stored in the same order as on the diagonal of the Schur
+ form returned in H, with W(i) = H(i,i).
+
+ Z (input/output) COMPLEX*16 array, dimension (LDZ,IHI)
+ If WANTZ is .FALSE., then Z is not referenced.
+ If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is
+ replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the
+ orthogonal Schur factor of H(ILO:IHI,ILO:IHI).
+ (The output value of Z when INFO.GT.0 is given under
+ the description of INFO below.)
+
+ LDZ (input) INTEGER
+ The leading dimension of the array Z. If WANTZ is .TRUE.
+ then LDZ.GE.MAX(1,IHIZ). Otherwise, LDZ.GE.1.
+
+ WORK (workspace/output) COMPLEX*16 array, dimension LWORK
+ On exit, if LWORK = -1, WORK(1) returns an estimate of
+ the optimal value for LWORK.
+
+ LWORK (input) INTEGER
+ The dimension of the array WORK. LWORK .GE. max(1,N)
+ is sufficient, but LWORK typically as large as 6*N may
+ be required for optimal performance.
A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then ZLAQR4 does a workspace query. + In this case, ZLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use ZLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + zlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. 
==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to ZLAQR2 ==== +*/ + + i__1 = nwr + 1; + zlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(ZLAQR5, ZLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* ==== ZLAHQR/ZLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0. && h__[i__3].i == 0.) 
{ + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + kwtop + (kwtop - 1) * h_dim1]), abs(d__2)) > ( + d__3 = h__[i__3].r, abs(d__3)) + (d__4 = d_imag(& + h__[kwtop - 1 + (kwtop - 2) * h_dim1]), abs(d__4)) + ) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + zlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if ZLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . ZLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. 
==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[i__ + (i__ - 1) * h_dim1]), abs( + d__2))) * .75; + z__1.r = h__[i__4].r + d__3, z__1.i = h__[i__4].i; + w[i__3].r = z__1.r, w[i__3].i = z__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use ZLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + zlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &w[ks], &c__1, &c__1, zdum, & + c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) ==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = + d_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), abs(d__2)) + ((d__3 = h__[i__3] + .r, abs(d__3)) + (d__4 = d_imag(&h__[kbot + + (kbot - 1) * h_dim1]), abs(d__4))) + (( + d__5 = h__[i__4].r, abs(d__5)) + (d__6 = + d_imag(&h__[kbot - 1 + kbot * h_dim1]), + abs(d__6))) + ((d__7 = h__[i__5].r, abs( + d__7)) + (d__8 = d_imag(&h__[kbot + kbot * + h_dim1]), abs(d__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + aa.r = z__1.r, aa.i = z__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + cc.r = z__1.r, cc.i = z__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + bb.r = z__1.r, bb.i = z__1.i; + i__2 = kbot + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + dd.r = z__1.r, dd.i = z__1.i; + z__2.r = aa.r + dd.r, z__2.i = aa.i + dd.i; + z__1.r = z__2.r / 2., z__1.i = z__2.i / 2.; + tr2.r = z__1.r, tr2.i = z__1.i; + z__3.r = aa.r - tr2.r, z__3.i = aa.i - tr2.i; + z__4.r = dd.r - tr2.r, z__4.i = dd.i - tr2.i; + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, + z__2.i = z__3.r * z__4.i + z__3.i * + z__4.r; + z__5.r = bb.r * cc.r - bb.i * cc.i, z__5.i = bb.r + * cc.i + bb.i * cc.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + det.r = z__1.r, det.i = z__1.i; + z__2.r = -det.r, z__2.i = -det.i; + z_sqrt(&z__1, &z__2); + rtdisc.r = z__1.r, rtdisc.i = z__1.i; + i__2 = kbot - 1; + z__2.r = tr2.r + rtdisc.r, z__2.i = tr2.i + + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + i__2 = kbot; + z__2.r = tr2.r - rtdisc.r, z__2.i = tr2.i - + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; 
++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((d__1 = w[i__4].r, abs(d__1)) + (d__2 = + d_imag(&w[i__]), abs(d__2)) < (d__3 = + w[i__5].r, abs(d__3)) + (d__4 = + d_imag(&w[i__ + 1]), abs(d__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + z__2.r = w[i__2].r - h__[i__3].r, z__2.i = w[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + z__4.r = w[i__4].r - h__[i__5].r, z__4.i = w[i__4].i - + h__[i__5].i; + z__3.r = z__4.r, z__3.i = z__4.i; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), + abs(d__2)) < (d__3 = z__3.r, abs(d__3)) + (d__4 = + d_imag(&z__3), abs(d__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + zlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR4 ==== */ + + return 0; +} /* zlaqr4_ */ + +/* Subroutine */ int zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, + doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, + integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, + integer *ldv, doublecomplex *u, integer *ldu, integer *nv, + doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, + integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Local variables */ + static integer j, k, m, i2, j2, i4, j4, k1; + static doublereal h11, h12, h21, h22; + static integer m22, ns, nu; + static doublecomplex vt[3]; + static doublereal scl; + static integer kdu, kms; + static doublereal ulp; + static integer knz, kzs; + static doublereal tst1, tst2; + static doublecomplex beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot, jtop, jrow, mtop; + static doublecomplex alpha; + static logical accum; + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + dlabad_(doublereal *, doublereal *), zlaqr1_(integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *); + + static doublereal safmin, safmax; + extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + static doublecomplex refsum; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer mstart; + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This auxiliary subroutine called by ZLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the unitary Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: ZLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: ZLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. 
+ = 2: ZLAQR5 accumulates reflections, uses matrix-matrix
+ multiply to update the far-from-diagonal matrix entries,
+ and takes advantage of 2-by-2 block structure during
+ matrix multiplies.
+
+ N (input) integer scalar
+ N is the order of the Hessenberg matrix H upon which this
+ subroutine operates.
+
+ KTOP (input) integer scalar
+ KBOT (input) integer scalar
+ These are the first and last rows and columns of an
+ isolated diagonal block upon which the QR sweep is to be
+ applied. It is assumed without a check that
+ either KTOP = 1 or H(KTOP,KTOP-1) = 0
+ and
+ either KBOT = N or H(KBOT+1,KBOT) = 0.
+
+ NSHFTS (input) integer scalar
+ NSHFTS gives the number of simultaneous shifts. NSHFTS
+ must be positive and even.
+
+ S (input/output) COMPLEX*16 array of size (NSHFTS)
+ S contains the shifts of origin that define the multi-
+ shift QR sweep. On output S may be reordered.
+
+ H (input/output) COMPLEX*16 array of size (LDH,N)
+ On input H contains a Hessenberg matrix. On output a
+ multi-shift QR sweep with shifts S(J) is applied
+ to the isolated diagonal block in rows and columns KTOP
+ through KBOT.
+
+ LDH (input) integer scalar
+ LDH is the leading dimension of H just as declared in the
+ calling procedure. LDH.GE.MAX(1,N).
+
+ ILOZ (input) INTEGER
+ IHIZ (input) INTEGER
+ Specify the rows of Z to which transformations must be
+ applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
+
+ Z (input/output) COMPLEX*16 array of size (LDZ,IHI)
+ If WANTZ = .TRUE., then the QR sweep unitary
+ similarity transformation is accumulated into
+ Z(ILOZ:IHIZ,ILO:IHI) from the right.
+ If WANTZ = .FALSE., then Z is unreferenced.
+
+ LDZ (input) integer scalar
+ LDZ is the leading dimension of Z just as declared in
+ the calling procedure. LDZ.GE.N.
+
+ V (workspace) COMPLEX*16 array of size (LDV,NSHFTS/2)
+
+ LDV (input) integer scalar
+ LDV is the leading dimension of V as declared in the
+ calling procedure. LDV.GE.3.
+
+ U (workspace) COMPLEX*16 array of size
+ (LDU,3*NSHFTS-3)
+
+ LDU (input) integer scalar
+ LDU is the leading dimension of U just as declared in
+ the calling subroutine. LDU.GE.3*NSHFTS-3.
+
+ NH (input) integer scalar
+ NH is the number of columns in array WH available for
+ workspace. NH.GE.1.
+
+ WH (workspace) COMPLEX*16 array of size (LDWH,NH)
+
+ LDWH (input) integer scalar
+ Leading dimension of WH just as declared in the
+ calling procedure. LDWH.GE.3*NSHFTS-3.
+
+ NV (input) integer scalar
+ NV is the number of rows in WV available for workspace.
+ NV.GE.1.
+
+ WV (workspace) COMPLEX*16 array of size
+ (LDWV,3*NSHFTS-3)
+
+ LDWV (input) integer scalar
+ LDWV is the leading dimension of WV as declared in
+ the calling subroutine. LDWV.GE.NV.
+
+ ================================================================
+ Based on contributions by
+ Karen Braman and Ralph Byers, Department of Mathematics,
+ University of Kansas, USA
+
+ ================================================================
+ Reference:
+
+ K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
+ Algorithm Part I: Maintaining Well Focused Shifts, and
+ Level 3 Performance, SIAM Journal of Matrix Analysis,
+ volume 23, pages 929--947, 2002.
+
+ ================================================================
+
+
+ ==== If there are no shifts, then there is nothing to do.
==== +*/ + + /* Parameter adjustments */ + --s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + i__1 = *ktop + 2 + *ktop * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + zlaset_("ALL", &kdu, &kdu, &c_b56, &c_b57, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. ==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) 
==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + zlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &s[(m << + 1) - 1], &s[m * 2], &v[m * v_dim1 + 1]); + i__5 = m * v_dim1 + 1; + alpha.r = v[i__5].r, alpha.i = v[i__5].i; + zlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + i__5 = k + 1 + k * h_dim1; + beta.r = h__[i__5].r, beta.i = h__[i__5].i; + i__5 = m * v_dim1 + 2; + i__6 = k + 2 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + i__5 = m * v_dim1 + 3; + i__6 = k + 3 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + zlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + i__5 = k + 3 + k * h_dim1; + i__6 = k + 3 + (k + 1) * h_dim1; + i__7 = k + 3 + (k + 2) * h_dim1; + if (h__[i__5].r != 0. || h__[i__5].i != 0. || (h__[i__6] + .r != 0. || h__[i__6].i != 0.) || h__[i__7].r == + 0. && h__[i__7].i == 0.) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + zlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + s[(m << 1) - 1], &s[m * 2], vt); + alpha.r = vt[0].r, alpha.i = vt[0].i; + zlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + d_cnjg(&z__2, vt); + i__5 = k + 1 + k * h_dim1; + d_cnjg(&z__5, &vt[1]); + i__6 = k + 2 + k * h_dim1; + z__4.r = z__5.r * h__[i__6].r - z__5.i * h__[i__6].i, + z__4.i = z__5.r * h__[i__6].i + z__5.i * h__[ + i__6].r; + z__3.r = h__[i__5].r + z__4.r, z__3.i = h__[i__5].i + + z__4.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + + i__5 = k + 2 + k * h_dim1; + z__3.r = refsum.r * vt[1].r - refsum.i * vt[1].i, + z__3.i = refsum.r * vt[1].i + refsum.i * vt[1] + .r; + z__2.r = h__[i__5].r - z__3.r, z__2.i = h__[i__5].i - + z__3.i; + z__1.r = z__2.r, z__1.i = z__2.i; + z__5.r = refsum.r * vt[2].r - refsum.i * vt[2].i, + z__5.i = refsum.r * vt[2].i + refsum.i * vt[2] + .r; + z__4.r = z__5.r, z__4.i = z__5.i; + i__6 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + i__8 = k + 2 + (k + 2) * h_dim1; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1) + , abs(d__2)) + ((d__3 = z__4.r, abs(d__3)) + ( + d__4 = d_imag(&z__4), abs(d__4))) > ulp * (( + d__5 = h__[i__6].r, abs(d__5)) + (d__6 = + d_imag(&h__[k + k * h_dim1]), abs(d__6)) + (( + d__7 = h__[i__7].r, abs(d__7)) + (d__8 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__8))) + ((d__9 = h__[i__8].r, abs(d__9)) + ( + d__10 = d_imag(&h__[k + 2 + (k + 2) * h_dim1]) + , abs(d__10))))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. 
==== +*/ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + i__6 = k + 1 + k * h_dim1; + z__1.r = h__[i__6].r - refsum.r, z__1.i = h__[ + i__6].i - refsum.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = m * v_dim1 + 1; + v[i__5].r = vt[0].r, v[i__5].i = vt[0].i; + i__5 = m * v_dim1 + 2; + v[i__5].r = vt[1].r, v[i__5].i = vt[1].i; + i__5 = m * v_dim1 + 3; + v[i__5].r = vt[2].r, v[i__5].i = vt[2].i; + } + } + } +/* L10: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + zlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &s[( + m22 << 1) - 1], &s[m22 * 2], &v[m22 * v_dim1 + 1]) + ; + i__4 = m22 * v_dim1 + 1; + beta.r = v[i__4].r, beta.i = v[i__4].i; + zlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + i__4 = k + 1 + k * h_dim1; + beta.r = h__[i__4].r, beta.i = h__[i__4].i; + i__4 = m22 * v_dim1 + 2; + i__5 = k + 2 + k * h_dim1; + v[i__4].r = h__[i__5].r, v[i__4].i = h__[i__5].i; + zlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + i__4 = k + 1 + k * h_dim1; + h__[i__4].r = beta.r, h__[i__4].i = beta.i; + i__4 = k + 2 + k * h_dim1; + h__[i__4].r = 0., h__[i__4].i = 0.; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + d_cnjg(&z__2, &v[m * v_dim1 + 1]); + i__6 = k + 1 + j * h_dim1; + d_cnjg(&z__6, &v[m * v_dim1 + 2]); + i__7 = k + 2 + j * h_dim1; + z__5.r = z__6.r * h__[i__7].r - z__6.i * h__[i__7].i, + z__5.i = z__6.r * h__[i__7].i + z__6.i * h__[i__7] + .r; + z__4.r = h__[i__6].r + z__5.r, z__4.i = h__[i__6].i + + z__5.i; + d_cnjg(&z__8, &v[m * v_dim1 + 3]); + i__8 = k + 3 + j * h_dim1; + z__7.r = z__8.r * h__[i__8].r - z__8.i * h__[i__8].i, + z__7.i = z__8.r * h__[i__8].i + z__8.i * h__[i__8] + .r; + z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__6 = k + 1 + j * h_dim1; + i__7 = k + 1 + j * h_dim1; + z__1.r = h__[i__7].r - refsum.r, z__1.i = h__[i__7].i - + refsum.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = k + 2 + j * h_dim1; + i__7 = k + 2 + j * h_dim1; + i__8 = m * v_dim1 + 2; + z__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + z__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = k + 3 + j * h_dim1; + i__7 = k + 3 + j * h_dim1; + i__8 = m * v_dim1 + 3; + z__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + z__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + 
z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + d_cnjg(&z__2, &v[m22 * v_dim1 + 1]); + i__4 = k + 1 + j * h_dim1; + d_cnjg(&z__5, &v[m22 * v_dim1 + 2]); + i__6 = k + 2 + j * h_dim1; + z__4.r = z__5.r * h__[i__6].r - z__5.i * h__[i__6].i, + z__4.i = z__5.r * h__[i__6].i + z__5.i * h__[i__6] + .r; + z__3.r = h__[i__4].r + z__4.r, z__3.i = h__[i__4].i + + z__4.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = k + 1 + j * h_dim1; + i__6 = k + 1 + j * h_dim1; + z__1.r = h__[i__6].r - refsum.r, z__1.i = h__[i__6].i - + refsum.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + i__4 = k + 2 + j * h_dim1; + i__6 = k + 2 + j * h_dim1; + i__7 = m22 * v_dim1 + 2; + z__2.r = refsum.r * v[i__7].r - refsum.i * v[i__7].i, + z__2.i = refsum.r * v[i__7].i + refsum.i * v[i__7] + .r; + z__1.r = h__[i__6].r - z__2.r, z__1.i = h__[i__6].i - + z__2.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; +/* L40: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + i__4 = m * v_dim1 + 1; + if (v[i__4].r != 0. || v[i__4].i != 0.) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + i__6 = m * v_dim1 + 1; + i__7 = j + (k + 1) * h_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * h_dim1; + z__4.r = v[i__8].r * h__[i__9].r - v[i__8].i * h__[ + i__9].i, z__4.i = v[i__8].r * h__[i__9].i + v[ + i__8].i * h__[i__9].r; + z__3.r = h__[i__7].r + z__4.r, z__3.i = h__[i__7].i + + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * h_dim1; + z__5.r = v[i__10].r * h__[i__11].r - v[i__10].i * h__[ + i__11].i, z__5.i = v[i__10].r * h__[i__11].i + + v[i__10].i * h__[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + z__5.i; + z__1.r = v[i__6].r * z__2.r - v[i__6].i * z__2.i, + z__1.i = v[i__6].r * z__2.i + v[i__6].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__6 = j + (k + 1) * h_dim1; + i__7 = j + (k + 1) * h_dim1; + z__1.r = h__[i__7].r - refsum.r, z__1.i = h__[i__7].i + - refsum.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = j + (k + 2) * h_dim1; + i__7 = j + (k + 2) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = j + (k + 3) * h_dim1; + i__7 = j + (k + 3) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; +/* L50: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) 
==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (kms + 1) * u_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (kms + 2) * u_dim1; + z__4.r = v[i__8].r * u[i__9].r - v[i__8].i * u[ + i__9].i, z__4.i = v[i__8].r * u[i__9].i + + v[i__8].i * u[i__9].r; + z__3.r = u[i__6].r + z__4.r, z__3.i = u[i__6].i + + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (kms + 3) * u_dim1; + z__5.r = v[i__10].r * u[i__11].r - v[i__10].i * u[ + i__11].i, z__5.i = v[i__10].r * u[i__11] + .i + v[i__10].i * u[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + + z__5.i; + z__1.r = v[i__4].r * z__2.r - v[i__4].i * z__2.i, + z__1.i = v[i__4].r * z__2.i + v[i__4].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = j + (kms + 1) * u_dim1; + i__6 = j + (kms + 1) * u_dim1; + z__1.r = u[i__6].r - refsum.r, z__1.i = u[i__6].i + - refsum.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + i__4 = j + (kms + 2) * u_dim1; + i__6 = j + (kms + 2) * u_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__6].r - z__2.r, z__1.i = u[i__6].i - + z__2.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + i__4 = j + (kms + 3) * u_dim1; + i__6 = j + (kms + 3) * u_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__6].r - z__2.r, z__1.i = u[i__6].i - + z__2.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; +/* L60: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. ==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (k + 1) * z_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * z_dim1; + z__4.r = v[i__8].r * z__[i__9].r - v[i__8].i * + z__[i__9].i, z__4.i = v[i__8].r * z__[ + i__9].i + v[i__8].i * z__[i__9].r; + z__3.r = z__[i__6].r + z__4.r, z__3.i = z__[i__6] + .i + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * z_dim1; + z__5.r = v[i__10].r * z__[i__11].r - v[i__10].i * + z__[i__11].i, z__5.i = v[i__10].r * z__[ + i__11].i + v[i__10].i * z__[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + + z__5.i; + z__1.r = v[i__4].r * z__2.r - v[i__4].i * z__2.i, + z__1.i = v[i__4].r * z__2.i + v[i__4].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = j + (k + 1) * z_dim1; + i__6 = j + (k + 1) * z_dim1; + z__1.r = z__[i__6].r - refsum.r, z__1.i = z__[ + i__6].i - refsum.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + i__4 = j + (k + 2) * z_dim1; + i__6 = j + (k + 2) * z_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__6].r - z__2.r, z__1.i = z__[i__6] + .i - z__2.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + i__4 = j + (k + 3) * z_dim1; + i__6 = j + (k + 3) * z_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__6].r - z__2.r, z__1.i = z__[i__6] + .i - z__2.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; +/* L70: */ + } + } + } +/* L80: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + i__5 = m22 * v_dim1 + 1; + if (bmp22 && (v[i__5].r != 0. 
|| v[i__5].i != 0.)) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + i__7 = m22 * v_dim1 + 1; + i__4 = j + (k + 1) * h_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * h_dim1; + z__3.r = v[i__6].r * h__[i__8].r - v[i__6].i * h__[i__8] + .i, z__3.i = v[i__6].r * h__[i__8].i + v[i__6].i * + h__[i__8].r; + z__2.r = h__[i__4].r + z__3.r, z__2.i = h__[i__4].i + + z__3.i; + z__1.r = v[i__7].r * z__2.r - v[i__7].i * z__2.i, z__1.i = + v[i__7].r * z__2.i + v[i__7].i * z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__7 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + z__1.r = h__[i__4].r - refsum.r, z__1.i = h__[i__4].i - + refsum.i; + h__[i__7].r = z__1.r, h__[i__7].i = z__1.i; + i__7 = j + (k + 2) * h_dim1; + i__4 = j + (k + 2) * h_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, z__2.i = + refsum.r * z__3.i + refsum.i * z__3.r; + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - + z__2.i; + h__[i__7].r = z__1.r, h__[i__7].i = z__1.i; +/* L90: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (kms + 1) * u_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (kms + 2) * u_dim1; + z__3.r = v[i__6].r * u[i__8].r - v[i__6].i * u[i__8] + .i, z__3.i = v[i__6].r * u[i__8].i + v[i__6] + .i * u[i__8].r; + z__2.r = u[i__7].r + z__3.r, z__2.i = u[i__7].i + + z__3.i; + z__1.r = v[i__5].r * z__2.r - v[i__5].i * z__2.i, + z__1.i = v[i__5].r * z__2.i + v[i__5].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = j + (kms + 1) * u_dim1; + i__7 = j + (kms + 1) * u_dim1; + z__1.r = u[i__7].r - refsum.r, z__1.i = u[i__7].i - + refsum.i; + u[i__5].r = z__1.r, u[i__5].i = z__1.i; + i__5 = j + (kms + 2) * u_dim1; + i__7 = j + (kms + 2) * u_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__7].r - z__2.r, z__1.i = u[i__7].i - + z__2.i; + u[i__5].r = z__1.r, u[i__5].i = z__1.i; +/* L100: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (k + 1) * z_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * z_dim1; + z__3.r = v[i__6].r * z__[i__8].r - v[i__6].i * z__[ + i__8].i, z__3.i = v[i__6].r * z__[i__8].i + v[ + i__6].i * z__[i__8].r; + z__2.r = z__[i__7].r + z__3.r, z__2.i = z__[i__7].i + + z__3.i; + z__1.r = v[i__5].r * z__2.r - v[i__5].i * z__2.i, + z__1.i = v[i__5].r * z__2.i + v[i__5].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = j + (k + 1) * z_dim1; + i__7 = j + (k + 1) * z_dim1; + z__1.r = z__[i__7].r - refsum.r, z__1.i = z__[i__7].i + - refsum.i; + z__[i__5].r = z__1.r, z__[i__5].i = z__1.i; + i__5 = j + (k + 2) * z_dim1; + i__7 = j + (k + 2) * z_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__7].r - z__2.r, z__1.i = z__[i__7].i - + z__2.i; + z__[i__5].r = z__1.r, z__[i__5].i = z__1.i; +/* L110: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = 
krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + if (h__[i__5].r != 0. || h__[i__5].i != 0.) { + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + tst1 = (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = d_imag(& + h__[k + k * h_dim1]), abs(d__2)) + ((d__3 = h__[ + i__7].r, abs(d__3)) + (d__4 = d_imag(&h__[k + 1 + + (k + 1) * h_dim1]), abs(d__4))); + if (tst1 == 0.) { + if (k >= *ktop + 1) { + i__5 = k + (k - 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 1) * h_dim1]), abs( + d__2)); + } + if (k >= *ktop + 2) { + i__5 = k + (k - 2) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 2) * h_dim1]), abs( + d__2)); + } + if (k >= *ktop + 3) { + i__5 = k + (k - 3) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 3) * h_dim1]), abs( + d__2)); + } + if (k <= *kbot - 2) { + i__5 = k + 2 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 2 + (k + 1) * h_dim1]), + abs(d__2)); + } + if (k <= *kbot - 3) { + i__5 = k + 3 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 3 + (k + 1) * h_dim1]), + abs(d__2)); + } + if (k <= *kbot - 4) { + i__5 = k + 4 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 4 + (k + 1) * h_dim1]), + abs(d__2)); + } + } + i__5 = k + 1 + k * h_dim1; +/* Computing MAX */ + d__3 = smlnum, d__4 = ulp * tst1; + if ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + 1 + k * h_dim1]), abs(d__2)) <= max(d__3,d__4) + ) { +/* Computing MAX */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + k * h_dim1]), abs(d__2)), + d__6 = (d__3 = h__[i__7].r, abs(d__3)) + ( + d__4 = d_imag(&h__[k + (k + 1) * h_dim1]), + abs(d__4)); + h12 = max(d__5,d__6); +/* Computing MIN */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + k * h_dim1]), abs(d__2)), + d__6 = (d__3 = h__[i__7].r, abs(d__3)) + ( + d__4 = d_imag(&h__[k + (k + 1) * h_dim1]), + abs(d__4)); + h21 = min(d__5,d__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + z__2.r = h__[i__5].r - h__[i__7].r, z__2.i = h__[i__5] + .i - h__[i__7].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MAX */ + i__6 = k + 1 + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__6].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__2)), d__6 = (d__3 = z__1.r, abs(d__3)) + ( + d__4 = d_imag(&z__1), abs(d__4)); + h11 = max(d__5,d__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + z__2.r = h__[i__5].r - h__[i__7].r, z__2.i = h__[i__5] + .i - h__[i__7].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MIN */ + i__6 = k + 1 + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__6].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__2)), d__6 = (d__3 = z__1.r, abs(d__3)) + ( + d__4 = d_imag(&z__1), abs(d__4)); + h22 = min(d__5,d__6); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + 
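+
+/*
+ ==== Note: up to the scale factor SCL and the SMLNUM
+ . safeguard, the test below is the Ahues & Tisseur
+ . condition H21*H12 .LE. ULP*H11*H22, where H11, H12,
+ . H21 and H22 are the scalars computed just above from
+ . the entries of the 2-by-2 diagonal block at rows and
+ . columns K and K+1. ====
+*/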
+/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) + { + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } + } + } +/* L120: */ + } + +/* + ==== Fill in the last row of each bulge. ==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + i__5 = m * v_dim1 + 1; + i__7 = m * v_dim1 + 3; + z__2.r = v[i__5].r * v[i__7].r - v[i__5].i * v[i__7].i, + z__2.i = v[i__5].r * v[i__7].i + v[i__5].i * v[i__7] + .r; + i__6 = k + 4 + (k + 3) * h_dim1; + z__1.r = z__2.r * h__[i__6].r - z__2.i * h__[i__6].i, z__1.i = + z__2.r * h__[i__6].i + z__2.i * h__[i__6].r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = k + 4 + (k + 1) * h_dim1; + z__1.r = -refsum.r, z__1.i = -refsum.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 4 + (k + 2) * h_dim1; + z__2.r = -refsum.r, z__2.i = -refsum.i; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * + z__3.i + z__2.i * z__3.r; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 4 + (k + 3) * h_dim1; + i__7 = k + 4 + (k + 3) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, z__2.i = + refsum.r * z__3.i + refsum.i * z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - z__2.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; +/* L130: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L140: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + zgemm_("C", "N", &nu, &jlen, &nu, &c_b57, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b56, &wh[wh_offset], ldwh); + zlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L150: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + zgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b56, &wv[wv_offset], ldwv); + zlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L160: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + zgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b56, &wv[wv_offset], ldwv); + zlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L170: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + zlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + zlaset_("ALL", &kzs, &jlen, &c_b56, &c_b56, &wh[wh_offset] + , ldwh); + ztrmm_("L", "U", "C", "N", &knz, &jlen, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + zgemm_("C", "N", &i2, &jlen, &j2, &c_b57, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b57, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + zlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + ztrmm_("L", "L", "C", "N", &j2, &jlen, &c_b57, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("C", "N", &i__5, &jlen, &i__7, &c_b57, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b57, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + zlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L180: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . 
columns get multiplied by zero) ==== +*/ + + zlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + zlaset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[wv_offset] + , ldwv); + ztrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + zgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b57, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + zlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ztrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b57, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + zlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L190: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . KZS columns get multiplied by zero) ==== +*/ + + zlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + zlaset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[ + wv_offset], ldwv); + ztrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + zgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b57, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + zlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ztrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b57, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + zlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L200: */ + } + } + } + } +/* L210: */ + } + +/* ==== End of ZLAQR5 ==== */ + + return 0; +} /* zlaqr5_ */ + +/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer * + lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, + doublereal *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, 
doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA, M) + A contains the M by M matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) DOUBLE PRECISION array, dimension (LDB, N) + B contains the M by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,M). + + C (input) COMPLEX*16 array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,M). + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[(j - 1) * *m + i__] = b[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + dgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = d_imag(&b[i__ + j * b_dim1]); +/* L50: */ + } +/* L60: */ + } + dgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + d__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + z__1.r = d__1, z__1.i = rwork[i__5]; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of ZLARCM */ + +} /* zlarcm_ */ + +/* Subroutine */ int zlarf_(char *side, integer *m, integer *n, doublecomplex + *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static logical applyleft; + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemv_(char *, integer *, 
integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static integer lastv; + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *), + ilazlr_(integer *, integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARF applies a complex elementary reflector H to a complex M-by-N + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix. + + To apply H' (the conjugate transpose of H), supply conjg(tau) instead + tau. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX*16 array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) COMPLEX*16 + The value tau in the representation of H. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (tau->r != 0. || tau->i != 0.) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 0 && (v[i__1].r == 0. && v[i__1].i == 0.))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ + + zgemv_("Conjugate transpose", &lastv, &lastc, &c_b57, &c__[ + c_offset], ldc, &v[1], incv, &c_b56, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) 
- v(1:lastv,1) * w(1:lastc,1)' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastv, &lastc, &z__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + zgemv_("No transpose", &lastc, &lastv, &c_b57, &c__[c_offset], + ldc, &v[1], incv, &c_b56, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastc, &lastv, &z__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of ZLARF */ + +} /* zlarf_ */ + +/* Subroutine */ int zlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublecomplex *v, integer + *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * + ldc, doublecomplex *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lastv; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), ztrmm_(char *, char *, char *, char * + , integer *, integer *, doublecomplex *, doublecomplex *, integer + *, doublecomplex *, integer *); + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zlacgv_(integer *, doublecomplex *, integer *) + ; + extern integer ilazlr_(integer *, integer *, doublecomplex *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARFB applies a complex block reflector H or its transpose H' to a + complex M-by-N matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'C': apply H' (Conjugate transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) COMPLEX*16 array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. 
+ + T (input) COMPLEX*16 array, dimension (LDT,K) + The triangular K-by-K matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2 */ + + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[*k + 1 + c_dim1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &z__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + 
for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2 */ + + i__1 = lastv - *k; + zgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &z__1, &work[work_offset], ldwork, &v[*k + + 1 + v_dim1], ldv, &c_b57, &c__[(*k + 1) * + c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1 */ + + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * 
H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1 */ + + i__1 = lastv - *k; + zgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b57, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &z__1, &work[work_offset], ldwork, &v[ + v_offset], ldv, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2'*V2' */ + + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[*k + 1 + c_dim1], + ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, & + work[work_offset], ldwork) + ; + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, &lastc, k, &z__1, &v[(*k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork, &c_b57, &c__[*k + + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C2 * V2' */ + + i__1 = lastv - *k; + zgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, & + v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, &work[ + work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + z__1, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b57, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + 
i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1'*V1' */ + + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[c_offset], ldc, &v[ + v_offset], ldv, &c_b57, &work[work_offset], + ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (lastv > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, &lastc, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { + +/* W := W + C1 * V1' */ + + i__1 = lastv - *k; + zgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (lastv > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = lastv - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + z__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b57, &c__[c_offset], ldc); + } + +/* W := W * 
V2 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = lastc; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of ZLARFB */ + +} /* zlarfb_ */ + +/* Subroutine */ int zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer j, knt; + static doublereal beta, alphi, alphr; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static doublereal xnorm; + extern doublereal dlapy3_(doublereal *, doublereal *, doublereal *), + dznrm2_(integer *, doublecomplex *, integer *), dlamch_(char *); + static doublereal safmin; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + static doublereal rsafmn; + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARFG generates a complex elementary reflector H of order n, such + that + + H' * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, with beta real, and x is an + (n-1)-element complex vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a complex scalar and v is a complex (n-1)-element + vector. Note that H is not hermitian. + + If the elements of x are all zero and alpha is real, then tau = 0 + and H is taken to be the unit matrix. + + Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) COMPLEX*16 + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) COMPLEX*16 array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) COMPLEX*16 + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + tau->r = 0., tau->i = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = d_imag(alpha); + + if (xnorm == 0. && alphi == 0.) { + +/* H = I */ + + tau->r = 0., tau->i = 0.; + } else { + +/* general case */ + + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + safmin = SAFEMINIMUM / EPSILON; + rsafmn = 1. 
/ safmin; + + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + +L10: + ++knt; + i__1 = *n - 1; + zdscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + z__1.r = alphr, z__1.i = alphi; + alpha->r = z__1.r, alpha->i = z__1.i; + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + } + d__1 = (beta - alphr) / beta; + d__2 = -alphi / beta; + z__1.r = d__1, z__1.i = d__2; + tau->r = z__1.r, tau->i = z__1.i; + z__2.r = alpha->r - beta, z__2.i = alpha->i; + zladiv_(&z__1, &c_b57, &z__2); + alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = *n - 1; + zscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + alpha->r = beta, alpha->i = 0.; + } + + return 0; + +/* End of ZLARFG */ + +} /* zlarfg_ */ + +/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * + t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, prevlastv; + static doublecomplex vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static integer lastv; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARFT forms the triangular factor T of a complex block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) COMPLEX*16 array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. 
+ If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) COMPLEX*16 array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(prevlastv,i__); + i__2 = i__; + if (tau[i__2].r == 0. && tau[i__2].i == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0., t[i__3].i = 0.; +/* L10: */ + } + } else { + +/* general case */ + + i__2 = i__ + i__ * v_dim1; + vii.r = v[i__2].r, vii.i = v[i__2].i; + i__2 = i__ + i__ * v_dim1; + v[i__2].r = 1., v[i__2].i = 0.; + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ + + i__2 = j - i__ + 1; + i__3 = i__ - 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ + + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & + c_b56, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) 
{ + goto L16; + } + } +L16: + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ + + if (i__ < j) { + i__2 = j - i__; + zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + i__2 = i__ - 1; + i__3 = j - i__ + 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_("No transpose", &i__2, &i__3, &z__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b56, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < j) { + i__2 = j - i__; + zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + } + i__2 = i__ + i__ * v_dim1; + v[i__2].r = vii.r, v[i__2].i = vii.i; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } +/* L20: */ + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0. && tau[i__1].i == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0., t[i__2].i = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + i__1 = *n - *k + i__ + i__ * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = 1., v[i__1].i = 0.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) +*/ + + i__1 = *n - *k + i__ - j + 1; + i__2 = *k - i__; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b56, &t[i__ + 1 + i__ * + t_dim1], &c__1); + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } else { + i__1 = i__ + (*n - *k + i__) * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = 1., v[i__1].i = 0.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) 
{ + goto L36; + } + } +L36: + j = max(lastv,prevlastv); + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' +*/ + + i__1 = *n - *k + i__ - 1 - j + 1; + zlacgv_(&i__1, &v[i__ + j * v_dim1], ldv); + i__1 = *k - i__; + i__2 = *n - *k + i__ - j + 1; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_("No transpose", &i__1, &i__2, &z__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b56, &t[i__ + 1 + i__ * t_dim1], & + c__1); + i__1 = *n - *k + i__ - 1 - j + 1; + zlacgv_(&i__1, &v[i__ + j * v_dim1], ldv); + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ + } + } + return 0; + +/* End of ZLARFT */ + +} /* zlarft_ */ + +/* Subroutine */ int zlartg_(doublecomplex *f, doublecomplex *g, doublereal * + cs, doublecomplex *sn, doublecomplex *r__) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static doublereal d__; + static integer i__; + static doublereal f2, g2; + static doublecomplex ff; + static doublereal di, dr; + static doublecomplex fs, gs; + static doublereal f2s, g2s, eps, scale; + static integer count; + static doublereal safmn2; + extern doublereal dlapy2_(doublereal *, doublereal *); + static doublereal safmx2; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARTG generates a plane rotation so that + + [ CS SN ] [ F ] [ R ] + [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a faster version of the BLAS1 routine ZROTG, except for + the following differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0, then CS=0 and SN is chosen so that R is real. + + Arguments + ========= + + F (input) COMPLEX*16 + The first component of vector to be rotated. + + G (input) COMPLEX*16 + The second component of vector to be rotated. + + CS (output) DOUBLE PRECISION + The cosine of the rotation. + + SN (output) COMPLEX*16 + The sine of the rotation. + + R (output) COMPLEX*16 + The nonzero component of the rotated vector. + + Further Details + ======= ======= + + 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel + + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + + ===================================================================== + + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / + + IF( FIRST ) THEN +*/ + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* + FIRST = .FALSE. 
+ END IF + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = f->r, abs(d__1)), d__8 = (d__2 = d_imag(f), abs(d__2)); +/* Computing MAX */ + d__9 = (d__3 = g->r, abs(d__3)), d__10 = (d__4 = d_imag(g), abs(d__4)); + d__5 = max(d__7,d__8), d__6 = max(d__9,d__10); + scale = max(d__5,d__6); + fs.r = f->r, fs.i = f->i; + gs.r = g->r, gs.i = g->i; + count = 0; + if (scale >= safmx2) { +L10: + ++count; + z__1.r = safmn2 * fs.r, z__1.i = safmn2 * fs.i; + fs.r = z__1.r, fs.i = z__1.i; + z__1.r = safmn2 * gs.r, z__1.i = safmn2 * gs.i; + gs.r = z__1.r, gs.i = z__1.i; + scale *= safmn2; + if (scale >= safmx2) { + goto L10; + } + } else if (scale <= safmn2) { + if (g->r == 0. && g->i == 0.) { + *cs = 1.; + sn->r = 0., sn->i = 0.; + r__->r = f->r, r__->i = f->i; + return 0; + } +L20: + --count; + z__1.r = safmx2 * fs.r, z__1.i = safmx2 * fs.i; + fs.r = z__1.r, fs.i = z__1.i; + z__1.r = safmx2 * gs.r, z__1.i = safmx2 * gs.i; + gs.r = z__1.r, gs.i = z__1.i; + scale *= safmx2; + if (scale <= safmn2) { + goto L20; + } + } +/* Computing 2nd power */ + d__1 = fs.r; +/* Computing 2nd power */ + d__2 = d_imag(&fs); + f2 = d__1 * d__1 + d__2 * d__2; +/* Computing 2nd power */ + d__1 = gs.r; +/* Computing 2nd power */ + d__2 = d_imag(&gs); + g2 = d__1 * d__1 + d__2 * d__2; + if (f2 <= max(g2,1.) * safmin) { + +/* This is a rare case: F is very small. */ + + if (f->r == 0. && f->i == 0.) { + *cs = 0.; + d__2 = g->r; + d__3 = d_imag(g); + d__1 = dlapy2_(&d__2, &d__3); + r__->r = d__1, r__->i = 0.; +/* Do complex/real division explicitly with two real divisions */ + d__1 = gs.r; + d__2 = d_imag(&gs); + d__ = dlapy2_(&d__1, &d__2); + d__1 = gs.r / d__; + d__2 = -d_imag(&gs) / d__; + z__1.r = d__1, z__1.i = d__2; + sn->r = z__1.r, sn->i = z__1.i; + return 0; + } + d__1 = fs.r; + d__2 = d_imag(&fs); + f2s = dlapy2_(&d__1, &d__2); +/* + G2 and G2S are accurate + G2 is at least SAFMIN, and G2S is at least SAFMN2 +*/ + g2s = sqrt(g2); +/* + Error in CS from underflow in F2S is at most + UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS + If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, + and so CS .lt. sqrt(SAFMIN) + If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN + and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) + Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S +*/ + *cs = f2s / g2s; +/* + Make sure abs(FF) = 1 + Do complex/real division explicitly with 2 real divisions + Computing MAX +*/ + d__3 = (d__1 = f->r, abs(d__1)), d__4 = (d__2 = d_imag(f), abs(d__2)); + if (max(d__3,d__4) > 1.) { + d__1 = f->r; + d__2 = d_imag(f); + d__ = dlapy2_(&d__1, &d__2); + d__1 = f->r / d__; + d__2 = d_imag(f) / d__; + z__1.r = d__1, z__1.i = d__2; + ff.r = z__1.r, ff.i = z__1.i; + } else { + dr = safmx2 * f->r; + di = safmx2 * d_imag(f); + d__ = dlapy2_(&dr, &di); + d__1 = dr / d__; + d__2 = di / d__; + z__1.r = d__1, z__1.i = d__2; + ff.r = z__1.r, ff.i = z__1.i; + } + d__1 = gs.r / g2s; + d__2 = -d_imag(&gs) / g2s; + z__2.r = d__1, z__2.i = d__2; + z__1.r = ff.r * z__2.r - ff.i * z__2.i, z__1.i = ff.r * z__2.i + ff.i + * z__2.r; + sn->r = z__1.r, sn->i = z__1.i; + z__2.r = *cs * f->r, z__2.i = *cs * f->i; + z__3.r = sn->r * g->r - sn->i * g->i, z__3.i = sn->r * g->i + sn->i * + g->r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + r__->r = z__1.r, r__->i = z__1.i; + } else { + +/* + This is the most common case. 
+ Neither F2 nor F2/G2 are less than SAFMIN + F2S cannot overflow, and it is accurate +*/ + + f2s = sqrt(g2 / f2 + 1.); +/* Do the F2S(real)*FS(complex) multiply with two real multiplies */ + d__1 = f2s * fs.r; + d__2 = f2s * d_imag(&fs); + z__1.r = d__1, z__1.i = d__2; + r__->r = z__1.r, r__->i = z__1.i; + *cs = 1. / f2s; + d__ = f2 + g2; +/* Do complex/real division explicitly with two real divisions */ + d__1 = r__->r / d__; + d__2 = d_imag(r__) / d__; + z__1.r = d__1, z__1.i = d__2; + sn->r = z__1.r, sn->i = z__1.i; + d_cnjg(&z__2, &gs); + z__1.r = sn->r * z__2.r - sn->i * z__2.i, z__1.i = sn->r * z__2.i + + sn->i * z__2.r; + sn->r = z__1.r, sn->i = z__1.i; + if (count != 0) { + if (count > 0) { + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + z__1.r = safmx2 * r__->r, z__1.i = safmx2 * r__->i; + r__->r = z__1.r, r__->i = z__1.i; +/* L30: */ + } + } else { + i__1 = -count; + for (i__ = 1; i__ <= i__1; ++i__) { + z__1.r = safmn2 * r__->r, z__1.i = safmn2 * r__->i; + r__->r = z__1.r, r__->i = z__1.i; +/* L40: */ + } + } + } + } + return 0; + +/* End of ZLARTG */ + +} /* zlartg_ */ + +/* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static doublereal mul, cto1; + static logical done; + static doublereal ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static doublereal cfrom1; + + static doublereal cfromc; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLASCL multiplies the M by N complex matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
+ + A (input/output) COMPLEX*16 array, dimension (LDA,N) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || disnan_(cfrom)) { + *info = -4; + } else if (disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. +*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) 
{ + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of ZLASCL */ + +} /* zlascl_ */ + +/* Subroutine */ int zlaset_(char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLASET initializes a 2-D array A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set. The lower triangle + is unchanged. + = 'L': Lower triangular part is set. The upper triangle + is unchanged. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + On entry, M specifies the number of rows of A. + + N (input) INTEGER + On entry, N specifies the number of columns of A. + + ALPHA (input) COMPLEX*16 + All the offdiagonal array elements are set to ALPHA. + + BETA (input) COMPLEX*16 + All the diagonal array elements are set to BETA. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; + A(i,i) = BETA , 1 <= i <= min(m,n) + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the diagonal to BETA and the strictly upper triangular + part of the array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L10: */ + } +/* L20: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the diagonal to BETA and the strictly lower triangular + part of the array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L40: */ + } +/* L50: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L60: */ + } + + } else { + +/* + Set the array to BETA on the diagonal and ALPHA on the + offdiagonal. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ + } + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ + } + } + + return 0; + +/* End of ZLASET */ + +} /* zlaset_ */ + +/* Subroutine */ int zlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublecomplex *a, + integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + static doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLASR applies a sequence of real plane rotations to a complex matrix + A, from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form + + A := A*P**T + + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. + + When DIRECT = 'F' (Forward sequence), then + + P = P(z-1) * ... * P(2) * P(1) + + and when DIRECT = 'B' (Backward sequence), then + + P = P(1) * P(2) * ... * P(z-1) + + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation + + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). + + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. + + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form + + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P**T + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! 
(lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) 
{ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of ZLASR */ + +} /* zlasr_ */ + +/* Subroutine */ int zlassq_(integer *n, doublecomplex *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer ix; + static doublereal temp1; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLASSQ returns the values scl and ssq such that + + ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is + assumed to be at least unity and the value of ssq will then satisfy + + 1.0 .le. ssq .le. ( sumsq + 2*n ). + + scale is assumed to be non-negative and scl returns the value + + scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), + i + + scale and sumsq must be supplied in SCALE and SUMSQ respectively. + SCALE and SUMSQ are overwritten by scl and ssq respectively. + + The routine makes only one pass through the vector X. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) COMPLEX*16 array, dimension (N) + The vector x as described above. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) DOUBLE PRECISION + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with the value scl . + + SUMSQ (input/output) DOUBLE PRECISION + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with the value ssq . + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp1 = (d__1 = x[i__3].r, abs(d__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) 
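+/*
+   Editorial note (illustrative sketch, not part of the f2c output): the
+   branch above handled the real part of x(ix); the branch that follows
+   repeats the identical update for the imaginary part.  Each update keeps
+   the invariant  scale**2 * sumsq == (sum of squares folded in so far)
+   while guaranteeing that sumsq itself never overflows, by rescaling
+   whenever a new magnitude exceeds the current scale.  A stand-alone
+   version of that single update (hypothetical helper name update_ssq):
+
+       static void update_ssq(double absxi, double *scale, double *sumsq)
+       {
+           if (absxi == 0.)
+               return;
+           if (*scale < absxi) {
+               double r = *scale / absxi;
+               *sumsq = 1. + *sumsq * r * r;   // re-express in the new scale
+               *scale = absxi;
+           } else {
+               double r = absxi / *scale;
+               *sumsq += r * r;
+           }
+       }
+*/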
{ + temp1 = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + + return 0; + +/* End of ZLASSQ */ + +} /* zlassq_ */ + +/* Subroutine */ int zlaswp_(integer *n, doublecomplex *a, integer *lda, + integer *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static doublecomplex temp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of ZLASWP */ + +} /* zlaswp_ */ + +/* Subroutine */ int zlatrd_(char *uplo, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, + doublecomplex *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, iw; + static doublecomplex alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zhemv_(char *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to + Hermitian tridiagonal form by a unitary similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by ZHETRD. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. 
+ On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + E (output) DOUBLE PRECISION array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) COMPLEX*16 array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a Hermitian rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b57, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b57, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + i__2 = i__ - 1; + e[i__2] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + zhemv_("Upper", &i__2, &c_b57, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b56, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[( + iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__ - 1; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = i__ - 1; + zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = i__ - 1; + zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ 
= 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + zhemv_("Lower", &i__2, &c_b57, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[i__ + + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b56, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = *n - i__; + zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of ZLATRD */ + +} /* zlatrd_ */ + +/* Subroutine */ int zlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, + doublereal *scale, doublereal *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, j; + static doublereal xj, rec, tjj; + static integer jinc; + static 
doublereal xbnd; + static integer imax; + static doublereal tmax; + static doublecomplex tjjs; + static doublereal xmax, grow; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal tscal; + static doublecomplex uscal; + static integer jlast; + static doublecomplex csumj; + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + static logical upper; + extern /* Double Complex */ VOID zdotu_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_( + char *, char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_( + doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + static doublereal bignum; + extern integer izamax_(integer *, doublecomplex *, integer *); + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + static logical notran; + static integer jfirst; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + static logical nounit; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLATRS solves one of the triangular systems + + A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + + with scaling to prevent overflow. Here A is an upper or lower + triangular matrix, A**T denotes the transpose of A, A**H denotes the + conjugate transpose of A, x and b are n-element vectors, and s is a + scaling factor, usually less than or equal to 1, chosen so that the + components of x will be less than the overflow threshold. If the + unscaled problem will not cause overflow, the Level 2 BLAS routine + ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + TRANS (input) CHARACTER*1 + Specifies the operation applied to A. + = 'N': Solve A * x = s*b (No transpose) + = 'T': Solve A**T * x = s*b (Transpose) + = 'C': Solve A**H * x = s*b (Conjugate transpose) + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + NORMIN (input) CHARACTER*1 + Specifies whether CNORM has been set or not. + = 'Y': CNORM contains the column norms on entry + = 'N': CNORM is not set on entry. On exit, the norms will + be computed and stored in CNORM. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The triangular matrix A. If UPLO = 'U', the leading n by n + upper triangular part of the array A contains the upper + triangular matrix, and the strictly lower triangular part of + A is not referenced. 
If UPLO = 'L', the leading n by n lower + triangular part of the array A contains the lower triangular + matrix, and the strictly upper triangular part of A is not + referenced. If DIAG = 'U', the diagonal elements of A are + also not referenced and are assumed to be 1. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max (1,N). + + X (input/output) COMPLEX*16 array, dimension (N) + On entry, the right hand side b of the triangular system. + On exit, X is overwritten by the solution vector x. + + SCALE (output) DOUBLE PRECISION + The scaling factor s for the triangular system + A * x = s*b, A**T * x = s*b, or A**H * x = s*b. + If SCALE = 0, the matrix A is singular or badly scaled, and + the vector x is an exact or approximate solution to A*x = 0. + + CNORM (input or output) DOUBLE PRECISION array, dimension (N) + + If NORMIN = 'Y', CNORM is an input argument and CNORM(j) + contains the norm of the off-diagonal part of the j-th column + of A. If TRANS = 'N', CNORM(j) must be greater than or equal + to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) + must be greater than or equal to the 1-norm. + + If NORMIN = 'N', CNORM is an output argument and CNORM(j) + returns the 1-norm of the offdiagonal part of the j-th column + of A. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + Further Details + ======= ======= + + A rough bound on x is computed; if that is less than overflow, ZTRSV + is called, otherwise, specific code is used which checks for possible + overflow or divide-by-zero at every operation. + + A columnwise scheme is used for solving A*x = b. The basic algorithm + if A is lower triangular is + + x[1:n] := b[1:n] + for j = 1, ..., n + x(j) := x(j) / A(j,j) + x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] + end + + Define bounds on the components of x after j iterations of the loop: + M(j) = bound on x[1:j] + G(j) = bound on x[j+1:n] + Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. + + Then for iteration j+1 we have + M(j+1) <= G(j) / | A(j+1,j+1) | + G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | + <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) + + where CNORM(j+1) is greater than or equal to the infinity-norm of + column j+1 of A, not counting the diagonal. Hence + + G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) + 1<=i<=j + and + + |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) + 1<=i< j + + Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the + reciprocal of the largest M(j), j=1,..,n, is larger than + max(underflow, 1/overflow). + + The bound on x(j) is also used to determine when a step in the + columnwise method can be performed without fear of overflow. If + the computed bound is greater than a large constant, x is scaled to + prevent overflow, but if the bound overflows, x is set to 0, x(j) to + 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. + + Similarly, a row-wise scheme is used to solve A**T *x = b or + A**H *x = b. The basic algorithm for A upper triangular is + + for j = 1, ..., n + x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) + end + + We simultaneously compute two bounds + G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j + M(j) = bound on x(i), 1<=i<=j + + The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we + add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. 
+ Then the bound on x(j) is + + M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | + + <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) + 1<=i<=j + + and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater + than max(underflow, 1/overflow). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLATRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum /= PRECISION; + bignum = 1. / smlnum; + *scale = 1.; + + if (lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = dzasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = dzasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.; + } + } + +/* + Scale the column norms by TSCAL if the maximum element in CNORM is + greater than BIGNUM/2. +*/ + + imax = idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum * .5) { + tscal = 1.; + } else { + tscal = .5 / (smlnum * tmax); + dscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* + Compute a bound on the computed solution vector to see if the + Level 2 BLAS routine ZTRSV can be used. +*/ + + xmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j; + d__3 = xmax, d__4 = (d__1 = x[i__2].r / 2., abs(d__1)) + (d__2 = + d_imag(&x[j]) / 2., abs(d__2)); + xmax = max(d__3,d__4); +/* L30: */ + } + xbnd = xmax; + + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L60; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, G(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5 / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + + if (tjj >= smlnum) { + +/* + M(j) = G(j-1) / abs(A(j,j)) + + Computing MIN +*/ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + } else { + +/* M(j) could overflow, set XBND to 0. 
*/ + + xbnd = 0.; + } + + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } +/* L40: */ + } + grow = xbnd; + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + d__1 = 1., d__2 = .5 / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L50: */ + } + } +L60: + + ; + } else { + +/* Compute the growth in A**T * x = b or A**H * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.) { + grow = 0.; + goto L90; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, M(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5 / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + + if (tjj >= smlnum) { + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + if (xj > tjj) { + xbnd *= tjj / xj; + } + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.; + } +/* L70: */ + } + grow = min(grow,xbnd); + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + d__1 = 1., d__2 = .5 / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L80: */ + } + } +L90: + ; + } + + if (grow * tscal > smlnum) { + +/* + Use the Level 2 BLAS solve if the reciprocal of the bound on + elements of X is not too small. +*/ + + ztrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum * .5) { + +/* + Scale X so that its components are less than or equal to + BIGNUM in absolute value. +*/ + + *scale = bignum * .5 / xmax; + zdscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } else { + xmax *= 2.; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3].i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) 
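+/*
+   Editorial note (illustrative sketch, not part of the f2c output): when
+   the diagonal is unit and TSCAL == 1 the division is skipped entirely via
+   the jump below; otherwise x(j) is divided by the (scaled) diagonal entry
+   using the guarded pattern described in Further Details: if the quotient
+   could exceed the overflow threshold, the whole solution vector and the
+   running scale factor are first shrunk.  A real-valued sketch of that
+   guard, assuming IEEE doubles and ignoring the tiny- and zero-diagonal
+   cases handled further down (the helper name guarded_div is hypothetical;
+   fabs needs <math.h>):
+
+       static void guarded_div(double *x, int n, int j, double ajj,
+                               double *s, double bignum)
+       {
+           double xj  = fabs(x[j]);
+           double tjj = fabs(ajj);
+           if (tjj < 1. && xj > tjj * bignum) {
+               double rec = 1. / xj;          // shrink x and s by 1/|x(j)|
+               int i;
+               for (i = 0; i < n; ++i)
+                   x[i] *= rec;
+               *s *= rec;
+           }
+           x[j] /= ajj;
+       }
+*/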
{ + goto L110; + } + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* + Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + to avoid overflow when dividing by A(j,j). +*/ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* + Scale by 1/CNORM(j) to avoid overflow when + multiplying x(j) times column j. +*/ + + rec /= cnorm[j]; + } + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0, and compute a solution to A*x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L100: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L110: + +/* + Scale x if necessary to avoid overflow when adding a + multiple of column j of A. +*/ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + zdscal_(n, &c_b2435, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* + Compute the update + x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +*/ + + i__3 = j - 1; + i__4 = j; + z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + zaxpy_(&i__3, &z__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = izamax_(&i__3, &x[1], &c__1); + i__3 = i__; + xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( + &x[i__]), abs(d__2)); + } + } else { + if (j < *n) { + +/* + Compute the update + x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +*/ + + i__3 = *n - j; + i__4 = j; + z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + zaxpy_(&i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + izamax_(&i__3, &x[j + 1], &c__1); + i__3 = i__; + xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( + &x[i__]), abs(d__2)); + } + } +/* L120: */ + } + + } else if (lsame_(trans, "T")) { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + uscal.r = tscal, uscal.i = 0.; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] + .i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > 1.) 
{ + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + zladiv_(&z__1, &uscal, &tjjs); + uscal.r = z__1.r, uscal.i = z__1.i; + } + if (rec < 1.) { + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0., csumj.i = 0.; + if (uscal.r == 1. && uscal.i == 0.) { + +/* + If the scaling needed for A in the dot product is 1, + call ZDOTU to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + zdotu_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } else if (j < *n) { + i__3 = *n - j; + zdotu_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + z__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, z__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, + z__2.i = z__3.r * x[i__5].i + z__3.i * x[ + i__5].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L130: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + z__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, z__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, + z__2.i = z__3.r * x[i__5].i + z__3.i * x[ + i__5].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L140: */ + } + } + } + + z__1.r = tscal, z__1.i = 0.; + if (uscal.r == z__1.r && uscal.i == z__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - + csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] + .i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) { + goto L160; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**T *x = 0. 
+*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L150: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + *scale = 0.; + xmax = 0.; + } +L160: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + zladiv_(&z__2, &x[j], &tjjs); + z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } +/* Computing MAX */ + i__3 = j; + d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[j]), abs(d__2)); + xmax = max(d__3,d__4); +/* L170: */ + } + + } else { + +/* Solve A**H * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + uscal.r = tscal, uscal.i = 0.; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > 1.) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + zladiv_(&z__1, &uscal, &tjjs); + uscal.r = z__1.r, uscal.i = z__1.i; + } + if (rec < 1.) { + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0., csumj.i = 0.; + if (uscal.r == 1. && uscal.i == 0.) { + +/* + If the scaling needed for A in the dot product is 1, + call ZDOTC to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + zdotc_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } else if (j < *n) { + i__3 = *n - j; + zdotc_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + d_cnjg(&z__4, &a[i__ + j * a_dim1]); + z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, + z__3.i = z__4.r * uscal.i + z__4.i * + uscal.r; + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L180: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + d_cnjg(&z__4, &a[i__ + j * a_dim1]); + z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, + z__3.i = z__4.r * uscal.i + z__4.i * + uscal.r; + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L190: */ + } + } + } + + z__1.r = tscal, z__1.i = 0.; + if (uscal.r == z__1.r && uscal.i == z__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. 
+*/ + + i__3 = j; + i__4 = j; + z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - + csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) { + goto L210; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**H *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L200: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + *scale = 0.; + xmax = 0.; + } +L210: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + zladiv_(&z__2, &x[j], &tjjs); + z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } +/* Computing MAX */ + i__3 = j; + d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[j]), abs(d__2)); + xmax = max(d__3,d__4); +/* L220: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. / tscal; + dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of ZLATRS */ + +} /* zlatrs_ */ + +/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. 
+ + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + z__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + z__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of ZLAUU2 */ + +} /* zlauu2_ */ + +/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zherk_(char *, char *, integer *, + integer *, doublereal *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + zlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b57, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Upper", "No transpose", &ib, &i__3, &c_b1034, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b1034, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b57, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b57, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b1034, &a[i__ + ib + i__ * a_dim1], lda, & + c_b1034, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of ZLAUUM */ + +} /* zlauum_ */ + +/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer j; + static doublereal ajj; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software 
package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0. || disnan_(&ajj)) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b57, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. 
*/ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0. || disnan_(&ajj)) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b57, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of ZPOTF2 */ + +} /* zpotf2_ */ + +/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zherk_(char *, char *, integer *, + integer *, doublereal *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + zpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1276, & + a[j * a_dim1 + 1], lda, &c_b1034, &a[j + j * a_dim1], + lda); + zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b57, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b57, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Lower", "No transpose", &jb, &i__3, &c_b1276, &a[j + + a_dim1], lda, &c_b1034, &a[j + j * a_dim1], lda); + zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. 
*/ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b57, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b57, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of ZPOTRF */ + +} /* zpotrf_ */ + +/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_( + char *, integer *, doublecomplex *, integer *, integer *), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, + integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by ZPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + ZPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). 
*/ + + zlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of ZPOTRI */ + +} /* zpotri_ */ + +/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by ZPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by ZPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b57, &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b57, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b57, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of ZPOTRS */ + +} /* zpotrs_ */ + +/* Subroutine */ int zrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex stemp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZROT applies a plane rotation, where the cos (C) is real and the + sin (S) is complex, and the vectors CX and CY are complex. + + Arguments + ========= + + N (input) INTEGER + The number of elements in the vectors CX and CY. + + CX (input/output) COMPLEX*16 array, dimension (N) + On input, the vector X. + On output, CX is overwritten with C*X + S*Y. + + INCX (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + CY (input/output) COMPLEX*16 array, dimension (N) + On input, the vector Y. + On output, CY is overwritten with -CONJG(S)*X + C*Y. + + INCY (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + C (input) DOUBLE PRECISION + S (input) COMPLEX*16 + C and S define a rotation + [ C S ] + [ -conjg(S) C ] + where C*C + S*CONJG(S) = 1.0. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* Code for unequal increments or equal increments not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = iy; + z__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, z__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + stemp.r = z__1.r, stemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + d_cnjg(&z__4, s); + i__4 = ix; + z__3.r = z__4.r * cx[i__4].r - z__4.i * cx[i__4].i, z__3.i = z__4.r * + cx[i__4].i + z__4.i * cx[i__4].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = ix; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* Code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = i__; + z__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, z__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + stemp.r = z__1.r, stemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + d_cnjg(&z__4, s); + i__4 = i__; + z__3.r = z__4.r * cx[i__4].r - z__4.i * cx[i__4].i, z__3.i = z__4.r * + cx[i__4].i + z__4.i * cx[i__4].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = i__; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; +/* L30: 
*/ + } + return 0; +} /* zrot_ */ + +/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, ll, lgn; + static doublereal eps, tiny; + extern logical lsame_(char *, char *); + static integer lwmin, start; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed0_(integer *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer finish; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. 
+ + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + Note that for COMPZ = 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be 1. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LRWORK + need only be max(1,2*(N-1)). + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. 
+ < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else if (*n <= smlsiz) { + lwmin = 1; + liwmin = 1; + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + goto L70; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + + zsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + + } else { + +/* If COMPZ = 'I', we simply call DSTEDC instead. 
*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b328, &c_b1034, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + goto L70; + } + +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. + + Scale. +*/ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + goto L70; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. +*/ + + finish = start; +L40: + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &i__1, &c__1, & + e[start], &i__2, info); + + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + + 1], ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L70; + } + +/* Scale back. */ + + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, & + rwork[m * m + 1], info); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], + ldz); + if (*info > 0) { + *info = start * (*n + 1) + finish; + goto L70; + } + } + + start = finish + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
+*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L60: */ + } + } + } + +L70: + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZSTEDC */ + +} /* zstedc_ */ + +/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlaev2_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. 
+ + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + zlaset_("Full", n, n, &c_b56, &c_b57, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) 
{ + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b1034); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b1034); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. 
*/ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of ZSTEQR */ + +} /* zsteqr_ */ + +/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static doublereal ulp; + static logical allv; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal scale; + extern logical lsame_(char *, char *); + static doublereal remax; + static logical leftv, bothv; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical somev; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical rightv; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, (y**H)*T = w*(y**H) + + where y**H denotes the conjugate transpose of the vector y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal of T. + + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the unitary factor that reduces a matrix A to + Schur form T, then Q*X and Q*Y are the matrices of right and left + eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. 
+ + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + backtransformed using the matrices supplied in + VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + as indicated by the logical array SELECT. + + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + The eigenvector corresponding to the j-th eigenvalue is + computed if SELECT(j) = .TRUE.. + Not referenced if HOWMNY = 'A' or 'B'. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX*16 array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + Not referenced if SIDE = 'R'. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. + + VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + Not referenced if SIDE = 'L'. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B'; LDVR >= N. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. + + WORK (workspace) COMPLEX*16 array, dimension (2*N) + + RWORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. 
+ + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + +/* + Set M to the number of columns required to store the selected + eigenvectors. +*/ + + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); + } +/* L10: */ + } + } else { + *m = *n; + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); + +/* Store the diagonal elements of T in working array WORK. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + rwork[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } + + if (rightv) { + +/* Compute right eigenvectors. */ + + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); + + work[1].r = 1., work[1].i = 0.; + +/* Form right-hand side. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L40: */ + } + +/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +*/ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + i__2 = k + k * t_dim1; + if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.; + } +/* L50: */ + } + + if (ki > 1) { + i__1 = ki - 1; + zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! 
over) { + zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + + ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + is * vr_dim1]), abs(d__2))); + zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0., vr[i__2].i = 0.; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__1, &c_b57, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); + } + + ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + ki * vr_dim1]), abs(d__2))); + zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ + } + + --is; +L80: + ; + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (somev) { + if (! select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); + + i__2 = *n; + work[i__2].r = 1., work[i__2].i = 0.; + +/* Form right-hand side. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + d_cnjg(&z__2, &t[ki + k * t_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L90: */ + } + +/* + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + i__3 = k + k * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.; + } +/* L100: */ + } + + if (ki < *n) { + i__2 = *n - ki; + zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; + + i__2 = *n - ki + 1; + ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + is * vl_dim1]), abs(d__2))); + i__2 = *n - ki + 1; + zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0., vl[i__3].i = 0.; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__2, &c_b57, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * + vl_dim1 + 1], &c__1); + } + + ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1. 
/ ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + ki * vl_dim1]), abs(d__2))); + zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } + + ++is; +L130: + ; + } + } + + return 0; + +/* End of ZTREVC */ + +} /* ztrevc_ */ + +/* Subroutine */ int ztrexc_(char *compq, integer *n, doublecomplex *t, + integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * + ilst, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer k, m1, m2, m3; + static doublereal cs; + static doublecomplex t11, t22, sn, temp; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *), zlartg_( + doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, + doublecomplex *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZTREXC reorders the Schur factorization of a complex matrix + A = Q*T*Q**H, so that the diagonal element of T with row index IFST + is moved to row ILST. + + The Schur form T is reordered by a unitary similarity transformation + Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + postmultplying it with Z. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX*16 array, dimension (LDT,N) + On entry, the upper triangular matrix T. + On exit, the reordered upper triangular matrix. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + unitary transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input) INTEGER + ILST (input) INTEGER + Specify the reordering of the diagonal elements of T: + The element with row index IFST is moved to row ILST by a + sequence of transpositions between adjacent elements. + 1 <= IFST <= N; 1 <= ILST <= N. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Decode and test the input parameters. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! lsame_(compq, "N") && ! 
wantq) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 1 || *ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Move the IFST-th diagonal element forward down the diagonal. */ + + m1 = 0; + m2 = -1; + m3 = 1; + } else { + +/* Move the IFST-th diagonal element backward up the diagonal. */ + + m1 = -1; + m2 = 0; + m3 = -1; + } + + i__1 = *ilst + m2; + i__2 = m3; + for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + +/* Interchange the k-th and (k+1)-th diagonal elements. */ + + i__3 = k + k * t_dim1; + t11.r = t[i__3].r, t11.i = t[i__3].i; + i__3 = k + 1 + (k + 1) * t_dim1; + t22.r = t[i__3].r, t22.i = t[i__3].i; + +/* Determine the transformation to perform the interchange. */ + + z__1.r = t22.r - t11.r, z__1.i = t22.i - t11.i; + zlartg_(&t[k + (k + 1) * t_dim1], &z__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (k + 2 <= *n) { + i__3 = *n - k - 1; + zrot_(&i__3, &t[k + (k + 2) * t_dim1], ldt, &t[k + 1 + (k + 2) * + t_dim1], ldt, &cs, &sn); + } + i__3 = k - 1; + d_cnjg(&z__1, &sn); + zrot_(&i__3, &t[k * t_dim1 + 1], &c__1, &t[(k + 1) * t_dim1 + 1], & + c__1, &cs, &z__1); + + i__3 = k + k * t_dim1; + t[i__3].r = t22.r, t[i__3].i = t22.i; + i__3 = k + 1 + (k + 1) * t_dim1; + t[i__3].r = t11.r, t[i__3].i = t11.i; + + if (wantq) { + +/* Accumulate transformation in the matrix Q. */ + + d_cnjg(&z__1, &sn); + zrot_(n, &q[k * q_dim1 + 1], &c__1, &q[(k + 1) * q_dim1 + 1], & + c__1, &cs, &z__1); + } + +/* L10: */ + } + + return 0; + +/* End of ZTREXC */ + +} /* ztrexc_ */ + +/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + + /* Local variables */ + static integer j; + static doublecomplex ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZTRTI2 computes the inverse of a complex upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. 
If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. 
*/ + + i__1 = *n - j; + ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of ZTRTI2 */ + +} /* ztrti2_ */ + +/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), ztrti2_(char *, char * + , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZTRTRI computes the inverse of a complex upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. 
*/ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + ztrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b57, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b57, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of ZTRTRI */ + +} /* ztrtri_ */ + +/* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNG2R generates an m by n complex matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. 
+ + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by ZGEQRF in the first k columns of its array + argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + zlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + z__1.r = 1. - tau[i__2].r, z__1.i = 0. - tau[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNG2R */ + +} /* zung2r_ */ + +/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunglq_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNGBR generates one of the complex unitary matrices Q or P**H + determined by ZGEBRD when reducing a complex matrix A to bidiagonal + form: A = Q * B * P**H. Q and P**H are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + is of order N: + if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m + rows of P**H, where n >= m >= k; + if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**H is + required, as defined in the transformation applied by ZGEBRD: + = 'Q': generate Q; + = 'P': generate P**H. + + M (input) INTEGER + The number of rows of the matrix Q or P**H to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**H to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by ZGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by ZGEBRD. + K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by ZGEBRD. + On exit, the M-by-N matrix Q or P**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= M. + + TAU (input) COMPLEX*16 array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**H, as + returned by ZGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to ZGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + zungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0., a[i__1].i = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L10: */ + } +/* L20: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + zungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to ZGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + zunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ - 1 + j * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L50: */ + } + i__2 = j * a_dim1 + 1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + zunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNGBR */ + +} /* zungbr_ */ + +/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer 
*, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNGHR generates a complex unitary matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + ZGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of ZGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by ZGEHRD. + On exit, the N-by-N unitary matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEHRD. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! 
lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j - 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L50: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L70: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + zungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNGHR */ + +} /* zunghr_ */ + +/* Subroutine */ int zungl2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by ZGELQF in the first k rows of its array argument A. + On exit, the m by n matrix Q. 
+ + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + WORK (workspace) COMPLEX*16 array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i)' to A(i:m,i:n) from the right */ + + if (i__ < *n) { + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + z__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + d_cnjg(&z__2, &tau[i__]); + z__1.r = 1. - z__2.r, z__1.i = 0. 
- z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNGL2 */ + +} /* zungl2_ */ + +/* Subroutine */ int zunglq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zungl2_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static logical lquery; + static integer lwkopt; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by ZGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit; + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + zlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + zlarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", + &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ + ib + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + zungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGLQ */ + +} /* zunglq_ */ + +/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zung2r_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by ZGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). 
+ For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + zlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + zlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + zung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGQR */ + +} /* zungqr_ */ + +/* Subroutine */ int zunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNM2L overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQLF. 
+ + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1]); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2L */ + +} /* zunm2l_ */ + +/* Subroutine */ int zunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNM2R overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2R */ + +} /* zunm2r_ */ + +/* Subroutine */ int zunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran, applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'C': P**H * C C * P**H + + Here Q and P**H are the unitary matrices determined by ZGEBRD when + reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + and P**H are defined as products of elementary reflectors H(i) and + G(i) respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the unitary matrix Q or P**H that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**H; + = 'P': apply P or P**H. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**H, P or P**H from the Left; + = 'R': apply Q, Q**H, P or P**H from the Right. 
+ + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'C': Conjugate transpose, apply Q**H or P**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by ZGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by ZGEBRD. + K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by ZGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) COMPLEX*16 array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by ZGEBRD in the array argument TAUQ or TAUP. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q + or P*C or P**H*C or C*P or C*P**H. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M); + if N = 0 or M = 0, LWORK >= 1. + For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', + and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the + optimal blocksize. (NB = 0 if M = 0 or N = 0.) + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (*m == 0 || *n == 0) { + nw = 0; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (nw > 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } + } +/* Computing MAX */ + i__1 = 1, i__2 = nw * nb; + lwkopt = max(i__1,i__2); + } else { + lwkopt = 1; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to ZGEBRD with nq >= k */ + + zunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to ZGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + zunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to ZGEBRD with nq > k */ + + zunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to ZGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + zunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMBR */ + +} /* zunmbr_ */ + +/* Subroutine */ int zunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublecomplex *a, integer *lda, + doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern 
integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMHR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by ZGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q**H (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of ZGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by ZGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX*16 array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEHRD. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZUNMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + zunmqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMHR */ + +} /* zunmhr_ */ + +/* Subroutine */ int zunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNML2 overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } else { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } + if (i__ < nq) { + i__3 = nq - i__; + zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; + if (i__ < nq) { + i__3 = nq - i__; + zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of ZUNML2 */ + +} /* zunml2_ */ + +/* Subroutine */ int zunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunml2_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMLQ overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. 
+ If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + zlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + zlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMLQ */ + +} /* zunmlq_ */ + +/* Subroutine */ int zunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunm2l_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMQL overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. 
+ If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQLF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + + if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { + +/* + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < nw && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + zlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + zlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQL */ + +} /* zunmql_ */ + +/* Subroutine */ int zunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunm2r_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMQR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. 
+ + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + zlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + zlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQR */ + +} /* zunmqr_ */ + +/* Subroutine */ int zunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmql_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- 
LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMTR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by ZHETRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from ZHETRD; + = 'L': Lower triangle of A contains elementary reflectors + from ZHETRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by ZHETRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX*16 array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZHETRD. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >=M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! 
lsame_(trans, + "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZUNMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to ZHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + zunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to ZHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + zunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMTR */ + +} /* zunmtr_ */ + diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch new file mode 100644 index 0000000000..1e6fc8c070 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -15278,5 +15278,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -15290,5 +15291,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -15338,5 +15340,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -15354,5 +15357,6 @@ + ! Skip any leading zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/fortran.py b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/fortran.py new file mode 100644 index 0000000000..3b6ac70f00 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/fortran.py @@ -0,0 +1,124 @@ +from __future__ import division, absolute_import, print_function + +import re +import itertools + +def isBlank(line): + return not line +def isLabel(line): + return line[0].isdigit() +def isComment(line): + return line[0] != ' ' +def isContinuation(line): + return line[5] != ' ' + +COMMENT, STATEMENT, CONTINUATION = 0, 1, 2 +def lineType(line): + """Return the type of a line of Fortan code.""" + if isBlank(line): + return COMMENT + elif isLabel(line): + return STATEMENT + elif isComment(line): + return COMMENT + elif isContinuation(line): + return CONTINUATION + else: + return STATEMENT + +class LineIterator(object): + """LineIterator(iterable) + + Return rstrip()'d lines from iterable, while keeping a count of the + line number in the .lineno attribute. + """ + def __init__(self, iterable): + object.__init__(self) + self.iterable = iter(iterable) + self.lineno = 0 + + def __iter__(self): + return self + + def __next__(self): + self.lineno += 1 + line = next(self.iterable) + line = line.rstrip() + return line + + next = __next__ + + +class PushbackIterator(object): + """PushbackIterator(iterable) + + Return an iterator for which items can be pushed back into. + Call the .pushback(item) method to have item returned as the next + value of .next(). + """ + def __init__(self, iterable): + object.__init__(self) + self.iterable = iter(iterable) + self.buffer = [] + + def __iter__(self): + return self + + def __next__(self): + if self.buffer: + return self.buffer.pop() + else: + return next(self.iterable) + + def pushback(self, item): + self.buffer.append(item) + + next = __next__ + + +def fortranSourceLines(fo): + """Return an iterator over statement lines of a Fortran source file. + + Comment and blank lines are stripped out, and continuation lines are + merged. + """ + numberingiter = LineIterator(fo) + # add an extra '' at the end + with_extra = itertools.chain(numberingiter, ['']) + pushbackiter = PushbackIterator(with_extra) + for line in pushbackiter: + t = lineType(line) + if t == COMMENT: + continue + elif t == STATEMENT: + lines = [line] + # this is where we need the extra '', so we don't finish reading + # the iterator when we don't want to handle that + for next_line in pushbackiter: + t = lineType(next_line) + if t == CONTINUATION: + lines.append(next_line[6:]) + else: + pushbackiter.pushback(next_line) + break + yield numberingiter.lineno, ''.join(lines) + else: + raise ValueError("jammed: continuation line not expected: %s:%d" % + (fo.name, numberingiter.lineno)) + +def getDependencies(filename): + """For a Fortran source file, return a list of routines declared as EXTERNAL + in it. 
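+
+    Names are split on commas and lower-cased, so a line such as
+    'EXTERNAL DGEMM, DLAMCH' contributes ['dgemm', 'dlamch'].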
+ """ + fo = open(filename) + external_pat = re.compile(r'^\s*EXTERNAL\s', re.I) + routines = [] + for lineno, line in fortranSourceLines(fo): + m = external_pat.match(line) + if m: + names = line = line[m.end():].strip().split(',') + names = [n.strip().lower() for n in names] + names = [n for n in names if n] + routines.extend(names) + fo.close() + return routines diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/make_lite.py b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/make_lite.py new file mode 100755 index 0000000000..61102d6ab0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/make_lite.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +""" +Usage: make_lite.py <wrapped_routines_file> <lapack_dir> <output_dir> + +Typical invocation: + + make_lite.py wrapped_routines /tmp/lapack-3.x.x + +Requires the following to be on the path: + * f2c + * patch + +""" +from __future__ import division, absolute_import, print_function + +import sys +import os +import subprocess +import shutil + +import fortran +import clapack_scrub + +PY2 = sys.version_info < (3, 0) + +if PY2: + from distutils.spawn import find_executable as which +else: + from shutil import which + +# Arguments to pass to f2c. You'll always want -A for ANSI C prototypes +# Others of interest: -a to not make variables static by default +# -C to check array subscripts +F2C_ARGS = ['-A', '-Nx800'] + +# The header to add to the top of the f2c_*.c file. Note that dlamch_() calls +# will be replaced by the macros below by clapack_scrub.scrub_source() +HEADER = '''\ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif +''' + +class FortranRoutine(object): + """Wrapper for a Fortran routine in a file. + """ + type = 'generic' + def __init__(self, name=None, filename=None): + self.filename = filename + if name is None: + root, ext = os.path.splitext(filename) + name = root + self.name = name + self._dependencies = None + + def dependencies(self): + if self._dependencies is None: + deps = fortran.getDependencies(self.filename) + self._dependencies = [d.lower() for d in deps] + return self._dependencies + + def __repr__(self): + return "FortranRoutine({!r}, filename={!r})".format(self.name, self.filename) + +class UnknownFortranRoutine(FortranRoutine): + """Wrapper for a Fortran routine for which the corresponding file + is not known. + """ + type = 'unknown' + def __init__(self, name): + FortranRoutine.__init__(self, name=name, filename='<unknown>') + + def dependencies(self): + return [] + +class FortranLibrary(object): + """Container for a bunch of Fortran routines. 
+ """ + def __init__(self, src_dirs): + self._src_dirs = src_dirs + self.names_to_routines = {} + + def _findRoutine(self, rname): + rname = rname.lower() + for s in self._src_dirs: + ffilename = os.path.join(s, rname + '.f') + if os.path.exists(ffilename): + return self._newFortranRoutine(rname, ffilename) + return UnknownFortranRoutine(rname) + + def _newFortranRoutine(self, rname, filename): + return FortranRoutine(rname, filename) + + def addIgnorableRoutine(self, rname): + """Add a routine that we don't want to consider when looking at + dependencies. + """ + rname = rname.lower() + routine = UnknownFortranRoutine(rname) + self.names_to_routines[rname] = routine + + def addRoutine(self, rname): + """Add a routine to the library. + """ + self.getRoutine(rname) + + def getRoutine(self, rname): + """Get a routine from the library. Will add if it's not found. + """ + unique = [] + rname = rname.lower() + routine = self.names_to_routines.get(rname, unique) + if routine is unique: + routine = self._findRoutine(rname) + self.names_to_routines[rname] = routine + return routine + + def allRoutineNames(self): + """Return the names of all the routines. + """ + return list(self.names_to_routines.keys()) + + def allRoutines(self): + """Return all the routines. + """ + return list(self.names_to_routines.values()) + + def resolveAllDependencies(self): + """Try to add routines to the library to satisfy all the dependencies + for each routine in the library. + + Returns a set of routine names that have the dependencies unresolved. + """ + done_this = set() + last_todo = set() + while True: + todo = set(self.allRoutineNames()) - done_this + if todo == last_todo: + break + for rn in todo: + r = self.getRoutine(rn) + deps = r.dependencies() + for d in deps: + self.addRoutine(d) + done_this.add(rn) + last_todo = todo + return todo + +class LapackLibrary(FortranLibrary): + def _newFortranRoutine(self, rname, filename): + routine = FortranLibrary._newFortranRoutine(self, rname, filename) + if 'blas' in filename.lower(): + routine.type = 'blas' + elif 'install' in filename.lower(): + routine.type = 'config' + elif rname.startswith('z'): + routine.type = 'z_lapack' + elif rname.startswith('c'): + routine.type = 'c_lapack' + elif rname.startswith('s'): + routine.type = 's_lapack' + elif rname.startswith('d'): + routine.type = 'd_lapack' + else: + routine.type = 'lapack' + return routine + + def allRoutinesByType(self, typename): + routines = sorted((r.name, r) for r in self.allRoutines() if r.type == typename) + return [a[1] for a in routines] + +def printRoutineNames(desc, routines): + print(desc) + for r in routines: + print('\t%s' % r.name) + +def getLapackRoutines(wrapped_routines, ignores, lapack_dir): + blas_src_dir = os.path.join(lapack_dir, 'BLAS', 'SRC') + if not os.path.exists(blas_src_dir): + blas_src_dir = os.path.join(lapack_dir, 'blas', 'src') + lapack_src_dir = os.path.join(lapack_dir, 'SRC') + if not os.path.exists(lapack_src_dir): + lapack_src_dir = os.path.join(lapack_dir, 'src') + install_src_dir = os.path.join(lapack_dir, 'INSTALL') + if not os.path.exists(install_src_dir): + install_src_dir = os.path.join(lapack_dir, 'install') + + library = LapackLibrary([install_src_dir, blas_src_dir, lapack_src_dir]) + + for r in ignores: + library.addIgnorableRoutine(r) + + for w in wrapped_routines: + library.addRoutine(w) + + library.resolveAllDependencies() + + return library + +def getWrappedRoutineNames(wrapped_routines_file): + routines = [] + ignores = [] + with open(wrapped_routines_file) as 
fo: + for line in fo: + line = line.strip() + if not line or line.startswith('#'): + continue + if line.startswith('IGNORE:'): + line = line[7:].strip() + ig = line.split() + ignores.extend(ig) + else: + routines.append(line) + return routines, ignores + +types = {'blas', 'lapack', 'd_lapack', 's_lapack', 'z_lapack', 'c_lapack', 'config'} + +def dumpRoutineNames(library, output_dir): + for typename in {'unknown'} | types: + routines = library.allRoutinesByType(typename) + filename = os.path.join(output_dir, typename + '_routines.lst') + with open(filename, 'w') as fo: + for r in routines: + deps = r.dependencies() + fo.write('%s: %s\n' % (r.name, ' '.join(deps))) + +def concatenateRoutines(routines, output_file): + with open(output_file, 'w') as output_fo: + for r in routines: + with open(r.filename, 'r') as fo: + source = fo.read() + output_fo.write(source) + +class F2CError(Exception): + pass + +def runF2C(fortran_filename, output_dir): + fortran_filename = fortran_filename.replace('\\', '/') + try: + subprocess.check_call( + ["f2c"] + F2C_ARGS + ['-d', output_dir, fortran_filename] + ) + except subprocess.CalledProcessError: + raise F2CError + +def scrubF2CSource(c_file): + with open(c_file) as fo: + source = fo.read() + source = clapack_scrub.scrubSource(source, verbose=True) + with open(c_file, 'w') as fo: + fo.write(HEADER) + fo.write(source) + +def ensure_executable(name): + try: + which(name) + except: + raise SystemExit(name + ' not found') + +def main(): + if len(sys.argv) != 3: + print(__doc__) + return + # Make sure that patch and f2c are found on path + ensure_executable('f2c') + ensure_executable('patch') + + wrapped_routines_file = sys.argv[1] + lapack_src_dir = sys.argv[2] + output_dir = os.path.join(os.path.dirname(__file__), 'build') + + try: + shutil.rmtree(output_dir) + except: + pass + os.makedirs(output_dir) + + wrapped_routines, ignores = getWrappedRoutineNames(wrapped_routines_file) + library = getLapackRoutines(wrapped_routines, ignores, lapack_src_dir) + + dumpRoutineNames(library, output_dir) + + for typename in types: + fortran_file = os.path.join(output_dir, 'f2c_%s.f' % typename) + c_file = fortran_file[:-2] + '.c' + print('creating %s ...' 
% c_file) + routines = library.allRoutinesByType(typename) + concatenateRoutines(routines, fortran_file) + + # apply the patchpatch + patch_file = os.path.basename(fortran_file) + '.patch' + if os.path.exists(patch_file): + subprocess.check_call(['patch', '-u', fortran_file, patch_file]) + print("Patched {}".format(fortran_file)) + try: + runF2C(fortran_file, output_dir) + except F2CError: + print('f2c failed on %s' % fortran_file) + break + scrubF2CSource(c_file) + + # patch any changes needed to the C file + c_patch_file = c_file + '.patch' + if os.path.exists(c_patch_file): + subprocess.check_call(['patch', '-u', c_file, c_patch_file]) + + print() + + for fname in os.listdir(output_dir): + if fname.endswith('.c'): + print('Copying ' + fname) + shutil.copy( + os.path.join(output_dir, fname), + os.path.dirname(__file__), + ) + + +if __name__ == '__main__': + main() diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/python_xerbla.c b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/python_xerbla.c new file mode 100644 index 0000000000..dfc195556b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/python_xerbla.c @@ -0,0 +1,48 @@ +#include "Python.h" + +#undef c_abs +#include "f2c.h" + +/* + From the original manpage: + -------------------------- + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an invalid value. + A message is printed and execution stops. + + Instead of printing a message and stopping the execution, a + ValueError is raised with the message. + + Parameters: + ----------- + srname: Subroutine name to use in error message, maximum six characters. + Spaces at the end are skipped. + info: Number of the invalid parameter. +*/ + +int xerbla_(char *srname, integer *info) +{ + static const char format[] = "On entry to %.*s" \ + " parameter number %d had an illegal value"; + char buf[sizeof(format) + 6 + 4]; /* 6 for name, 4 for param. num. 
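+           (sizeof(format) already counts the placeholders and the trailing NUL,
+            and PyOS_snprintf truncates, so buf cannot be overrun.)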
*/ + + int len = 0; /* length of subroutine name*/ +#ifdef WITH_THREAD + PyGILState_STATE save; +#endif + + while( len<6 && srname[len]!='\0' ) + len++; + while( len && srname[len-1]==' ' ) + len--; +#ifdef WITH_THREAD + save = PyGILState_Ensure(); +#endif + PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info); + PyErr_SetString(PyExc_ValueError, buf); +#ifdef WITH_THREAD + PyGILState_Release(save); +#endif + + return 0; +} diff --git a/contrib/python/numpy/py2/numpy/linalg/lapack_lite/wrapped_routines b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/wrapped_routines new file mode 100644 index 0000000000..0d99c724d2 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/lapack_lite/wrapped_routines @@ -0,0 +1,51 @@ +ccopy +cgeev +cgelsd +cgemm +cgesdd +cgesv +cgetrf +cheevd +cpotrf +cpotri +cpotrs +dcopy +dgeev +dgelsd +dgemm +dgeqrf +dgesdd +dgesv +dgetrf +dorgqr +dpotrf +dpotri +dpotrs +dsyevd +scopy +sgeev +sgelsd +sgemm +sgesdd +sgesv +sgetrf +spotrf +spotri +spotrs +ssyevd +zcopy +zgeev +zgelsd +zgemm +zgeqrf +zgesdd +zgesv +zgetrf +zheevd +zpotrf +zpotri +zpotrs +zungqr +# need this b/c it's not properly declared as external in the BLAS source +dcabs1 +IGNORE: xerbla diff --git a/contrib/python/numpy/py2/numpy/linalg/setup.py b/contrib/python/numpy/py2/numpy/linalg/setup.py new file mode 100644 index 0000000000..66c07c9e1e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/setup.py @@ -0,0 +1,60 @@ +from __future__ import division, print_function + +import os +import sys + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + from numpy.distutils.system_info import get_info + config = Configuration('linalg', parent_package, top_path) + + config.add_data_dir('tests') + + # Configure lapack_lite + + src_dir = 'lapack_lite' + lapack_lite_src = [ + os.path.join(src_dir, 'python_xerbla.c'), + os.path.join(src_dir, 'f2c_z_lapack.c'), + os.path.join(src_dir, 'f2c_c_lapack.c'), + os.path.join(src_dir, 'f2c_d_lapack.c'), + os.path.join(src_dir, 'f2c_s_lapack.c'), + os.path.join(src_dir, 'f2c_lapack.c'), + os.path.join(src_dir, 'f2c_blas.c'), + os.path.join(src_dir, 'f2c_config.c'), + os.path.join(src_dir, 'f2c.c'), + ] + all_sources = config.paths(lapack_lite_src) + + lapack_info = get_info('lapack_opt', 0) # and {} + + def get_lapack_lite_sources(ext, build_dir): + if not lapack_info: + print("### Warning: Using unoptimized lapack ###") + return all_sources + else: + if sys.platform == 'win32': + print("### Warning: python_xerbla.c is disabled ###") + return [] + return [all_sources[0]] + + config.add_extension( + 'lapack_lite', + sources=['lapack_litemodule.c', get_lapack_lite_sources], + depends=['lapack_lite/f2c.h'], + extra_info=lapack_info, + ) + + # umath_linalg module + config.add_extension( + '_umath_linalg', + sources=['umath_linalg.c.src', get_lapack_lite_sources], + depends=['lapack_lite/f2c.h'], + extra_info=lapack_info, + libraries=['npymath'], + ) + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/linalg/tests/__init__.py b/contrib/python/numpy/py2/numpy/linalg/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/linalg/tests/test_build.py b/contrib/python/numpy/py2/numpy/linalg/tests/test_build.py new file mode 100644 index 0000000000..921390da33 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/tests/test_build.py @@ 
-0,0 +1,55 @@ +from __future__ import division, absolute_import, print_function + +from subprocess import PIPE, Popen +import sys +import re +import pytest + +from numpy.linalg import lapack_lite +from numpy.testing import assert_ + + +class FindDependenciesLdd(object): + + def __init__(self): + self.cmd = ['ldd'] + + try: + p = Popen(self.cmd, stdout=PIPE, stderr=PIPE) + stdout, stderr = p.communicate() + except OSError: + raise RuntimeError("command %s cannot be run" % self.cmd) + + def get_dependencies(self, lfile): + p = Popen(self.cmd + [lfile], stdout=PIPE, stderr=PIPE) + stdout, stderr = p.communicate() + if not (p.returncode == 0): + raise RuntimeError("failed dependencies check for %s" % lfile) + + return stdout + + def grep_dependencies(self, lfile, deps): + stdout = self.get_dependencies(lfile) + + rdeps = dict([(dep, re.compile(dep)) for dep in deps]) + founds = [] + for l in stdout.splitlines(): + for k, v in rdeps.items(): + if v.search(l): + founds.append(k) + + return founds + + +class TestF77Mismatch(object): + + @pytest.mark.skipif(not(sys.platform[:5] == 'linux'), + reason="no fortran compiler on non-Linux platform") + def test_lapack(self): + f = FindDependenciesLdd() + deps = f.grep_dependencies(lapack_lite.__file__, + [b'libg2c', b'libgfortran']) + assert_(len(deps) <= 1, + """Both g77 and gfortran runtimes linked in lapack_lite ! This is likely to +cause random crashes and wrong results. See numpy INSTALL.txt for more +information.""") diff --git a/contrib/python/numpy/py2/numpy/linalg/tests/test_deprecations.py b/contrib/python/numpy/py2/numpy/linalg/tests/test_deprecations.py new file mode 100644 index 0000000000..e12755e0d5 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/tests/test_deprecations.py @@ -0,0 +1,22 @@ +"""Test deprecation and future warnings. + +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_warns + + +def test_qr_mode_full_future_warning(): + """Check mode='full' FutureWarning. + + In numpy 1.8 the mode options 'full' and 'economic' in linalg.qr were + deprecated. The release date will probably be sometime in the summer + of 2013. 
+ + """ + a = np.eye(2) + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='full') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='f') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='economic') + assert_warns(DeprecationWarning, np.linalg.qr, a, mode='e') diff --git a/contrib/python/numpy/py2/numpy/linalg/tests/test_linalg.py b/contrib/python/numpy/py2/numpy/linalg/tests/test_linalg.py new file mode 100644 index 0000000000..235488c6e8 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/tests/test_linalg.py @@ -0,0 +1,1964 @@ +""" Test functions for linalg module + +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import itertools +import traceback +import textwrap +import subprocess +import pytest + +import numpy as np +from numpy import array, single, double, csingle, cdouble, dot, identity, matmul +from numpy import multiply, atleast_2d, inf, asarray +from numpy import linalg +from numpy.linalg import matrix_power, norm, matrix_rank, multi_dot, LinAlgError +from numpy.linalg.linalg import _multi_dot_matrix_chain_order +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_array_equal, + assert_almost_equal, assert_allclose, suppress_warnings, + assert_raises_regex, + ) + + +def consistent_subclass(out, in_): + # For ndarray subclass input, our output should have the same subclass + # (non-ndarray input gets converted to ndarray). + return type(out) is (type(in_) if isinstance(in_, np.ndarray) + else np.ndarray) + + +old_assert_almost_equal = assert_almost_equal + + +def assert_almost_equal(a, b, single_decimal=6, double_decimal=12, **kw): + if asarray(a).dtype.type in (single, csingle): + decimal = single_decimal + else: + decimal = double_decimal + old_assert_almost_equal(a, b, decimal=decimal, **kw) + + +def get_real_dtype(dtype): + return {single: single, double: double, + csingle: single, cdouble: double}[dtype] + + +def get_complex_dtype(dtype): + return {single: csingle, double: cdouble, + csingle: csingle, cdouble: cdouble}[dtype] + + +def get_rtol(dtype): + # Choose a safe rtol + if dtype in (single, csingle): + return 1e-5 + else: + return 1e-11 + + +# used to categorize tests +all_tags = { + 'square', 'nonsquare', 'hermitian', # mutually exclusive + 'generalized', 'size-0', 'strided' # optional additions +} + + +class LinalgCase(object): + def __init__(self, name, a, b, tags=set()): + """ + A bundle of arguments to be passed to a test case, with an identifying + name, the operands a and b, and a set of tags to filter the tests + """ + assert_(isinstance(name, str)) + self.name = name + self.a = a + self.b = b + self.tags = frozenset(tags) # prevent shared tags + + def check(self, do): + """ + Run the function `do` on this test case, expanding arguments + """ + do(self.a, self.b, tags=self.tags) + + def __repr__(self): + return "<LinalgCase: %s>" % (self.name,) + + +def apply_tag(tag, cases): + """ + Add the given tag (a string) to each of the cases (a list of LinalgCase + objects) + """ + assert tag in all_tags, "Invalid tag" + for case in cases: + case.tags = case.tags | {tag} + return cases + + +# +# Base test cases +# + +np.random.seed(1234) + +CASES = [] + +# square test cases +CASES += apply_tag('square', [ + LinalgCase("single", + array([[1., 2.], [3., 4.]], dtype=single), + array([2., 1.], dtype=single)), + LinalgCase("double", + array([[1., 2.], [3., 4.]], dtype=double), + array([2., 1.], dtype=double)), + LinalgCase("double_2", + array([[1., 2.], [3., 4.]], 
dtype=double), + array([[2., 1., 4.], [3., 4., 6.]], dtype=double)), + LinalgCase("csingle", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=csingle), + array([2. + 1j, 1. + 2j], dtype=csingle)), + LinalgCase("cdouble", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=cdouble), + array([2. + 1j, 1. + 2j], dtype=cdouble)), + LinalgCase("cdouble_2", + array([[1. + 2j, 2 + 3j], [3 + 4j, 4 + 5j]], dtype=cdouble), + array([[2. + 1j, 1. + 2j, 1 + 3j], [1 - 2j, 1 - 3j, 1 - 6j]], dtype=cdouble)), + LinalgCase("0x0", + np.empty((0, 0), dtype=double), + np.empty((0,), dtype=double), + tags={'size-0'}), + LinalgCase("8x8", + np.random.rand(8, 8), + np.random.rand(8)), + LinalgCase("1x1", + np.random.rand(1, 1), + np.random.rand(1)), + LinalgCase("nonarray", + [[1, 2], [3, 4]], + [2, 1]), +]) + +# non-square test-cases +CASES += apply_tag('nonsquare', [ + LinalgCase("single_nsq_1", + array([[1., 2., 3.], [3., 4., 6.]], dtype=single), + array([2., 1.], dtype=single)), + LinalgCase("single_nsq_2", + array([[1., 2.], [3., 4.], [5., 6.]], dtype=single), + array([2., 1., 3.], dtype=single)), + LinalgCase("double_nsq_1", + array([[1., 2., 3.], [3., 4., 6.]], dtype=double), + array([2., 1.], dtype=double)), + LinalgCase("double_nsq_2", + array([[1., 2.], [3., 4.], [5., 6.]], dtype=double), + array([2., 1., 3.], dtype=double)), + LinalgCase("csingle_nsq_1", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=csingle), + array([2. + 1j, 1. + 2j], dtype=csingle)), + LinalgCase("csingle_nsq_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=csingle), + array([2. + 1j, 1. + 2j, 3. - 3j], dtype=csingle)), + LinalgCase("cdouble_nsq_1", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=cdouble), + array([2. + 1j, 1. + 2j], dtype=cdouble)), + LinalgCase("cdouble_nsq_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=cdouble), + array([2. + 1j, 1. + 2j, 3. - 3j], dtype=cdouble)), + LinalgCase("cdouble_nsq_1_2", + array( + [[1. + 1j, 2. + 2j, 3. - 3j], [3. - 5j, 4. + 9j, 6. + 2j]], dtype=cdouble), + array([[2. + 1j, 1. + 2j], [1 - 1j, 2 - 2j]], dtype=cdouble)), + LinalgCase("cdouble_nsq_2_2", + array( + [[1. + 1j, 2. + 2j], [3. - 3j, 4. - 9j], [5. - 4j, 6. + 8j]], dtype=cdouble), + array([[2. + 1j, 1. 
+ 2j], [1 - 1j, 2 - 2j], [1 - 1j, 2 - 2j]], dtype=cdouble)), + LinalgCase("8x11", + np.random.rand(8, 11), + np.random.rand(8)), + LinalgCase("1x5", + np.random.rand(1, 5), + np.random.rand(1)), + LinalgCase("5x1", + np.random.rand(5, 1), + np.random.rand(5)), + LinalgCase("0x4", + np.random.rand(0, 4), + np.random.rand(0), + tags={'size-0'}), + LinalgCase("4x0", + np.random.rand(4, 0), + np.random.rand(4), + tags={'size-0'}), +]) + +# hermitian test-cases +CASES += apply_tag('hermitian', [ + LinalgCase("hsingle", + array([[1., 2.], [2., 1.]], dtype=single), + None), + LinalgCase("hdouble", + array([[1., 2.], [2., 1.]], dtype=double), + None), + LinalgCase("hcsingle", + array([[1., 2 + 3j], [2 - 3j, 1]], dtype=csingle), + None), + LinalgCase("hcdouble", + array([[1., 2 + 3j], [2 - 3j, 1]], dtype=cdouble), + None), + LinalgCase("hempty", + np.empty((0, 0), dtype=double), + None, + tags={'size-0'}), + LinalgCase("hnonarray", + [[1, 2], [2, 1]], + None), + LinalgCase("matrix_b_only", + array([[1., 2.], [2., 1.]]), + None), + LinalgCase("hmatrix_1x1", + np.random.rand(1, 1), + None), +]) + + +# +# Gufunc test cases +# +def _make_generalized_cases(): + new_cases = [] + + for case in CASES: + if not isinstance(case.a, np.ndarray): + continue + + a = np.array([case.a, 2 * case.a, 3 * case.a]) + if case.b is None: + b = None + else: + b = np.array([case.b, 7 * case.b, 6 * case.b]) + new_case = LinalgCase(case.name + "_tile3", a, b, + tags=case.tags | {'generalized'}) + new_cases.append(new_case) + + a = np.array([case.a] * 2 * 3).reshape((3, 2) + case.a.shape) + if case.b is None: + b = None + else: + b = np.array([case.b] * 2 * 3).reshape((3, 2) + case.b.shape) + new_case = LinalgCase(case.name + "_tile213", a, b, + tags=case.tags | {'generalized'}) + new_cases.append(new_case) + + return new_cases + + +CASES += _make_generalized_cases() + + +# +# Generate stride combination variations of the above +# +def _stride_comb_iter(x): + """ + Generate cartesian product of strides for all axes + """ + + if not isinstance(x, np.ndarray): + yield x, "nop" + return + + stride_set = [(1,)] * x.ndim + stride_set[-1] = (1, 3, -4) + if x.ndim > 1: + stride_set[-2] = (1, 3, -4) + if x.ndim > 2: + stride_set[-3] = (1, -4) + + for repeats in itertools.product(*tuple(stride_set)): + new_shape = [abs(a * b) for a, b in zip(x.shape, repeats)] + slices = tuple([slice(None, None, repeat) for repeat in repeats]) + + # new array with different strides, but same data + xi = np.empty(new_shape, dtype=x.dtype) + xi.view(np.uint32).fill(0xdeadbeef) + xi = xi[slices] + xi[...] 
= x + xi = xi.view(x.__class__) + assert_(np.all(xi == x)) + yield xi, "stride_" + "_".join(["%+d" % j for j in repeats]) + + # generate also zero strides if possible + if x.ndim >= 1 and x.shape[-1] == 1: + s = list(x.strides) + s[-1] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0" + if x.ndim >= 2 and x.shape[-2] == 1: + s = list(x.strides) + s[-2] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0_x" + if x.ndim >= 2 and x.shape[:-2] == (1, 1): + s = list(x.strides) + s[-1] = 0 + s[-2] = 0 + xi = np.lib.stride_tricks.as_strided(x, strides=s) + yield xi, "stride_xxx_0_0" + + +def _make_strided_cases(): + new_cases = [] + for case in CASES: + for a, a_label in _stride_comb_iter(case.a): + for b, b_label in _stride_comb_iter(case.b): + new_case = LinalgCase(case.name + "_" + a_label + "_" + b_label, a, b, + tags=case.tags | {'strided'}) + new_cases.append(new_case) + return new_cases + + +CASES += _make_strided_cases() + + +# +# Test different routines against the above cases +# +class LinalgTestCase(object): + TEST_CASES = CASES + + def check_cases(self, require=set(), exclude=set()): + """ + Run func on each of the cases with all of the tags in require, and none + of the tags in exclude + """ + for case in self.TEST_CASES: + # filter by require and exclude + if case.tags & require != require: + continue + if case.tags & exclude: + continue + + try: + case.check(self.do) + except Exception: + msg = "In test case: %r\n\n" % case + msg += traceback.format_exc() + raise AssertionError(msg) + + +class LinalgSquareTestCase(LinalgTestCase): + + def test_sq_cases(self): + self.check_cases(require={'square'}, + exclude={'generalized', 'size-0'}) + + def test_empty_sq_cases(self): + self.check_cases(require={'square', 'size-0'}, + exclude={'generalized'}) + + +class LinalgNonsquareTestCase(LinalgTestCase): + + def test_nonsq_cases(self): + self.check_cases(require={'nonsquare'}, + exclude={'generalized', 'size-0'}) + + def test_empty_nonsq_cases(self): + self.check_cases(require={'nonsquare', 'size-0'}, + exclude={'generalized'}) + + +class HermitianTestCase(LinalgTestCase): + + def test_herm_cases(self): + self.check_cases(require={'hermitian'}, + exclude={'generalized', 'size-0'}) + + def test_empty_herm_cases(self): + self.check_cases(require={'hermitian', 'size-0'}, + exclude={'generalized'}) + + +class LinalgGeneralizedSquareTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_sq_cases(self): + self.check_cases(require={'generalized', 'square'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_sq_cases(self): + self.check_cases(require={'generalized', 'square', 'size-0'}) + + +class LinalgGeneralizedNonsquareTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_nonsq_cases(self): + self.check_cases(require={'generalized', 'nonsquare'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_nonsq_cases(self): + self.check_cases(require={'generalized', 'nonsquare', 'size-0'}) + + +class HermitianGeneralizedTestCase(LinalgTestCase): + + @pytest.mark.slow + def test_generalized_herm_cases(self): + self.check_cases(require={'generalized', 'hermitian'}, + exclude={'size-0'}) + + @pytest.mark.slow + def test_generalized_empty_herm_cases(self): + self.check_cases(require={'generalized', 'hermitian', 'size-0'}, + exclude={'none'}) + + +def dot_generalized(a, b): + a = asarray(a) + if a.ndim >= 3: + if a.ndim == b.ndim: + # matrix x matrix + new_shape = 
a.shape[:-1] + b.shape[-1:] + elif a.ndim == b.ndim + 1: + # matrix x vector + new_shape = a.shape[:-1] + else: + raise ValueError("Not implemented...") + r = np.empty(new_shape, dtype=np.common_type(a, b)) + for c in itertools.product(*map(range, a.shape[:-2])): + r[c] = dot(a[c], b[c]) + return r + else: + return dot(a, b) + + +def identity_like_generalized(a): + a = asarray(a) + if a.ndim >= 3: + r = np.empty(a.shape, dtype=a.dtype) + r[...] = identity(a.shape[-2]) + return r + else: + return identity(a.shape[0]) + + +class SolveCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + # kept apart from TestSolve for use for testing with matrices. + def do(self, a, b, tags): + x = linalg.solve(a, b) + assert_almost_equal(b, dot_generalized(a, x)) + assert_(consistent_subclass(x, b)) + + +class TestSolve(SolveCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.solve(x, x).dtype, dtype) + + def test_0_size(self): + class ArraySubclass(np.ndarray): + pass + # Test system of 0x0 matrices + a = np.arange(8).reshape(2, 2, 2) + b = np.arange(6).reshape(1, 2, 3).view(ArraySubclass) + + expected = linalg.solve(a, b)[:, 0:0, :] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0, :]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + # Test errors for non-square and only b's dimension being 0 + assert_raises(linalg.LinAlgError, linalg.solve, a[:, 0:0, 0:1], b) + assert_raises(ValueError, linalg.solve, a, b[:, 0:0, :]) + + # Test broadcasting error + b = np.arange(6).reshape(1, 3, 2) # broadcasting error + assert_raises(ValueError, linalg.solve, a, b) + assert_raises(ValueError, linalg.solve, a[0:0], b[0:0]) + + # Test zero "single equations" with 0x0 matrices. + b = np.arange(2).reshape(1, 2).view(ArraySubclass) + expected = linalg.solve(a, b)[:, 0:0] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + b = np.arange(3).reshape(1, 3) + assert_raises(ValueError, linalg.solve, a, b) + assert_raises(ValueError, linalg.solve, a[0:0], b[0:0]) + assert_raises(ValueError, linalg.solve, a[:, 0:0, 0:0], b) + + def test_0_size_k(self): + # test zero multiple equation (K=0) case. + class ArraySubclass(np.ndarray): + pass + a = np.arange(4).reshape(1, 2, 2) + b = np.arange(6).reshape(3, 2, 1).view(ArraySubclass) + + expected = linalg.solve(a, b)[:, :, 0:0] + result = linalg.solve(a, b[:, :, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + # test both zero. 
+ expected = linalg.solve(a, b)[:, 0:0, 0:0] + result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0, 0:0]) + assert_array_equal(result, expected) + assert_(isinstance(result, ArraySubclass)) + + +class InvCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + a_inv = linalg.inv(a) + assert_almost_equal(dot_generalized(a, a_inv), + identity_like_generalized(a)) + assert_(consistent_subclass(a_inv, a)) + + +class TestInv(InvCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.inv(x).dtype, dtype) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.inv(a) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res.shape) + assert_(isinstance(res, ArraySubclass)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.inv(a) + assert_(res.dtype.type is np.complex64) + assert_equal(a.shape, res.shape) + assert_(isinstance(res, ArraySubclass)) + + +class EigvalsCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + ev = linalg.eigvals(a) + evalues, evectors = linalg.eig(a) + assert_almost_equal(ev, evalues) + + +class TestEigvals(EigvalsCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(linalg.eigvals(x).dtype, dtype) + x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) + assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype)) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.eigvals(a) + assert_(res.dtype.type is np.float64) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.eigvals(a) + assert_(res.dtype.type is np.complex64) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + +class EigCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + evalues, evectors = linalg.eig(a) + assert_allclose(dot_generalized(a, evectors), + np.asarray(evectors) * np.asarray(evalues)[..., None, :], + rtol=get_rtol(evalues.dtype)) + assert_(consistent_subclass(evectors, a)) + + +class TestEig(EigCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w, v = np.linalg.eig(x) + assert_equal(w.dtype, dtype) + assert_equal(v.dtype, dtype) + + x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) + w, v = np.linalg.eig(x) + assert_equal(w.dtype, get_complex_dtype(dtype)) + assert_equal(v.dtype, get_complex_dtype(dtype)) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res, res_v = linalg.eig(a) + assert_(res_v.dtype.type is np.float64) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res_v.shape) + assert_equal((0, 1), res.shape) + # This is just for 
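+        # (0x0 coefficient matrices combined with zero right-hand-side columns)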
documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res, res_v = linalg.eig(a) + assert_(res_v.dtype.type is np.complex64) + assert_(res.dtype.type is np.complex64) + assert_equal(a.shape, res_v.shape) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + +class SVDCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + u, s, vt = linalg.svd(a, 0) + assert_allclose(a, dot_generalized(np.asarray(u) * np.asarray(s)[..., None, :], + np.asarray(vt)), + rtol=get_rtol(u.dtype)) + assert_(consistent_subclass(u, a)) + assert_(consistent_subclass(vt, a)) + + +class TestSVD(SVDCases): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + u, s, vh = linalg.svd(x) + assert_equal(u.dtype, dtype) + assert_equal(s.dtype, get_real_dtype(dtype)) + assert_equal(vh.dtype, dtype) + s = linalg.svd(x, compute_uv=False) + assert_equal(s.dtype, get_real_dtype(dtype)) + + def test_empty_identity(self): + """ Empty input should put an identity matrix in u or vh """ + x = np.empty((4, 0)) + u, s, vh = linalg.svd(x, compute_uv=True) + assert_equal(u.shape, (4, 4)) + assert_equal(vh.shape, (0, 0)) + assert_equal(u, np.eye(4)) + + x = np.empty((0, 4)) + u, s, vh = linalg.svd(x, compute_uv=True) + assert_equal(u.shape, (0, 0)) + assert_equal(vh.shape, (4, 4)) + assert_equal(vh, np.eye(4)) + + +class CondCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + # cond(x, p) for p in (None, 2, -2) + + def do(self, a, b, tags): + c = asarray(a) # a might be a matrix + if 'size-0' in tags: + assert_raises(LinAlgError, linalg.cond, c) + return + + # +-2 norms + s = linalg.svd(c, compute_uv=False) + assert_almost_equal( + linalg.cond(a), s[..., 0] / s[..., -1], + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, 2), s[..., 0] / s[..., -1], + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -2), s[..., -1] / s[..., 0], + single_decimal=5, double_decimal=11) + + # Other norms + cinv = np.linalg.inv(c) + assert_almost_equal( + linalg.cond(a, 1), + abs(c).sum(-2).max(-1) * abs(cinv).sum(-2).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -1), + abs(c).sum(-2).min(-1) * abs(cinv).sum(-2).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, np.inf), + abs(c).sum(-1).max(-1) * abs(cinv).sum(-1).max(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, -np.inf), + abs(c).sum(-1).min(-1) * abs(cinv).sum(-1).min(-1), + single_decimal=5, double_decimal=11) + assert_almost_equal( + linalg.cond(a, 'fro'), + np.sqrt((abs(c)**2).sum(-1).sum(-1) + * (abs(cinv)**2).sum(-1).sum(-1)), + single_decimal=5, double_decimal=11) + + +class TestCond(CondCases): + def test_basic_nonsvd(self): + # Smoketest the non-svd norms + A = array([[1., 0, 1], [0, -2., 0], [0, 0, 3.]]) + assert_almost_equal(linalg.cond(A, inf), 4) + assert_almost_equal(linalg.cond(A, -inf), 2/3) + assert_almost_equal(linalg.cond(A, 1), 4) + assert_almost_equal(linalg.cond(A, -1), 0.5) + assert_almost_equal(linalg.cond(A, 'fro'), np.sqrt(265 / 12)) + + def test_singular(self): + # Singular matrices have infinite condition number for + # positive norms, and negative norms shouldn't raise + # exceptions 
+ As = [np.zeros((2, 2)), np.ones((2, 2))] + p_pos = [None, 1, 2, 'fro'] + p_neg = [-1, -2] + for A, p in itertools.product(As, p_pos): + # Inversion may not hit exact infinity, so just check the + # number is large + assert_(linalg.cond(A, p) > 1e15) + for A, p in itertools.product(As, p_neg): + linalg.cond(A, p) + + def test_nan(self): + # nans should be passed through, not converted to infs + ps = [None, 1, -1, 2, -2, 'fro'] + p_pos = [None, 1, 2, 'fro'] + + A = np.ones((2, 2)) + A[0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(isinstance(c, np.float_)) + assert_(np.isnan(c)) + + A = np.ones((3, 2, 2)) + A[1,0,1] = np.nan + for p in ps: + c = linalg.cond(A, p) + assert_(np.isnan(c[1])) + if p in p_pos: + assert_(c[0] > 1e15) + assert_(c[2] > 1e15) + else: + assert_(not np.isnan(c[0])) + assert_(not np.isnan(c[2])) + + def test_stacked_singular(self): + # Check behavior when only some of the stacked matrices are + # singular + np.random.seed(1234) + A = np.random.rand(2, 2, 2, 2) + A[0,0] = 0 + A[1,1] = 0 + + for p in (None, 1, 2, 'fro', -1, -2): + c = linalg.cond(A, p) + assert_equal(c[0,0], np.inf) + assert_equal(c[1,1], np.inf) + assert_(np.isfinite(c[0,1])) + assert_(np.isfinite(c[1,0])) + + +class PinvCases(LinalgSquareTestCase, + LinalgNonsquareTestCase, + LinalgGeneralizedSquareTestCase, + LinalgGeneralizedNonsquareTestCase): + + def do(self, a, b, tags): + a_ginv = linalg.pinv(a) + # `a @ a_ginv == I` does not hold if a is singular + dot = dot_generalized + assert_almost_equal(dot(dot(a, a_ginv), a), a, single_decimal=5, double_decimal=11) + assert_(consistent_subclass(a_ginv, a)) + + +class TestPinv(PinvCases): + pass + + +class DetCases(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): + + def do(self, a, b, tags): + d = linalg.det(a) + (s, ld) = linalg.slogdet(a) + if asarray(a).dtype.type in (single, double): + ad = asarray(a).astype(double) + else: + ad = asarray(a).astype(cdouble) + ev = linalg.eigvals(ad) + assert_almost_equal(d, multiply.reduce(ev, axis=-1)) + assert_almost_equal(s * np.exp(ld), multiply.reduce(ev, axis=-1)) + + s = np.atleast_1d(s) + ld = np.atleast_1d(ld) + m = (s != 0) + assert_almost_equal(np.abs(s[m]), 1) + assert_equal(ld[~m], -inf) + + +class TestDet(DetCases): + def test_zero(self): + assert_equal(linalg.det([[0.0]]), 0.0) + assert_equal(type(linalg.det([[0.0]])), double) + assert_equal(linalg.det([[0.0j]]), 0.0) + assert_equal(type(linalg.det([[0.0j]])), cdouble) + + assert_equal(linalg.slogdet([[0.0]]), (0.0, -inf)) + assert_equal(type(linalg.slogdet([[0.0]])[0]), double) + assert_equal(type(linalg.slogdet([[0.0]])[1]), double) + assert_equal(linalg.slogdet([[0.0j]]), (0.0j, -inf)) + assert_equal(type(linalg.slogdet([[0.0j]])[0]), cdouble) + assert_equal(type(linalg.slogdet([[0.0j]])[1]), double) + + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + assert_equal(np.linalg.det(x).dtype, dtype) + ph, s = np.linalg.slogdet(x) + assert_equal(s.dtype, get_real_dtype(dtype)) + assert_equal(ph.dtype, dtype) + + def test_0_size(self): + a = np.zeros((0, 0), dtype=np.complex64) + res = linalg.det(a) + assert_equal(res, 1.) + assert_(res.dtype.type is np.complex64) + res = linalg.slogdet(a) + assert_equal(res, (1, 0)) + assert_(res[0].dtype.type is np.complex64) + assert_(res[1].dtype.type is np.float32) + + a = np.zeros((0, 0), dtype=np.float64) + res = linalg.det(a) + assert_equal(res, 1.) 
+ assert_(res.dtype.type is np.float64) + res = linalg.slogdet(a) + assert_equal(res, (1, 0)) + assert_(res[0].dtype.type is np.float64) + assert_(res[1].dtype.type is np.float64) + + +class LstsqCases(LinalgSquareTestCase, LinalgNonsquareTestCase): + + def do(self, a, b, tags): + arr = np.asarray(a) + m, n = arr.shape + u, s, vt = linalg.svd(a, 0) + x, residuals, rank, sv = linalg.lstsq(a, b, rcond=-1) + if m == 0: + assert_((x == 0).all()) + if m <= n: + assert_almost_equal(b, dot(a, x)) + assert_equal(rank, m) + else: + assert_equal(rank, n) + assert_almost_equal(sv, sv.__array_wrap__(s)) + if rank == n and m > n: + expect_resids = ( + np.asarray(abs(np.dot(a, x) - b)) ** 2).sum(axis=0) + expect_resids = np.asarray(expect_resids) + if np.asarray(b).ndim == 1: + expect_resids.shape = (1,) + assert_equal(residuals.shape, expect_resids.shape) + else: + expect_resids = np.array([]).view(type(x)) + assert_almost_equal(residuals, expect_resids) + assert_(np.issubdtype(residuals.dtype, np.floating)) + assert_(consistent_subclass(x, b)) + assert_(consistent_subclass(residuals, b)) + + +class TestLstsq(LstsqCases): + def test_future_rcond(self): + a = np.array([[0., 1., 0., 1., 2., 0.], + [0., 2., 0., 0., 1., 0.], + [1., 0., 1., 0., 0., 4.], + [0., 0., 0., 2., 3., 0.]]).T + + b = np.array([1, 0, 0, 0, 0, 0]) + with suppress_warnings() as sup: + w = sup.record(FutureWarning, "`rcond` parameter will change") + x, residuals, rank, s = linalg.lstsq(a, b) + assert_(rank == 4) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=-1) + assert_(rank == 4) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=None) + assert_(rank == 3) + # Warning should be raised exactly once (first command) + assert_(len(w) == 1) + + @pytest.mark.parametrize(["m", "n", "n_rhs"], [ + (4, 2, 2), + (0, 4, 1), + (0, 4, 2), + (4, 0, 1), + (4, 0, 2), + (4, 2, 0), + (0, 0, 0) + ]) + def test_empty_a_b(self, m, n, n_rhs): + a = np.arange(m * n).reshape(m, n) + b = np.ones((m, n_rhs)) + x, residuals, rank, s = linalg.lstsq(a, b, rcond=None) + if m == 0: + assert_((x == 0).all()) + assert_equal(x.shape, (n, n_rhs)) + assert_equal(residuals.shape, ((n_rhs,) if m > n else (0,))) + if m > n and n_rhs > 0: + # residuals are exactly the squared norms of b's columns + r = b - np.dot(a, x) + assert_almost_equal(residuals, (r * r).sum(axis=-2)) + assert_equal(rank, min(m, n)) + assert_equal(s.shape, (min(m, n),)) + + def test_incompatible_dims(self): + # use modified version of docstring example + x = np.array([0, 1, 2, 3]) + y = np.array([-1, 0.2, 0.9, 2.1, 3.3]) + A = np.vstack([x, np.ones(len(x))]).T + with assert_raises_regex(LinAlgError, "Incompatible dimensions"): + linalg.lstsq(A, y, rcond=None) + + +@pytest.mark.parametrize('dt', [np.dtype(c) for c in '?bBhHiIqQefdgFDGO']) +class TestMatrixPower(object): + + rshft_0 = np.eye(4) + rshft_1 = rshft_0[[3, 0, 1, 2]] + rshft_2 = rshft_0[[2, 3, 0, 1]] + rshft_3 = rshft_0[[1, 2, 3, 0]] + rshft_all = [rshft_0, rshft_1, rshft_2, rshft_3] + noninv = array([[1, 0], [0, 0]]) + stacked = np.block([[[rshft_0]]]*2) + #FIXME the 'e' dtype might work in future + dtnoinv = [object, np.dtype('e'), np.dtype('g'), np.dtype('G')] + + def test_large_power(self, dt): + rshft = self.rshft_1.astype(dt) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 0), self.rshft_0) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 1), self.rshft_1) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 2), self.rshft_2) + assert_equal( + matrix_power(rshft, 2**100 + 2**10 + 2**5 + 3), 
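+        # (the determinant of a 0x0 matrix is an empty product, hence exactly 1)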
self.rshft_3) + + def test_power_is_zero(self, dt): + def tz(M): + mz = matrix_power(M, 0) + assert_equal(mz, identity_like_generalized(M)) + assert_equal(mz.dtype, M.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_one(self, dt): + def tz(mat): + mz = matrix_power(mat, 1) + assert_equal(mz, mat) + assert_equal(mz.dtype, mat.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_two(self, dt): + def tz(mat): + mz = matrix_power(mat, 2) + mmul = matmul if mat.dtype != object else dot + assert_equal(mz, mmul(mat, mat)) + assert_equal(mz.dtype, mat.dtype) + + for mat in self.rshft_all: + tz(mat.astype(dt)) + if dt != object: + tz(self.stacked.astype(dt)) + + def test_power_is_minus_one(self, dt): + def tz(mat): + invmat = matrix_power(mat, -1) + mmul = matmul if mat.dtype != object else dot + assert_almost_equal( + mmul(invmat, mat), identity_like_generalized(mat)) + + for mat in self.rshft_all: + if dt not in self.dtnoinv: + tz(mat.astype(dt)) + + def test_exceptions_bad_power(self, dt): + mat = self.rshft_0.astype(dt) + assert_raises(TypeError, matrix_power, mat, 1.5) + assert_raises(TypeError, matrix_power, mat, [1]) + + def test_exceptions_non_square(self, dt): + assert_raises(LinAlgError, matrix_power, np.array([1], dt), 1) + assert_raises(LinAlgError, matrix_power, np.array([[1], [2]], dt), 1) + assert_raises(LinAlgError, matrix_power, np.ones((4, 3, 2), dt), 1) + + def test_exceptions_not_invertible(self, dt): + if dt in self.dtnoinv: + return + mat = self.noninv.astype(dt) + assert_raises(LinAlgError, matrix_power, mat, -1) + + + +class TestEigvalshCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + # note that eigenvalue arrays returned by eig must be sorted since + # their order isn't guaranteed. 
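+        # eigvalsh itself returns the eigenvalues in ascending order, which is
+        # why sorting the eig results makes them directly comparable.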
+ ev = linalg.eigvalsh(a, 'L') + evalues, evectors = linalg.eig(a) + evalues.sort(axis=-1) + assert_allclose(ev, evalues, rtol=get_rtol(ev.dtype)) + + ev2 = linalg.eigvalsh(a, 'U') + assert_allclose(ev2, evalues, rtol=get_rtol(ev.dtype)) + + +class TestEigvalsh(object): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w = np.linalg.eigvalsh(x) + assert_equal(w.dtype, get_real_dtype(dtype)) + + def test_invalid(self): + x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) + assert_raises(ValueError, np.linalg.eigvalsh, x, UPLO="lrong") + assert_raises(ValueError, np.linalg.eigvalsh, x, "lower") + assert_raises(ValueError, np.linalg.eigvalsh, x, "upper") + + def test_UPLO(self): + Klo = np.array([[0, 0], [1, 0]], dtype=np.double) + Kup = np.array([[0, 1], [0, 0]], dtype=np.double) + tgt = np.array([-1, 1], dtype=np.double) + rtol = get_rtol(np.double) + + # Check default is 'L' + w = np.linalg.eigvalsh(Klo) + assert_allclose(w, tgt, rtol=rtol) + # Check 'L' + w = np.linalg.eigvalsh(Klo, UPLO='L') + assert_allclose(w, tgt, rtol=rtol) + # Check 'l' + w = np.linalg.eigvalsh(Klo, UPLO='l') + assert_allclose(w, tgt, rtol=rtol) + # Check 'U' + w = np.linalg.eigvalsh(Kup, UPLO='U') + assert_allclose(w, tgt, rtol=rtol) + # Check 'u' + w = np.linalg.eigvalsh(Kup, UPLO='u') + assert_allclose(w, tgt, rtol=rtol) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.eigvalsh(a) + assert_(res.dtype.type is np.float64) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.eigvalsh(a) + assert_(res.dtype.type is np.float32) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(res, np.ndarray)) + + +class TestEighCases(HermitianTestCase, HermitianGeneralizedTestCase): + + def do(self, a, b, tags): + # note that eigenvalue arrays returned by eig must be sorted since + # their order isn't guaranteed. 
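As a sketch of the identity the assertions below encode (A·V equals V scaled column-wise by the eigenvalues), under the assumption of a small Hermitian input:

import numpy as np

a = np.array([[2.0, 1.0], [1.0, 2.0]])
w, v = np.linalg.eigh(a)
# Broadcasting v * w scales column j of v by w[j], matching a.dot(v) column-wise.
np.testing.assert_allclose(np.dot(a, v), v * w, atol=1e-12)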
+ ev, evc = linalg.eigh(a) + evalues, evectors = linalg.eig(a) + evalues.sort(axis=-1) + assert_almost_equal(ev, evalues) + + assert_allclose(dot_generalized(a, evc), + np.asarray(ev)[..., None, :] * np.asarray(evc), + rtol=get_rtol(ev.dtype)) + + ev2, evc2 = linalg.eigh(a, 'U') + assert_almost_equal(ev2, evalues) + + assert_allclose(dot_generalized(a, evc2), + np.asarray(ev2)[..., None, :] * np.asarray(evc2), + rtol=get_rtol(ev.dtype), err_msg=repr(a)) + + +class TestEigh(object): + @pytest.mark.parametrize('dtype', [single, double, csingle, cdouble]) + def test_types(self, dtype): + x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) + w, v = np.linalg.eigh(x) + assert_equal(w.dtype, get_real_dtype(dtype)) + assert_equal(v.dtype, dtype) + + def test_invalid(self): + x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) + assert_raises(ValueError, np.linalg.eigh, x, UPLO="lrong") + assert_raises(ValueError, np.linalg.eigh, x, "lower") + assert_raises(ValueError, np.linalg.eigh, x, "upper") + + def test_UPLO(self): + Klo = np.array([[0, 0], [1, 0]], dtype=np.double) + Kup = np.array([[0, 1], [0, 0]], dtype=np.double) + tgt = np.array([-1, 1], dtype=np.double) + rtol = get_rtol(np.double) + + # Check default is 'L' + w, v = np.linalg.eigh(Klo) + assert_allclose(w, tgt, rtol=rtol) + # Check 'L' + w, v = np.linalg.eigh(Klo, UPLO='L') + assert_allclose(w, tgt, rtol=rtol) + # Check 'l' + w, v = np.linalg.eigh(Klo, UPLO='l') + assert_allclose(w, tgt, rtol=rtol) + # Check 'U' + w, v = np.linalg.eigh(Kup, UPLO='U') + assert_allclose(w, tgt, rtol=rtol) + # Check 'u' + w, v = np.linalg.eigh(Kup, UPLO='u') + assert_allclose(w, tgt, rtol=rtol) + + def test_0_size(self): + # Check that all kinds of 0-sized arrays work + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res, res_v = linalg.eigh(a) + assert_(res_v.dtype.type is np.float64) + assert_(res.dtype.type is np.float64) + assert_equal(a.shape, res_v.shape) + assert_equal((0, 1), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + a = np.zeros((0, 0), dtype=np.complex64).view(ArraySubclass) + res, res_v = linalg.eigh(a) + assert_(res_v.dtype.type is np.complex64) + assert_(res.dtype.type is np.float32) + assert_equal(a.shape, res_v.shape) + assert_equal((0,), res.shape) + # This is just for documentation, it might make sense to change: + assert_(isinstance(a, np.ndarray)) + + +class _TestNormBase(object): + dt = None + dec = None + + +class _TestNormGeneral(_TestNormBase): + + def test_empty(self): + assert_equal(norm([]), 0.0) + assert_equal(norm(array([], dtype=self.dt)), 0.0) + assert_equal(norm(atleast_2d(array([], dtype=self.dt))), 0.0) + + def test_vector_return_type(self): + a = np.array([1, 0, 1]) + + exact_types = np.typecodes['AllInteger'] + inexact_types = np.typecodes['AllFloat'] + + all_types = exact_types + inexact_types + + for each_inexact_types in all_types: + at = a.astype(each_inexact_types) + + an = norm(at, -np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 0.0) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "divide by zero encountered") + an = norm(at, -1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 0.0) + + an = norm(at, 0) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2) + + an = norm(at, 1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 2) + 
assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, an.dtype.type(2.0)**an.dtype.type(1.0/2.0)) + + an = norm(at, 4) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, an.dtype.type(2.0)**an.dtype.type(1.0/4.0)) + + an = norm(at, np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + def test_vector(self): + a = [1, 2, 3, 4] + b = [-1, -2, -3, -4] + c = [-1, 2, -3, 4] + + def _test(v): + np.testing.assert_almost_equal(norm(v), 30 ** 0.5, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, inf), 4.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -inf), 1.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 1), 10.0, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -1), 12.0 / 25, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 2), 30 ** 0.5, + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, -2), ((205. / 144) ** -0.5), + decimal=self.dec) + np.testing.assert_almost_equal(norm(v, 0), 4, + decimal=self.dec) + + for v in (a, b, c,): + _test(v) + + for v in (array(a, dtype=self.dt), array(b, dtype=self.dt), + array(c, dtype=self.dt)): + _test(v) + + def test_axis(self): + # Vector norms. + # Compare the use of `axis` with computing the norm of each row + # or column separately. + A = array([[1, 2, 3], [4, 5, 6]], dtype=self.dt) + for order in [None, -1, 0, 1, 2, 3, np.Inf, -np.Inf]: + expected0 = [norm(A[:, k], ord=order) for k in range(A.shape[1])] + assert_almost_equal(norm(A, ord=order, axis=0), expected0) + expected1 = [norm(A[k, :], ord=order) for k in range(A.shape[0])] + assert_almost_equal(norm(A, ord=order, axis=1), expected1) + + # Matrix norms. + B = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + nd = B.ndim + for order in [None, -2, 2, -1, 1, np.Inf, -np.Inf, 'fro']: + for axis in itertools.combinations(range(-nd, nd), 2): + row_axis, col_axis = axis + if row_axis < 0: + row_axis += nd + if col_axis < 0: + col_axis += nd + if row_axis == col_axis: + assert_raises(ValueError, norm, B, ord=order, axis=axis) + else: + n = norm(B, ord=order, axis=axis) + + # The logic using k_index only works for nd = 3. + # This has to be changed if nd is increased. + k_index = nd - (row_axis + col_axis) + if row_axis < col_axis: + expected = [norm(B[:].take(k, axis=k_index), ord=order) + for k in range(B.shape[k_index])] + else: + expected = [norm(B[:].take(k, axis=k_index).T, ord=order) + for k in range(B.shape[k_index])] + assert_almost_equal(n, expected) + + def test_keepdims(self): + A = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + + allclose_err = 'order {0}, axis = {1}' + shape_err = 'Shape mismatch found {0}, expected {1}, order={2}, axis={3}' + + # check the order=None, axis=None case + expected = norm(A, ord=None, axis=None) + found = norm(A, ord=None, axis=None, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(None, None)) + expected_shape = (1, 1, 1) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, None, None)) + + # Vector norms. 
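For orientation, a small sketch of the axis/keepdims behaviour these loops verify (illustrative only; not part of the test file):

import numpy as np

A = np.arange(1, 7, dtype=float).reshape(2, 3)
print(np.linalg.norm(A, axis=0))                  # per-column vector norms, shape (3,)
print(np.linalg.norm(A, axis=1, keepdims=True))   # per-row norms, shape kept as (2, 1)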
+ for order in [None, -1, 0, 1, 2, 3, np.Inf, -np.Inf]: + for k in range(A.ndim): + expected = norm(A, ord=order, axis=k) + found = norm(A, ord=order, axis=k, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(order, k)) + expected_shape = list(A.shape) + expected_shape[k] = 1 + expected_shape = tuple(expected_shape) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, order, k)) + + # Matrix norms. + for order in [None, -2, 2, -1, 1, np.Inf, -np.Inf, 'fro', 'nuc']: + for k in itertools.permutations(range(A.ndim), 2): + expected = norm(A, ord=order, axis=k) + found = norm(A, ord=order, axis=k, keepdims=True) + assert_allclose(np.squeeze(found), expected, + err_msg=allclose_err.format(order, k)) + expected_shape = list(A.shape) + expected_shape[k[0]] = 1 + expected_shape[k[1]] = 1 + expected_shape = tuple(expected_shape) + assert_(found.shape == expected_shape, + shape_err.format(found.shape, expected_shape, order, k)) + + +class _TestNorm2D(_TestNormBase): + # Define the part for 2d arrays separately, so we can subclass this + # and run the tests using np.matrix in matrixlib.tests.test_matrix_linalg. + array = np.array + + def test_matrix_empty(self): + assert_equal(norm(self.array([[]], dtype=self.dt)), 0.0) + + def test_matrix_return_type(self): + a = self.array([[1, 0, 1], [0, 1, 1]]) + + exact_types = np.typecodes['AllInteger'] + + # float32, complex64, float64, complex128 types are the only types + # allowed by `linalg`, which performs the matrix operations used + # within `norm`. + inexact_types = 'fdFD' + + all_types = exact_types + inexact_types + + for each_inexact_types in all_types: + at = a.astype(each_inexact_types) + + an = norm(at, -np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "divide by zero encountered") + an = norm(at, -1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + an = norm(at, 1) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 2) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 3.0**(1.0/2.0)) + + an = norm(at, -2) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 1.0) + + an = norm(at, np.inf) + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 'fro') + assert_(issubclass(an.dtype.type, np.floating)) + assert_almost_equal(an, 2.0) + + an = norm(at, 'nuc') + assert_(issubclass(an.dtype.type, np.floating)) + # Lower bar needed to support low precision floats. + # They end up being off by 1 in the 7th place. 
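A quick cross-check of the expected value below (a sketch, not part of the suite): the nuclear norm is the sum of singular values, and for [[1, 0, 1], [0, 1, 1]] those are sqrt(3) and 1, giving about 2.7320508.

import numpy as np

a = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0]])
s = np.linalg.svd(a, compute_uv=False)      # singular values: [sqrt(3), 1]
print(s.sum())                              # ~2.7320508075688772
print(np.linalg.norm(a, 'nuc'))             # same value via ord='nuc'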
+ np.testing.assert_almost_equal(an, 2.7320508075688772, decimal=6) + + def test_matrix_2x2(self): + A = self.array([[1, 3], [5, 7]], dtype=self.dt) + assert_almost_equal(norm(A), 84 ** 0.5) + assert_almost_equal(norm(A, 'fro'), 84 ** 0.5) + assert_almost_equal(norm(A, 'nuc'), 10.0) + assert_almost_equal(norm(A, inf), 12.0) + assert_almost_equal(norm(A, -inf), 4.0) + assert_almost_equal(norm(A, 1), 10.0) + assert_almost_equal(norm(A, -1), 6.0) + assert_almost_equal(norm(A, 2), 9.1231056256176615) + assert_almost_equal(norm(A, -2), 0.87689437438234041) + + assert_raises(ValueError, norm, A, 'nofro') + assert_raises(ValueError, norm, A, -3) + assert_raises(ValueError, norm, A, 0) + + def test_matrix_3x3(self): + # This test has been added because the 2x2 example + # happened to have equal nuclear norm and induced 1-norm. + # The 1/10 scaling factor accommodates the absolute tolerance + # used in assert_almost_equal. + A = (1 / 10) * \ + self.array([[1, 2, 3], [6, 0, 5], [3, 2, 1]], dtype=self.dt) + assert_almost_equal(norm(A), (1 / 10) * 89 ** 0.5) + assert_almost_equal(norm(A, 'fro'), (1 / 10) * 89 ** 0.5) + assert_almost_equal(norm(A, 'nuc'), 1.3366836911774836) + assert_almost_equal(norm(A, inf), 1.1) + assert_almost_equal(norm(A, -inf), 0.6) + assert_almost_equal(norm(A, 1), 1.0) + assert_almost_equal(norm(A, -1), 0.4) + assert_almost_equal(norm(A, 2), 0.88722940323461277) + assert_almost_equal(norm(A, -2), 0.19456584790481812) + + def test_bad_args(self): + # Check that bad arguments raise the appropriate exceptions. + + A = self.array([[1, 2, 3], [4, 5, 6]], dtype=self.dt) + B = np.arange(1, 25, dtype=self.dt).reshape(2, 3, 4) + + # Using `axis=<integer>` or passing in a 1-D array implies vector + # norms are being computed, so also using `ord='fro'` + # or `ord='nuc'` raises a ValueError. + assert_raises(ValueError, norm, A, 'fro', 0) + assert_raises(ValueError, norm, A, 'nuc', 0) + assert_raises(ValueError, norm, [3, 4], 'fro', None) + assert_raises(ValueError, norm, [3, 4], 'nuc', None) + + # Similarly, norm should raise an exception when ord is any finite + # number other than 1, 2, -1 or -2 when computing matrix norms. + for order in [0, 3]: + assert_raises(ValueError, norm, A, order, None) + assert_raises(ValueError, norm, A, order, (0, 1)) + assert_raises(ValueError, norm, B, order, (1, 2)) + + # Invalid axis + assert_raises(np.AxisError, norm, B, None, 3) + assert_raises(np.AxisError, norm, B, None, (2, 3)) + assert_raises(ValueError, norm, B, None, (0, 1, 2)) + + +class _TestNorm(_TestNorm2D, _TestNormGeneral): + pass + + +class TestNorm_NonSystematic(object): + + def test_longdouble_norm(self): + # Non-regression test: p-norm of longdouble would previously raise + # UnboundLocalError. + x = np.arange(10, dtype=np.longdouble) + old_assert_almost_equal(norm(x, ord=3), 12.65, decimal=2) + + def test_intmin(self): + # Non-regression test: p-norm of signed integer would previously do + # float cast and abs in the wrong order. 
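The regression below guards against taking abs() before the float cast; a hedged sketch of why the order matters for INT32_MIN (exact overflow behaviour may vary slightly across NumPy versions):

import numpy as np

x = np.array([-2 ** 31], dtype=np.int32)
print(np.abs(x))                       # wraps to -2147483648 in int32 arithmetic
print(np.abs(x.astype(np.float64)))    # 2147483648.0 once the cast happens first
print(np.linalg.norm(x, ord=3))        # ~2147483648.0, as the test asserts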
+ x = np.array([-2 ** 31], dtype=np.int32) + old_assert_almost_equal(norm(x, ord=3), 2 ** 31, decimal=5) + + def test_complex_high_ord(self): + # gh-4156 + d = np.empty((2,), dtype=np.clongdouble) + d[0] = 6 + 7j + d[1] = -6 + 7j + res = 11.615898132184 + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=10) + d = d.astype(np.complex128) + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=9) + d = d.astype(np.complex64) + old_assert_almost_equal(np.linalg.norm(d, ord=3), res, decimal=5) + + +# Separate definitions so we can use them for matrix tests. +class _TestNormDoubleBase(_TestNormBase): + dt = np.double + dec = 12 + + +class _TestNormSingleBase(_TestNormBase): + dt = np.float32 + dec = 6 + + +class _TestNormInt64Base(_TestNormBase): + dt = np.int64 + dec = 12 + + +class TestNormDouble(_TestNorm, _TestNormDoubleBase): + pass + + +class TestNormSingle(_TestNorm, _TestNormSingleBase): + pass + + +class TestNormInt64(_TestNorm, _TestNormInt64Base): + pass + + +class TestMatrixRank(object): + + def test_matrix_rank(self): + # Full rank matrix + assert_equal(4, matrix_rank(np.eye(4))) + # rank deficient matrix + I = np.eye(4) + I[-1, -1] = 0. + assert_equal(matrix_rank(I), 3) + # All zeros - zero rank + assert_equal(matrix_rank(np.zeros((4, 4))), 0) + # 1 dimension - rank 1 unless all 0 + assert_equal(matrix_rank([1, 0, 0, 0]), 1) + assert_equal(matrix_rank(np.zeros((4,))), 0) + # accepts array-like + assert_equal(matrix_rank([1]), 1) + # greater than 2 dimensions treated as stacked matrices + ms = np.array([I, np.eye(4), np.zeros((4,4))]) + assert_equal(matrix_rank(ms), np.array([3, 4, 0])) + # works on scalar + assert_equal(matrix_rank(1), 1) + + def test_symmetric_rank(self): + assert_equal(4, matrix_rank(np.eye(4), hermitian=True)) + assert_equal(1, matrix_rank(np.ones((4, 4)), hermitian=True)) + assert_equal(0, matrix_rank(np.zeros((4, 4)), hermitian=True)) + # rank deficient matrix + I = np.eye(4) + I[-1, -1] = 0. + assert_equal(3, matrix_rank(I, hermitian=True)) + # manually supplied tolerance + I[-1, -1] = 1e-8 + assert_equal(4, matrix_rank(I, hermitian=True, tol=0.99e-8)) + assert_equal(3, matrix_rank(I, hermitian=True, tol=1.01e-8)) + + +def test_reduced_rank(): + # Test matrices with reduced rank + rng = np.random.RandomState(20120714) + for i in range(100): + # Make a rank deficient matrix + X = rng.normal(size=(40, 10)) + X[:, 0] = X[:, 1] + X[:, 2] + # Assert that matrix_rank detected deficiency + assert_equal(matrix_rank(X), 9) + X[:, 3] = X[:, 4] + X[:, 5] + assert_equal(matrix_rank(X), 8) + + +class TestQR(object): + # Define the array class here, so run this on matrices elsewhere. + array = np.array + + def check_qr(self, a): + # This test expects the argument `a` to be an ndarray or + # a subclass of an ndarray of inexact type. 
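A compact sketch of the properties check_qr verifies for the reduced factorization (random input assumed, tolerances illustrative):

import numpy as np

a = np.random.rand(5, 3)
q, r = np.linalg.qr(a, mode='reduced')                   # q: (5, 3), r: (3, 3)
np.testing.assert_allclose(np.dot(q, r), a, atol=1e-12)  # reconstruction
np.testing.assert_allclose(np.dot(q.T.conj(), q), np.eye(3), atol=1e-12)  # orthonormal columns
assert np.allclose(np.triu(r), r)                        # r is upper triangular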
+ a_type = type(a) + a_dtype = a.dtype + m, n = a.shape + k = min(m, n) + + # mode == 'complete' + q, r = linalg.qr(a, mode='complete') + assert_(q.dtype == a_dtype) + assert_(r.dtype == a_dtype) + assert_(isinstance(q, a_type)) + assert_(isinstance(r, a_type)) + assert_(q.shape == (m, m)) + assert_(r.shape == (m, n)) + assert_almost_equal(dot(q, r), a) + assert_almost_equal(dot(q.T.conj(), q), np.eye(m)) + assert_almost_equal(np.triu(r), r) + + # mode == 'reduced' + q1, r1 = linalg.qr(a, mode='reduced') + assert_(q1.dtype == a_dtype) + assert_(r1.dtype == a_dtype) + assert_(isinstance(q1, a_type)) + assert_(isinstance(r1, a_type)) + assert_(q1.shape == (m, k)) + assert_(r1.shape == (k, n)) + assert_almost_equal(dot(q1, r1), a) + assert_almost_equal(dot(q1.T.conj(), q1), np.eye(k)) + assert_almost_equal(np.triu(r1), r1) + + # mode == 'r' + r2 = linalg.qr(a, mode='r') + assert_(r2.dtype == a_dtype) + assert_(isinstance(r2, a_type)) + assert_almost_equal(r2, r1) + + + @pytest.mark.parametrize(["m", "n"], [ + (3, 0), + (0, 3), + (0, 0) + ]) + def test_qr_empty(self, m, n): + k = min(m, n) + a = np.empty((m, n)) + + self.check_qr(a) + + h, tau = np.linalg.qr(a, mode='raw') + assert_equal(h.dtype, np.double) + assert_equal(tau.dtype, np.double) + assert_equal(h.shape, (n, m)) + assert_equal(tau.shape, (k,)) + + def test_mode_raw(self): + # The factorization is not unique and varies between libraries, + # so it is not possible to check against known values. Functional + # testing is a possibility, but awaits the exposure of more + # of the functions in lapack_lite. Consequently, this test is + # very limited in scope. Note that the results are in FORTRAN + # order, hence the h arrays are transposed. + a = self.array([[1, 2], [3, 4], [5, 6]], dtype=np.double) + + # Test double + h, tau = linalg.qr(a, mode='raw') + assert_(h.dtype == np.double) + assert_(tau.dtype == np.double) + assert_(h.shape == (2, 3)) + assert_(tau.shape == (2,)) + + h, tau = linalg.qr(a.T, mode='raw') + assert_(h.dtype == np.double) + assert_(tau.dtype == np.double) + assert_(h.shape == (3, 2)) + assert_(tau.shape == (2,)) + + def test_mode_all_but_economic(self): + a = self.array([[1, 2], [3, 4]]) + b = self.array([[1, 2], [3, 4], [5, 6]]) + for dt in "fd": + m1 = a.astype(dt) + m2 = b.astype(dt) + self.check_qr(m1) + self.check_qr(m2) + self.check_qr(m2.T) + + for dt in "fd": + m1 = 1 + 1j * a.astype(dt) + m2 = 1 + 1j * b.astype(dt) + self.check_qr(m1) + self.check_qr(m2) + self.check_qr(m2.T) + + +class TestCholesky(object): + # TODO: are there no other tests for cholesky? 
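For reference, a minimal sketch of the A = L·L^H property exercised by the class below (the shift by 4·I merely guarantees positive definiteness):

import numpy as np

np.random.seed(0)
m = np.random.randn(4, 4)
a = np.dot(m, m.T) + 4 * np.eye(4)       # symmetric positive definite by construction
L = np.linalg.cholesky(a)                # lower-triangular factor
np.testing.assert_allclose(np.dot(L, L.T.conj()), a, atol=1e-10)
assert np.allclose(np.tril(L), L)        # L is lower triangular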
+ + def test_basic_property(self): + # Check A = L L^H + shapes = [(1, 1), (2, 2), (3, 3), (50, 50), (3, 10, 10)] + dtypes = (np.float32, np.float64, np.complex64, np.complex128) + + for shape, dtype in itertools.product(shapes, dtypes): + np.random.seed(1) + a = np.random.randn(*shape) + if np.issubdtype(dtype, np.complexfloating): + a = a + 1j*np.random.randn(*shape) + + t = list(range(len(shape))) + t[-2:] = -1, -2 + + a = np.matmul(a.transpose(t).conj(), a) + a = np.asarray(a, dtype=dtype) + + c = np.linalg.cholesky(a) + + b = np.matmul(c, c.transpose(t).conj()) + assert_allclose(b, a, + err_msg="{} {}\n{}\n{}".format(shape, dtype, a, c), + atol=500 * a.shape[0] * np.finfo(dtype).eps) + + def test_0_size(self): + class ArraySubclass(np.ndarray): + pass + a = np.zeros((0, 1, 1), dtype=np.int_).view(ArraySubclass) + res = linalg.cholesky(a) + assert_equal(a.shape, res.shape) + assert_(res.dtype.type is np.float64) + # for documentation purpose: + assert_(isinstance(res, np.ndarray)) + + a = np.zeros((1, 0, 0), dtype=np.complex64).view(ArraySubclass) + res = linalg.cholesky(a) + assert_equal(a.shape, res.shape) + assert_(res.dtype.type is np.complex64) + assert_(isinstance(res, np.ndarray)) + + +def test_byteorder_check(): + # Byte order check should pass for native order + if sys.byteorder == 'little': + native = '<' + else: + native = '>' + + for dtt in (np.float32, np.float64): + arr = np.eye(4, dtype=dtt) + n_arr = arr.newbyteorder(native) + sw_arr = arr.newbyteorder('S').byteswap() + assert_equal(arr.dtype.byteorder, '=') + for routine in (linalg.inv, linalg.det, linalg.pinv): + # Normal call + res = routine(arr) + # Native but not '=' + assert_array_equal(res, routine(n_arr)) + # Swapped + assert_array_equal(res, routine(sw_arr)) + + +def test_generalized_raise_multiloop(): + # It should raise an error even if the error doesn't occur in the + # last iteration of the ufunc inner loop + + invertible = np.array([[1, 2], [3, 4]]) + non_invertible = np.array([[1, 1], [1, 1]]) + + x = np.zeros([4, 4, 2, 2])[1::2] + x[...] = invertible + x[0, 0] = non_invertible + + assert_raises(np.linalg.LinAlgError, np.linalg.inv, x) + + +def test_xerbla_override(): + # Check that our xerbla has been successfully linked in. If it is not, + # the default xerbla routine is called, which prints a message to stdout + # and may, or may not, abort the process depending on the LAPACK package. + + XERBLA_OK = 255 + + try: + pid = os.fork() + except (OSError, AttributeError): + # fork failed, or not running on POSIX + pytest.skip("Not POSIX or fork failed.") + + if pid == 0: + # child; close i/o file handles + os.close(1) + os.close(0) + # Avoid producing core files. + import resource + resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) + # These calls may abort. + try: + np.linalg.lapack_lite.xerbla() + except ValueError: + pass + except Exception: + os._exit(os.EX_CONFIG) + + try: + a = np.array([[1.]]) + np.linalg.lapack_lite.dorgqr( + 1, 1, 1, a, + 0, # <- invalid value + a, a, 0, 0) + except ValueError as e: + if "DORGQR parameter number 5" in str(e): + # success, reuse error code to mark success as + # FORTRAN STOP returns as success. + os._exit(XERBLA_OK) + + # Did not abort, but our xerbla was not linked in. 
+ os._exit(os.EX_CONFIG) + else: + # parent + pid, status = os.wait() + if os.WEXITSTATUS(status) != XERBLA_OK: + pytest.skip('Numpy xerbla not linked in.') + + +def test_sdot_bug_8577(): + # Regression test that loading certain other libraries does not + # result to wrong results in float32 linear algebra. + # + # There's a bug gh-8577 on OSX that can trigger this, and perhaps + # there are also other situations in which it occurs. + # + # Do the check in a separate process. + + bad_libs = ['PyQt5.QtWidgets', 'IPython'] + + template = textwrap.dedent(""" + import sys + {before} + try: + import {bad_lib} + except ImportError: + sys.exit(0) + {after} + x = np.ones(2, dtype=np.float32) + sys.exit(0 if np.allclose(x.dot(x), 2.0) else 1) + """) + + for bad_lib in bad_libs: + code = template.format(before="import numpy as np", after="", + bad_lib=bad_lib) + subprocess.check_call([sys.executable, "-c", code]) + + # Swapped import order + code = template.format(after="import numpy as np", before="", + bad_lib=bad_lib) + subprocess.check_call([sys.executable, "-c", code]) + + +class TestMultiDot(object): + + def test_basic_function_with_three_arguments(self): + # multi_dot with three arguments uses a fast hand coded algorithm to + # determine the optimal order. Therefore test it separately. + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + + assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C)) + assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C))) + + def test_basic_function_with_two_arguments(self): + # separate code path with two arguments + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + + assert_almost_equal(multi_dot([A, B]), A.dot(B)) + assert_almost_equal(multi_dot([A, B]), np.dot(A, B)) + + def test_basic_function_with_dynamic_programing_optimization(self): + # multi_dot with four or more arguments uses the dynamic programing + # optimization and therefore deserve a separate + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D = np.random.random((2, 1)) + assert_almost_equal(multi_dot([A, B, C, D]), A.dot(B).dot(C).dot(D)) + + def test_vector_as_first_argument(self): + # The first argument can be 1-D + A1d = np.random.random(2) # 1-D + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D = np.random.random((2, 2)) + + # the result should be 1-D + assert_equal(multi_dot([A1d, B, C, D]).shape, (2,)) + + def test_vector_as_last_argument(self): + # The last argument can be 1-D + A = np.random.random((6, 2)) + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D1d = np.random.random(2) # 1-D + + # the result should be 1-D + assert_equal(multi_dot([A, B, C, D1d]).shape, (6,)) + + def test_vector_as_first_and_last_argument(self): + # The first and last arguments can be 1-D + A1d = np.random.random(2) # 1-D + B = np.random.random((2, 6)) + C = np.random.random((6, 2)) + D1d = np.random.random(2) # 1-D + + # the result should be a scalar + assert_equal(multi_dot([A1d, B, C, D1d]).shape, ()) + + def test_dynamic_programming_logic(self): + # Test for the dynamic programming part + # This test is directly taken from Cormen page 376. 
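The expected tables come from the classic CLRS matrix-chain-order recurrence. A self-contained sketch over the same dimensions reproduces them (the helper name here is illustrative; the test itself calls the private `_multi_dot_matrix_chain_order`):

import numpy as np

def matrix_chain_order(p):
    # Matrix i has shape (p[i], p[i + 1]); n matrices in total.
    n = len(p) - 1
    m = np.zeros((n, n))                  # m[i, j]: minimal scalar multiplications
    s = np.zeros((n, n), dtype=int)       # s[i, j]: optimal split index k
    for length in range(2, n + 1):        # chain length
        for i in range(n - length + 1):
            j = i + length - 1
            m[i, j] = np.inf
            for k in range(i, j):
                cost = m[i, k] + m[k + 1, j] + p[i] * p[k + 1] * p[j + 1]
                if cost < m[i, j]:
                    m[i, j] = cost
                    s[i, j] = k
    return m, s

m, s = matrix_chain_order([30, 35, 15, 5, 10, 20, 25])
assert m[0, 5] == 15125 and s[0, 5] == 2   # matches the m_expected/s_expected used below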
+ arrays = [np.random.random((30, 35)), + np.random.random((35, 15)), + np.random.random((15, 5)), + np.random.random((5, 10)), + np.random.random((10, 20)), + np.random.random((20, 25))] + m_expected = np.array([[0., 15750., 7875., 9375., 11875., 15125.], + [0., 0., 2625., 4375., 7125., 10500.], + [0., 0., 0., 750., 2500., 5375.], + [0., 0., 0., 0., 1000., 3500.], + [0., 0., 0., 0., 0., 5000.], + [0., 0., 0., 0., 0., 0.]]) + s_expected = np.array([[0, 1, 1, 3, 3, 3], + [0, 0, 2, 3, 3, 3], + [0, 0, 0, 3, 3, 3], + [0, 0, 0, 0, 4, 5], + [0, 0, 0, 0, 0, 5], + [0, 0, 0, 0, 0, 0]], dtype=int) + s_expected -= 1 # Cormen uses 1-based index, python does not. + + s, m = _multi_dot_matrix_chain_order(arrays, return_costs=True) + + # Only the upper triangular part (without the diagonal) is interesting. + assert_almost_equal(np.triu(s[:-1, 1:]), + np.triu(s_expected[:-1, 1:])) + assert_almost_equal(np.triu(m), np.triu(m_expected)) + + def test_too_few_input_arrays(self): + assert_raises(ValueError, multi_dot, []) + assert_raises(ValueError, multi_dot, [np.random.random((3, 3))]) + + +class TestTensorinv(object): + + @pytest.mark.parametrize("arr, ind", [ + (np.ones((4, 6, 8, 2)), 2), + (np.ones((3, 3, 2)), 1), + ]) + def test_non_square_handling(self, arr, ind): + with assert_raises(LinAlgError): + linalg.tensorinv(arr, ind=ind) + + @pytest.mark.parametrize("shape, ind", [ + # examples from docstring + ((4, 6, 8, 3), 2), + ((24, 8, 3), 1), + ]) + def test_tensorinv_shape(self, shape, ind): + a = np.eye(24) + a.shape = shape + ainv = linalg.tensorinv(a=a, ind=ind) + expected = a.shape[ind:] + a.shape[:ind] + actual = ainv.shape + assert_equal(actual, expected) + + @pytest.mark.parametrize("ind", [ + 0, -2, + ]) + def test_tensorinv_ind_limit(self, ind): + a = np.eye(24) + a.shape = (4, 6, 8, 3) + with assert_raises(ValueError): + linalg.tensorinv(a=a, ind=ind) + + def test_tensorinv_result(self): + # mimic a docstring example + a = np.eye(24) + a.shape = (24, 8, 3) + ainv = linalg.tensorinv(a, ind=1) + b = np.ones(24) + assert_allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b)) diff --git a/contrib/python/numpy/py2/numpy/linalg/tests/test_regression.py b/contrib/python/numpy/py2/numpy/linalg/tests/test_regression.py new file mode 100644 index 0000000000..bd3a45872c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/tests/test_regression.py @@ -0,0 +1,150 @@ +""" Test functions for linalg module +""" +from __future__ import division, absolute_import, print_function + +import warnings + +import numpy as np +from numpy import linalg, arange, float64, array, dot, transpose +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_array_equal, + assert_array_almost_equal, assert_array_less +) + + +class TestRegression(object): + + def test_eig_build(self): + # Ticket #652 + rva = array([1.03221168e+02 + 0.j, + -1.91843603e+01 + 0.j, + -6.04004526e-01 + 15.84422474j, + -6.04004526e-01 - 15.84422474j, + -1.13692929e+01 + 0.j, + -6.57612485e-01 + 10.41755503j, + -6.57612485e-01 - 10.41755503j, + 1.82126812e+01 + 0.j, + 1.06011014e+01 + 0.j, + 7.80732773e+00 + 0.j, + -7.65390898e-01 + 0.j, + 1.51971555e-15 + 0.j, + -1.51308713e-15 + 0.j]) + a = arange(13 * 13, dtype=float64) + a.shape = (13, 13) + a = a % 17 + va, ve = linalg.eig(a) + va.sort() + rva.sort() + assert_array_almost_equal(va, rva) + + def test_eigh_build(self): + # Ticket 662. 
+ rvals = [68.60568999, 89.57756725, 106.67185574] + + cov = array([[77.70273908, 3.51489954, 15.64602427], + [3.51489954, 88.97013878, -1.07431931], + [15.64602427, -1.07431931, 98.18223512]]) + + vals, vecs = linalg.eigh(cov) + assert_array_almost_equal(vals, rvals) + + def test_svd_build(self): + # Ticket 627. + a = array([[0., 1.], [1., 1.], [2., 1.], [3., 1.]]) + m, n = a.shape + u, s, vh = linalg.svd(a) + + b = dot(transpose(u[:, n:]), a) + + assert_array_almost_equal(b, np.zeros((2, 2))) + + def test_norm_vector_badarg(self): + # Regression for #786: Froebenius norm for vectors raises + # TypeError. + assert_raises(ValueError, linalg.norm, array([1., 2., 3.]), 'fro') + + def test_lapack_endian(self): + # For bug #1482 + a = array([[5.7998084, -2.1825367], + [-2.1825367, 9.85910595]], dtype='>f8') + b = array(a, dtype='<f8') + + ap = linalg.cholesky(a) + bp = linalg.cholesky(b) + assert_array_equal(ap, bp) + + def test_large_svd_32bit(self): + # See gh-4442, 64bit would require very large/slow matrices. + x = np.eye(1000, 66) + np.linalg.svd(x) + + def test_svd_no_uv(self): + # gh-4733 + for shape in (3, 4), (4, 4), (4, 3): + for t in float, complex: + a = np.ones(shape, dtype=t) + w = linalg.svd(a, compute_uv=False) + c = np.count_nonzero(np.absolute(w) > 0.5) + assert_equal(c, 1) + assert_equal(np.linalg.matrix_rank(a), 1) + assert_array_less(1, np.linalg.norm(a, ord=2)) + + def test_norm_object_array(self): + # gh-7575 + testvector = np.array([np.array([0, 1]), 0, 0], dtype=object) + + norm = linalg.norm(testvector) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + norm = linalg.norm(testvector, ord=1) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype != np.dtype('float64')) + + norm = linalg.norm(testvector, ord=2) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + assert_raises(ValueError, linalg.norm, testvector, ord='fro') + assert_raises(ValueError, linalg.norm, testvector, ord='nuc') + assert_raises(ValueError, linalg.norm, testvector, ord=np.inf) + assert_raises(ValueError, linalg.norm, testvector, ord=-np.inf) + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + assert_raises((AttributeError, DeprecationWarning), + linalg.norm, testvector, ord=0) + assert_raises(ValueError, linalg.norm, testvector, ord=-1) + assert_raises(ValueError, linalg.norm, testvector, ord=-2) + + testmatrix = np.array([[np.array([0, 1]), 0, 0], + [0, 0, 0]], dtype=object) + + norm = linalg.norm(testmatrix) + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + norm = linalg.norm(testmatrix, ord='fro') + assert_array_equal(norm, [0, 1]) + assert_(norm.dtype == np.dtype('float64')) + + assert_raises(TypeError, linalg.norm, testmatrix, ord='nuc') + assert_raises(ValueError, linalg.norm, testmatrix, ord=np.inf) + assert_raises(ValueError, linalg.norm, testmatrix, ord=-np.inf) + assert_raises(ValueError, linalg.norm, testmatrix, ord=0) + assert_raises(ValueError, linalg.norm, testmatrix, ord=1) + assert_raises(ValueError, linalg.norm, testmatrix, ord=-1) + assert_raises(TypeError, linalg.norm, testmatrix, ord=2) + assert_raises(TypeError, linalg.norm, testmatrix, ord=-2) + assert_raises(ValueError, linalg.norm, testmatrix, ord=3) + + def test_lstsq_complex_larger_rhs(self): + # gh-9891 + size = 20 + n_rhs = 70 + G = np.random.randn(size, size) + 1j * np.random.randn(size, size) + u = np.random.randn(size, n_rhs) + 1j * np.random.randn(size, n_rhs) + b = G.dot(u) + # 
This should work without segmentation fault. + u_lstsq, res, rank, sv = linalg.lstsq(G, b, rcond=None) + # check results just in case + assert_array_almost_equal(u_lstsq, u) diff --git a/contrib/python/numpy/py2/numpy/linalg/umath_linalg.c.src b/contrib/python/numpy/py2/numpy/linalg/umath_linalg.c.src new file mode 100644 index 0000000000..9fc68a7aa9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/linalg/umath_linalg.c.src @@ -0,0 +1,3688 @@ +/* -*- c -*- */ + +/* + ***************************************************************************** + ** INCLUDES ** + ***************************************************************************** + */ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" + +#include "npy_pycompat.h" + +#include "npy_config.h" + +#include <stddef.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> + + +static const char* umath_linalg_version_string = "0.1.5"; + +/* + **************************************************************************** + * Debugging support * + **************************************************************************** + */ +#define TRACE_TXT(...) do { fprintf (stderr, __VA_ARGS__); } while (0) +#define STACK_TRACE do {} while (0) +#define TRACE\ + do { \ + fprintf (stderr, \ + "%s:%d:%s\n", \ + __FILE__, \ + __LINE__, \ + __FUNCTION__); \ + STACK_TRACE; \ + } while (0) + +#if 0 +#include <execinfo.h> +void +dbg_stack_trace() +{ + void *trace[32]; + size_t size; + + size = backtrace(trace, sizeof(trace)/sizeof(trace[0])); + backtrace_symbols_fd(trace, size, 1); +} + +#undef STACK_TRACE +#define STACK_TRACE do { dbg_stack_trace(); } while (0) +#endif + +/* + ***************************************************************************** + * BLAS/LAPACK calling macros * + ***************************************************************************** + */ + +#ifdef NO_APPEND_FORTRAN +# define FNAME(x) x +#else +# define FNAME(x) x##_ +#endif + +typedef struct { float r, i; } f2c_complex; +typedef struct { double r, i; } f2c_doublecomplex; +/* typedef long int (*L_fp)(); */ + +extern int +FNAME(sgeev)(char *jobvl, char *jobvr, int *n, + float a[], int *lda, float wr[], float wi[], + float vl[], int *ldvl, float vr[], int *ldvr, + float work[], int lwork[], + int *info); +extern int +FNAME(dgeev)(char *jobvl, char *jobvr, int *n, + double a[], int *lda, double wr[], double wi[], + double vl[], int *ldvl, double vr[], int *ldvr, + double work[], int lwork[], + int *info); +extern int +FNAME(cgeev)(char *jobvl, char *jobvr, int *n, + f2c_doublecomplex a[], int *lda, + f2c_doublecomplex w[], + f2c_doublecomplex vl[], int *ldvl, + f2c_doublecomplex vr[], int *ldvr, + f2c_doublecomplex work[], int *lwork, + double rwork[], + int *info); +extern int +FNAME(zgeev)(char *jobvl, char *jobvr, int *n, + f2c_doublecomplex a[], int *lda, + f2c_doublecomplex w[], + f2c_doublecomplex vl[], int *ldvl, + f2c_doublecomplex vr[], int *ldvr, + f2c_doublecomplex work[], int *lwork, + double rwork[], + int *info); + +extern int +FNAME(ssyevd)(char *jobz, char *uplo, int *n, + float a[], int *lda, float w[], float work[], + int *lwork, int iwork[], int *liwork, + int *info); +extern int +FNAME(dsyevd)(char *jobz, char *uplo, int *n, + double a[], int *lda, double w[], double work[], + int *lwork, int iwork[], int *liwork, + int *info); +extern int +FNAME(cheevd)(char *jobz, char *uplo, int *n, + f2c_complex a[], int *lda, + float w[], f2c_complex work[], + int *lwork, float 
rwork[], int *lrwork, int iwork[], + int *liwork, + int *info); +extern int +FNAME(zheevd)(char *jobz, char *uplo, int *n, + f2c_doublecomplex a[], int *lda, + double w[], f2c_doublecomplex work[], + int *lwork, double rwork[], int *lrwork, int iwork[], + int *liwork, + int *info); + +extern int +FNAME(sgelsd)(int *m, int *n, int *nrhs, + float a[], int *lda, float b[], int *ldb, + float s[], float *rcond, int *rank, + float work[], int *lwork, int iwork[], + int *info); +extern int +FNAME(dgelsd)(int *m, int *n, int *nrhs, + double a[], int *lda, double b[], int *ldb, + double s[], double *rcond, int *rank, + double work[], int *lwork, int iwork[], + int *info); +extern int +FNAME(cgelsd)(int *m, int *n, int *nrhs, + f2c_complex a[], int *lda, + f2c_complex b[], int *ldb, + float s[], float *rcond, int *rank, + f2c_complex work[], int *lwork, + float rwork[], int iwork[], + int *info); +extern int +FNAME(zgelsd)(int *m, int *n, int *nrhs, + f2c_doublecomplex a[], int *lda, + f2c_doublecomplex b[], int *ldb, + double s[], double *rcond, int *rank, + f2c_doublecomplex work[], int *lwork, + double rwork[], int iwork[], + int *info); + +extern int +FNAME(sgesv)(int *n, int *nrhs, + float a[], int *lda, + int ipiv[], + float b[], int *ldb, + int *info); +extern int +FNAME(dgesv)(int *n, int *nrhs, + double a[], int *lda, + int ipiv[], + double b[], int *ldb, + int *info); +extern int +FNAME(cgesv)(int *n, int *nrhs, + f2c_complex a[], int *lda, + int ipiv[], + f2c_complex b[], int *ldb, + int *info); +extern int +FNAME(zgesv)(int *n, int *nrhs, + f2c_doublecomplex a[], int *lda, + int ipiv[], + f2c_doublecomplex b[], int *ldb, + int *info); + +extern int +FNAME(sgetrf)(int *m, int *n, + float a[], int *lda, + int ipiv[], + int *info); +extern int +FNAME(dgetrf)(int *m, int *n, + double a[], int *lda, + int ipiv[], + int *info); +extern int +FNAME(cgetrf)(int *m, int *n, + f2c_complex a[], int *lda, + int ipiv[], + int *info); +extern int +FNAME(zgetrf)(int *m, int *n, + f2c_doublecomplex a[], int *lda, + int ipiv[], + int *info); + +extern int +FNAME(spotrf)(char *uplo, int *n, + float a[], int *lda, + int *info); +extern int +FNAME(dpotrf)(char *uplo, int *n, + double a[], int *lda, + int *info); +extern int +FNAME(cpotrf)(char *uplo, int *n, + f2c_complex a[], int *lda, + int *info); +extern int +FNAME(zpotrf)(char *uplo, int *n, + f2c_doublecomplex a[], int *lda, + int *info); + +extern int +FNAME(sgesdd)(char *jobz, int *m, int *n, + float a[], int *lda, float s[], float u[], + int *ldu, float vt[], int *ldvt, float work[], + int *lwork, int iwork[], int *info); +extern int +FNAME(dgesdd)(char *jobz, int *m, int *n, + double a[], int *lda, double s[], double u[], + int *ldu, double vt[], int *ldvt, double work[], + int *lwork, int iwork[], int *info); +extern int +FNAME(cgesdd)(char *jobz, int *m, int *n, + f2c_complex a[], int *lda, + float s[], f2c_complex u[], int *ldu, + f2c_complex vt[], int *ldvt, + f2c_complex work[], int *lwork, + float rwork[], int iwork[], int *info); +extern int +FNAME(zgesdd)(char *jobz, int *m, int *n, + f2c_doublecomplex a[], int *lda, + double s[], f2c_doublecomplex u[], int *ldu, + f2c_doublecomplex vt[], int *ldvt, + f2c_doublecomplex work[], int *lwork, + double rwork[], int iwork[], int *info); + +extern int +FNAME(spotrs)(char *uplo, int *n, int *nrhs, + float a[], int *lda, + float b[], int *ldb, + int *info); +extern int +FNAME(dpotrs)(char *uplo, int *n, int *nrhs, + double a[], int *lda, + double b[], int *ldb, + int *info); +extern int 
+FNAME(cpotrs)(char *uplo, int *n, int *nrhs, + f2c_complex a[], int *lda, + f2c_complex b[], int *ldb, + int *info); +extern int +FNAME(zpotrs)(char *uplo, int *n, int *nrhs, + f2c_doublecomplex a[], int *lda, + f2c_doublecomplex b[], int *ldb, + int *info); + +extern int +FNAME(spotri)(char *uplo, int *n, + float a[], int *lda, + int *info); +extern int +FNAME(dpotri)(char *uplo, int *n, + double a[], int *lda, + int *info); +extern int +FNAME(cpotri)(char *uplo, int *n, + f2c_complex a[], int *lda, + int *info); +extern int +FNAME(zpotri)(char *uplo, int *n, + f2c_doublecomplex a[], int *lda, + int *info); + +extern int +FNAME(scopy)(int *n, + float *sx, int *incx, + float *sy, int *incy); +extern int +FNAME(dcopy)(int *n, + double *sx, int *incx, + double *sy, int *incy); +extern int +FNAME(ccopy)(int *n, + f2c_complex *sx, int *incx, + f2c_complex *sy, int *incy); +extern int +FNAME(zcopy)(int *n, + f2c_doublecomplex *sx, int *incx, + f2c_doublecomplex *sy, int *incy); + +extern float +FNAME(sdot)(int *n, + float *sx, int *incx, + float *sy, int *incy); +extern double +FNAME(ddot)(int *n, + double *sx, int *incx, + double *sy, int *incy); +extern void +FNAME(cdotu)(f2c_complex *ret, int *n, + f2c_complex *sx, int *incx, + f2c_complex *sy, int *incy); +extern void +FNAME(zdotu)(f2c_doublecomplex *ret, int *n, + f2c_doublecomplex *sx, int *incx, + f2c_doublecomplex *sy, int *incy); +extern void +FNAME(cdotc)(f2c_complex *ret, int *n, + f2c_complex *sx, int *incx, + f2c_complex *sy, int *incy); +extern void +FNAME(zdotc)(f2c_doublecomplex *ret, int *n, + f2c_doublecomplex *sx, int *incx, + f2c_doublecomplex *sy, int *incy); + +extern int +FNAME(sgemm)(char *transa, char *transb, + int *m, int *n, int *k, + float *alpha, + float *a, int *lda, + float *b, int *ldb, + float *beta, + float *c, int *ldc); +extern int +FNAME(dgemm)(char *transa, char *transb, + int *m, int *n, int *k, + double *alpha, + double *a, int *lda, + double *b, int *ldb, + double *beta, + double *c, int *ldc); +extern int +FNAME(cgemm)(char *transa, char *transb, + int *m, int *n, int *k, + f2c_complex *alpha, + f2c_complex *a, int *lda, + f2c_complex *b, int *ldb, + f2c_complex *beta, + f2c_complex *c, int *ldc); +extern int +FNAME(zgemm)(char *transa, char *transb, + int *m, int *n, int *k, + f2c_doublecomplex *alpha, + f2c_doublecomplex *a, int *lda, + f2c_doublecomplex *b, int *ldb, + f2c_doublecomplex *beta, + f2c_doublecomplex *c, int *ldc); + + +#define LAPACK_T(FUNC) \ + TRACE_TXT("Calling LAPACK ( " # FUNC " )\n"); \ + FNAME(FUNC) + +#define BLAS(FUNC) \ + FNAME(FUNC) + +#define LAPACK(FUNC) \ + FNAME(FUNC) + +typedef int fortran_int; +typedef float fortran_real; +typedef double fortran_doublereal; +typedef f2c_complex fortran_complex; +typedef f2c_doublecomplex fortran_doublecomplex; + + +/* + ***************************************************************************** + ** Some handy functions ** + ***************************************************************************** + */ + +static NPY_INLINE int +get_fp_invalid_and_clear(void) +{ + int status; + status = npy_clear_floatstatus_barrier((char*)&status); + return !!(status & NPY_FPE_INVALID); +} + +static NPY_INLINE void +set_fp_invalid_or_clear(int error_occurred) +{ + if (error_occurred) { + npy_set_floatstatus_invalid(); + } + else { + npy_clear_floatstatus_barrier((char*)&error_occurred); + } +} + +/* + ***************************************************************************** + ** Some handy constants ** + 
***************************************************************************** + */ + +#define UMATH_LINALG_MODULE_NAME "_umath_linalg" + +typedef union { + fortran_complex f; + npy_cfloat npy; + float array[2]; +} COMPLEX_t; + +typedef union { + fortran_doublecomplex f; + npy_cdouble npy; + double array[2]; +} DOUBLECOMPLEX_t; + +static float s_one; +static float s_zero; +static float s_minus_one; +static float s_ninf; +static float s_nan; +static double d_one; +static double d_zero; +static double d_minus_one; +static double d_ninf; +static double d_nan; +static COMPLEX_t c_one; +static COMPLEX_t c_zero; +static COMPLEX_t c_minus_one; +static COMPLEX_t c_ninf; +static COMPLEX_t c_nan; +static DOUBLECOMPLEX_t z_one; +static DOUBLECOMPLEX_t z_zero; +static DOUBLECOMPLEX_t z_minus_one; +static DOUBLECOMPLEX_t z_ninf; +static DOUBLECOMPLEX_t z_nan; + +static void init_constants(void) +{ + /* + this is needed as NPY_INFINITY and NPY_NAN macros + can't be used as initializers. I prefer to just set + all the constants the same way. + */ + s_one = 1.0f; + s_zero = 0.0f; + s_minus_one = -1.0f; + s_ninf = -NPY_INFINITYF; + s_nan = NPY_NANF; + + d_one = 1.0; + d_zero = 0.0; + d_minus_one = -1.0; + d_ninf = -NPY_INFINITY; + d_nan = NPY_NAN; + + c_one.array[0] = 1.0f; + c_one.array[1] = 0.0f; + c_zero.array[0] = 0.0f; + c_zero.array[1] = 0.0f; + c_minus_one.array[0] = -1.0f; + c_minus_one.array[1] = 0.0f; + c_ninf.array[0] = -NPY_INFINITYF; + c_ninf.array[1] = 0.0f; + c_nan.array[0] = NPY_NANF; + c_nan.array[1] = NPY_NANF; + + z_one.array[0] = 1.0; + z_one.array[1] = 0.0; + z_zero.array[0] = 0.0; + z_zero.array[1] = 0.0; + z_minus_one.array[0] = -1.0; + z_minus_one.array[1] = 0.0; + z_ninf.array[0] = -NPY_INFINITY; + z_ninf.array[1] = 0.0; + z_nan.array[0] = NPY_NAN; + z_nan.array[1] = NPY_NAN; +} + +/* + ***************************************************************************** + ** Structs used for data rearrangement ** + ***************************************************************************** + */ + + +/* + * this struct contains information about how to linearize a matrix in a local + * buffer so that it can be used by blas functions. All strides are specified + * in bytes and are converted to elements later in type specific functions. + * + * rows: number of rows in the matrix + * columns: number of columns in the matrix + * row_strides: the number bytes between consecutive rows. + * column_strides: the number of bytes between consecutive columns. 
+ * output_lead_dim: BLAS/LAPACK-side leading dimension, in elements + */ +typedef struct linearize_data_struct +{ + npy_intp rows; + npy_intp columns; + npy_intp row_strides; + npy_intp column_strides; + npy_intp output_lead_dim; +} LINEARIZE_DATA_t; + +static NPY_INLINE void +init_linearize_data_ex(LINEARIZE_DATA_t *lin_data, + npy_intp rows, + npy_intp columns, + npy_intp row_strides, + npy_intp column_strides, + npy_intp output_lead_dim) +{ + lin_data->rows = rows; + lin_data->columns = columns; + lin_data->row_strides = row_strides; + lin_data->column_strides = column_strides; + lin_data->output_lead_dim = output_lead_dim; +} + +static NPY_INLINE void +init_linearize_data(LINEARIZE_DATA_t *lin_data, + npy_intp rows, + npy_intp columns, + npy_intp row_strides, + npy_intp column_strides) +{ + init_linearize_data_ex( + lin_data, rows, columns, row_strides, column_strides, columns); +} + +static NPY_INLINE void +dump_ufunc_object(PyUFuncObject* ufunc) +{ + TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", + ufunc->core_enabled? "generalized ufunc" : "scalar ufunc", + ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes); + if (ufunc->core_enabled) { + int arg; + int dim; + TRACE_TXT("\t%s (%d dimension(s) detected).\n", + ufunc->core_signature, ufunc->core_num_dim_ix); + + for (arg = 0; arg < ufunc->nargs; arg++){ + int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg]; + TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (", + arg, arg < ufunc->nin? "INPUT" : "OUTPUT", + ufunc->core_num_dims[arg]); + for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) { + TRACE_TXT(" %d", arg_dim_ix[dim]); + } + TRACE_TXT(" )\n"); + } + } +} + +static NPY_INLINE void +dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) +{ + TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ + "\n\t\trow_strides: %td column_strides: %td"\ + "\n", name, params->rows, params->columns, + params->row_strides, params->column_strides); +} + +static NPY_INLINE void +print_FLOAT(npy_float s) +{ + TRACE_TXT(" %8.4f", s); +} +static NPY_INLINE void +print_DOUBLE(npy_double d) +{ + TRACE_TXT(" %10.6f", d); +} +static NPY_INLINE void +print_CFLOAT(npy_cfloat c) +{ + float* c_parts = (float*)&c; + TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); +} +static NPY_INLINE void +print_CDOUBLE(npy_cdouble z) +{ + double* z_parts = (double*)&z; + TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); +} + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + */ +static NPY_INLINE void +dump_@TYPE@_matrix(const char* name, + size_t rows, size_t columns, + const @typ@* ptr) +{ + size_t i, j; + + TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); + for (i = 0; i < rows; i++) + { + TRACE_TXT("| "); + for (j = 0; j < columns; j++) + { + print_@TYPE@(ptr[j*rows + i]); + TRACE_TXT(", "); + } + TRACE_TXT(" |\n"); + } +} +/**end repeat**/ + + +/* + ***************************************************************************** + ** Basics ** + ***************************************************************************** + */ + +static NPY_INLINE fortran_int +fortran_int_min(fortran_int x, fortran_int y) { + return x < y ? x : y; +} + +static NPY_INLINE fortran_int +fortran_int_max(fortran_int x, fortran_int y) { + return x > y ? 
x : y; +} + +#define INIT_OUTER_LOOP_1 \ + npy_intp dN = *dimensions++;\ + npy_intp N_;\ + npy_intp s0 = *steps++; + +#define INIT_OUTER_LOOP_2 \ + INIT_OUTER_LOOP_1\ + npy_intp s1 = *steps++; + +#define INIT_OUTER_LOOP_3 \ + INIT_OUTER_LOOP_2\ + npy_intp s2 = *steps++; + +#define INIT_OUTER_LOOP_4 \ + INIT_OUTER_LOOP_3\ + npy_intp s3 = *steps++; + +#define INIT_OUTER_LOOP_5 \ + INIT_OUTER_LOOP_4\ + npy_intp s4 = *steps++; + +#define INIT_OUTER_LOOP_6 \ + INIT_OUTER_LOOP_5\ + npy_intp s5 = *steps++; + +#define INIT_OUTER_LOOP_7 \ + INIT_OUTER_LOOP_6\ + npy_intp s6 = *steps++; + +#define BEGIN_OUTER_LOOP_2 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1) { + +#define BEGIN_OUTER_LOOP_3 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1,\ + args[2] += s2) { + +#define BEGIN_OUTER_LOOP_4 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1,\ + args[2] += s2,\ + args[3] += s3) { + +#define BEGIN_OUTER_LOOP_5 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1,\ + args[2] += s2,\ + args[3] += s3,\ + args[4] += s4) { + +#define BEGIN_OUTER_LOOP_6 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1,\ + args[2] += s2,\ + args[3] += s3,\ + args[4] += s4,\ + args[5] += s5) { + +#define BEGIN_OUTER_LOOP_7 \ + for (N_ = 0;\ + N_ < dN;\ + N_++, args[0] += s0,\ + args[1] += s1,\ + args[2] += s2,\ + args[3] += s3,\ + args[4] += s4,\ + args[5] += s5,\ + args[6] += s6) { + +#define END_OUTER_LOOP } + +static NPY_INLINE void +update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) +{ + size_t i; + for (i = 0; i < count; ++i) { + bases[i] += offsets[i]; + } +} + + +/* disable -Wmaybe-uninitialized as there is some code that generate false + positives with this warning +*/ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" + +/* + ***************************************************************************** + ** HELPER FUNCS ** + ***************************************************************************** + */ + + /* rearranging of 2D matrices using blas */ + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# + #copy = scopy, dcopy, ccopy, zcopy# + #nan = s_nan, d_nan, c_nan, z_nan# + #zero = s_zero, d_zero, c_zero, z_zero# + */ +static NPY_INLINE void * +linearize_@TYPE@_matrix(void *dst_in, + void *src_in, + const LINEARIZE_DATA_t* data) +{ + @typ@ *src = (@typ@ *) src_in; + @typ@ *dst = (@typ@ *) dst_in; + + if (dst) { + int i, j; + @typ@* rv = dst; + fortran_int columns = (fortran_int)data->columns; + fortran_int column_strides = + (fortran_int)(data->column_strides/sizeof(@typ@)); + fortran_int one = 1; + for (i = 0; i < data->rows; i++) { + if (column_strides > 0) { + FNAME(@copy@)(&columns, + (void*)src, &column_strides, + (void*)dst, &one); + } + else if (column_strides < 0) { + FNAME(@copy@)(&columns, + (void*)((@typ@*)src + (columns-1)*column_strides), + &column_strides, + (void*)dst, &one); + } + else { + /* + * Zero stride has undefined behavior in some BLAS + * implementations (e.g. 
OSX Accelerate), so do it + * manually + */ + for (j = 0; j < columns; ++j) { + memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@)); + } + } + src += data->row_strides/sizeof(@typ@); + dst += data->output_lead_dim; + } + return rv; + } else { + return src; + } +} + +static NPY_INLINE void * +delinearize_@TYPE@_matrix(void *dst_in, + void *src_in, + const LINEARIZE_DATA_t* data) +{ + @typ@ *src = (@typ@ *) src_in; + @typ@ *dst = (@typ@ *) dst_in; + + if (src) { + int i; + @typ@ *rv = src; + fortran_int columns = (fortran_int)data->columns; + fortran_int column_strides = + (fortran_int)(data->column_strides/sizeof(@typ@)); + fortran_int one = 1; + for (i = 0; i < data->rows; i++) { + if (column_strides > 0) { + FNAME(@copy@)(&columns, + (void*)src, &one, + (void*)dst, &column_strides); + } + else if (column_strides < 0) { + FNAME(@copy@)(&columns, + (void*)src, &one, + (void*)((@typ@*)dst + (columns-1)*column_strides), + &column_strides); + } + else { + /* + * Zero stride has undefined behavior in some BLAS + * implementations (e.g. OSX Accelerate), so do it + * manually + */ + if (columns > 0) { + memcpy((@typ@*)dst, + (@typ@*)src + (columns-1), + sizeof(@typ@)); + } + } + src += data->output_lead_dim; + dst += data->row_strides/sizeof(@typ@); + } + + return rv; + } else { + return src; + } +} + +static NPY_INLINE void +nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) +{ + @typ@ *dst = (@typ@ *) dst_in; + + int i, j; + for (i = 0; i < data->rows; i++) { + @typ@ *cp = dst; + ptrdiff_t cs = data->column_strides/sizeof(@typ@); + for (j = 0; j < data->columns; ++j) { + *cp = @nan@; + cp += cs; + } + dst += data->row_strides/sizeof(@typ@); + } +} + +static NPY_INLINE void +zero_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) +{ + @typ@ *dst = (@typ@ *) dst_in; + + int i, j; + for (i = 0; i < data->rows; i++) { + @typ@ *cp = dst; + ptrdiff_t cs = data->column_strides/sizeof(@typ@); + for (j = 0; j < data->columns; ++j) { + *cp = @zero@; + cp += cs; + } + dst += data->row_strides/sizeof(@typ@); + } +} + +/**end repeat**/ + + /* identity square matrix generation */ +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# + #cblas_type = s, d, c, z# + */ +static NPY_INLINE void +identity_@TYPE@_matrix(void *ptr, size_t n) +{ + size_t i; + @typ@ *matrix = (@typ@*) ptr; + /* in IEEE floating point, zeroes are represented as bitwise 0 */ + memset(matrix, 0, n*n*sizeof(@typ@)); + + for (i = 0; i < n; ++i) + { + *matrix = @cblas_type@_one; + matrix += n+1; + } +} +/**end repeat**/ + + /* lower/upper triangular matrix using blas (in place) */ +/**begin repeat + + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# + #cblas_type = s, d, c, z# + */ + +static NPY_INLINE void +triu_@TYPE@_matrix(void *ptr, size_t n) +{ + size_t i, j; + @typ@ *matrix = (@typ@*)ptr; + matrix += n; + for (i = 1; i < n; ++i) { + for (j = 0; j < i; ++j) { + matrix[j] = @cblas_type@_zero; + } + matrix += n; + } +} +/**end repeat**/ + + +/* -------------------------------------------------------------------------- */ + /* Determinants */ + +/**begin repeat + #TYPE = FLOAT, DOUBLE# + #typ = npy_float, npy_double# + #log_func = npy_logf, npy_log# + #exp_func = npy_expf, npy_exp# + #zero = 0.0f, 0.0# +*/ + +static NPY_INLINE void +@TYPE@_slogdet_from_factored_diagonal(@typ@* src, + fortran_int m, + @typ@ *sign, + @typ@ *logdet) +{ + @typ@ acc_sign = *sign; + @typ@ acc_logdet = @zero@; + int i; + for (i = 0; i < m; i++) { 
+ @typ@ abs_element = *src; + if (abs_element < @zero@) { + acc_sign = -acc_sign; + abs_element = -abs_element; + } + + acc_logdet += @log_func@(abs_element); + src += m+1; + } + + *sign = acc_sign; + *logdet = acc_logdet; +} + +static NPY_INLINE @typ@ +@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet) +{ + @typ@ result = sign * @exp_func@(logdet); + return result; +} + +/**end repeat**/ + + +/**begin repeat + #TYPE = CFLOAT, CDOUBLE# + #typ = npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double# + #abs_func = npy_cabsf, npy_cabs# + #log_func = npy_logf, npy_log# + #exp_func = npy_expf, npy_exp# + #zero = 0.0f, 0.0# +*/ +#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0]) +#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1]) + +static NPY_INLINE @typ@ +@TYPE@_mult(@typ@ op1, @typ@ op2) +{ + @typ@ rv; + + RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); + IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); + + return rv; +} + + +static NPY_INLINE void +@TYPE@_slogdet_from_factored_diagonal(@typ@* src, + fortran_int m, + @typ@ *sign, + @basetyp@ *logdet) +{ + int i; + @typ@ sign_acc = *sign; + @basetyp@ logdet_acc = @zero@; + + for (i = 0; i < m; i++) + { + @basetyp@ abs_element = @abs_func@(*src); + @typ@ sign_element; + RE(sign_element) = RE(*src) / abs_element; + IM(sign_element) = IM(*src) / abs_element; + + sign_acc = @TYPE@_mult(sign_acc, sign_element); + logdet_acc += @log_func@(abs_element); + src += m + 1; + } + + *sign = sign_acc; + *logdet = logdet_acc; +} + +static NPY_INLINE @typ@ +@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet) +{ + @typ@ tmp; + RE(tmp) = @exp_func@(logdet); + IM(tmp) = @zero@; + return @TYPE@_mult(sign, tmp); +} +#undef RE +#undef IM +/**end repeat**/ + + +/* As in the linalg package, the determinant is computed via LU factorization + * using LAPACK. + * slogdet computes sign + log(determinant). + * det computes sign * exp(slogdet). + */ +/**begin repeat + + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double, npy_float, npy_double# + #cblas_type = s, d, c, z# +*/ + +static NPY_INLINE void +@TYPE@_slogdet_single_element(fortran_int m, + void* src, + fortran_int* pivots, + @typ@ *sign, + @basetyp@ *logdet) +{ + fortran_int info = 0; + fortran_int lda = fortran_int_max(m, 1); + int i; + /* note: done in place */ + LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &lda, pivots, &info); + + if (info == 0) { + int change_sign = 0; + /* note: fortran uses 1 based indexing */ + for (i = 0; i < m; i++) + { + change_sign += (pivots[i] != (i+1)); + } + + memcpy(sign, + (change_sign % 2)? 
+ &@cblas_type@_minus_one : + &@cblas_type@_one + , sizeof(*sign)); + @TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet); + } else { + /* + if getrf fails, use 0 as sign and -inf as logdet + */ + memcpy(sign, &@cblas_type@_zero, sizeof(*sign)); + memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet)); + } +} + +static void +@TYPE@_slogdet(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + fortran_int m; + npy_uint8 *tmp_buff = NULL; + size_t matrix_size; + size_t pivot_size; + size_t safe_m; + /* notes: + * matrix will need to be copied always, as factorization in lapack is + * made inplace + * matrix will need to be in column-major order, as expected by lapack + * code (fortran) + * always a square matrix + * need to allocate memory for both, matrix_buffer and pivot buffer + */ + INIT_OUTER_LOOP_3 + m = (fortran_int) dimensions[0]; + safe_m = m; + matrix_size = safe_m * safe_m * sizeof(@typ@); + pivot_size = safe_m * sizeof(fortran_int); + tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); + + if (tmp_buff) { + LINEARIZE_DATA_t lin_data; + /* swapped steps to get matrix in FORTRAN order */ + init_linearize_data(&lin_data, m, m, steps[1], steps[0]); + BEGIN_OUTER_LOOP_3 + linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); + @TYPE@_slogdet_single_element(m, + (void*)tmp_buff, + (fortran_int*)(tmp_buff+matrix_size), + (@typ@*)args[1], + (@basetyp@*)args[2]); + END_OUTER_LOOP + + free(tmp_buff); + } +} + +static void +@TYPE@_det(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + fortran_int m; + npy_uint8 *tmp_buff; + size_t matrix_size; + size_t pivot_size; + size_t safe_m; + /* notes: + * matrix will need to be copied always, as factorization in lapack is + * made inplace + * matrix will need to be in column-major order, as expected by lapack + * code (fortran) + * always a square matrix + * need to allocate memory for both, matrix_buffer and pivot buffer + */ + INIT_OUTER_LOOP_2 + m = (fortran_int) dimensions[0]; + safe_m = m; + matrix_size = safe_m * safe_m * sizeof(@typ@); + pivot_size = safe_m * sizeof(fortran_int); + tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); + + if (tmp_buff) { + LINEARIZE_DATA_t lin_data; + @typ@ sign; + @basetyp@ logdet; + /* swapped steps to get matrix in FORTRAN order */ + init_linearize_data(&lin_data, m, m, steps[1], steps[0]); + + BEGIN_OUTER_LOOP_2 + linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); + @TYPE@_slogdet_single_element(m, + (void*)tmp_buff, + (fortran_int*)(tmp_buff + matrix_size), + &sign, + &logdet); + *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet); + END_OUTER_LOOP + + free(tmp_buff); + } +} +/**end repeat**/ + + +/* -------------------------------------------------------------------------- */ + /* Eigh family */ + +typedef struct eigh_params_struct { + void *A; /* matrix */ + void *W; /* eigenvalue vector */ + void *WORK; /* main work buffer */ + void *RWORK; /* secondary work buffer (for complex versions) */ + void *IWORK; + fortran_int N; + fortran_int LWORK; + fortran_int LRWORK; + fortran_int LIWORK; + char JOBZ; + char UPLO; + fortran_int LDA; +} EIGH_PARAMS_t; + +/**begin repeat + #TYPE = FLOAT, DOUBLE# + #typ = npy_float, npy_double# + #ftyp = fortran_real, fortran_doublereal# + #lapack_func = ssyevd, dsyevd# +*/ + +static NPY_INLINE fortran_int +call_@lapack_func@(EIGH_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + params->A, ¶ms->LDA, params->W, + params->WORK, 
¶ms->LWORK, + params->IWORK, ¶ms->LIWORK, + &rv); + return rv; +} + +/* + * Initialize the parameters to use in for the lapack function _syevd + * Handles buffer allocation + */ +static NPY_INLINE int +init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, + fortran_int N) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + fortran_int lwork; + fortran_int liwork; + npy_uint8 *a, *w, *work, *iwork; + size_t safe_N = N; + size_t alloc_size = safe_N * (safe_N + 1) * sizeof(@typ@); + fortran_int lda = fortran_int_max(N, 1); + + mem_buff = malloc(alloc_size); + + if (!mem_buff) { + goto error; + } + a = mem_buff; + w = mem_buff + safe_N * safe_N * sizeof(@typ@); + + params->A = a; + params->W = w; + params->RWORK = NULL; /* unused */ + params->N = N; + params->LRWORK = 0; /* unused */ + params->JOBZ = JOBZ; + params->UPLO = UPLO; + params->LDA = lda; + + /* Work size query */ + { + @typ@ query_work_size; + fortran_int query_iwork_size; + + params->LWORK = -1; + params->LIWORK = -1; + params->WORK = &query_work_size; + params->IWORK = &query_iwork_size; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + lwork = (fortran_int)query_work_size; + liwork = query_iwork_size; + } + + mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int)); + if (!mem_buff2) { + goto error; + } + + work = mem_buff2; + iwork = mem_buff2 + lwork*sizeof(@typ@); + + params->LWORK = lwork; + params->WORK = work; + params->LIWORK = liwork; + params->IWORK = iwork; + + return 1; + + error: + /* something failed */ + memset(params, 0, sizeof(*params)); + free(mem_buff2); + free(mem_buff); + + return 0; +} +/**end repeat**/ + + +/**begin repeat + #TYPE = CFLOAT, CDOUBLE# + #typ = npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double# + #ftyp = fortran_complex, fortran_doublecomplex# + #fbasetyp = fortran_real, fortran_doublereal# + #lapack_func = cheevd, zheevd# +*/ +static NPY_INLINE fortran_int +call_@lapack_func@(EIGH_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + params->A, ¶ms->LDA, params->W, + params->WORK, ¶ms->LWORK, + params->RWORK, ¶ms->LRWORK, + params->IWORK, ¶ms->LIWORK, + &rv); + return rv; +} + +/* + * Initialize the parameters to use in for the lapack function _heev + * Handles buffer allocation + */ +static NPY_INLINE int +init_@lapack_func@(EIGH_PARAMS_t *params, + char JOBZ, + char UPLO, + fortran_int N) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + fortran_int lwork; + fortran_int lrwork; + fortran_int liwork; + npy_uint8 *a, *w, *work, *rwork, *iwork; + size_t safe_N = N; + fortran_int lda = fortran_int_max(N, 1); + + mem_buff = malloc(safe_N * safe_N * sizeof(@typ@) + + safe_N * sizeof(@basetyp@)); + if (!mem_buff) { + goto error; + } + a = mem_buff; + w = mem_buff + safe_N * safe_N * sizeof(@typ@); + + params->A = a; + params->W = w; + params->N = N; + params->JOBZ = JOBZ; + params->UPLO = UPLO; + params->LDA = lda; + + /* Work size query */ + { + @ftyp@ query_work_size; + @fbasetyp@ query_rwork_size; + fortran_int query_iwork_size; + + params->LWORK = -1; + params->LRWORK = -1; + params->LIWORK = -1; + params->WORK = &query_work_size; + params->RWORK = &query_rwork_size; + params->IWORK = &query_iwork_size; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + lwork = (fortran_int)*(@fbasetyp@*)&query_work_size; + lrwork = (fortran_int)query_rwork_size; + liwork = query_iwork_size; + } + + mem_buff2 = malloc(lwork*sizeof(@typ@) + + lrwork*sizeof(@basetyp@) + + 
liwork*sizeof(fortran_int)); + if (!mem_buff2) { + goto error; + } + + work = mem_buff2; + rwork = work + lwork*sizeof(@typ@); + iwork = rwork + lrwork*sizeof(@basetyp@); + + params->WORK = work; + params->RWORK = rwork; + params->IWORK = iwork; + params->LWORK = lwork; + params->LRWORK = lrwork; + params->LIWORK = liwork; + + return 1; + + /* something failed */ +error: + memset(params, 0, sizeof(*params)); + free(mem_buff2); + free(mem_buff); + + return 0; +} +/**end repeat**/ + + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #BASETYPE = FLOAT, DOUBLE, FLOAT, DOUBLE# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double, npy_float, npy_double# + #lapack_func = ssyevd, dsyevd, cheevd, zheevd# +**/ +/* + * (M, M)->(M,)(M, M) + * dimensions[1] -> M + * args[0] -> A[in] + * args[1] -> W + * args[2] -> A[out] + */ + +static NPY_INLINE void +release_@lapack_func@(EIGH_PARAMS_t *params) +{ + /* allocated memory in A and WORK */ + free(params->A); + free(params->WORK); + memset(params, 0, sizeof(*params)); +} + + +static NPY_INLINE void +@TYPE@_eigh_wrapper(char JOBZ, + char UPLO, + char**args, + npy_intp* dimensions, + npy_intp* steps) +{ + ptrdiff_t outer_steps[3]; + size_t iter; + size_t outer_dim = *dimensions++; + size_t op_count = (JOBZ=='N')?2:3; + EIGH_PARAMS_t eigh_params; + int error_occurred = get_fp_invalid_and_clear(); + + for (iter = 0; iter < op_count; ++iter) { + outer_steps[iter] = (ptrdiff_t) steps[iter]; + } + steps += op_count; + + if (init_@lapack_func@(&eigh_params, + JOBZ, + UPLO, + (fortran_int)dimensions[0])) { + LINEARIZE_DATA_t matrix_in_ld; + LINEARIZE_DATA_t eigenvectors_out_ld; + LINEARIZE_DATA_t eigenvalues_out_ld; + + init_linearize_data(&matrix_in_ld, + eigh_params.N, eigh_params.N, + steps[1], steps[0]); + init_linearize_data(&eigenvalues_out_ld, + 1, eigh_params.N, + 0, steps[2]); + if ('V' == eigh_params.JOBZ) { + init_linearize_data(&eigenvectors_out_ld, + eigh_params.N, eigh_params.N, + steps[4], steps[3]); + } + + for (iter = 0; iter < outer_dim; ++iter) { + int not_ok; + /* copy the matrix in */ + linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld); + not_ok = call_@lapack_func@(&eigh_params); + if (!not_ok) { + /* lapack ok, copy result out */ + delinearize_@BASETYPE@_matrix(args[1], + eigh_params.W, + &eigenvalues_out_ld); + + if ('V' == eigh_params.JOBZ) { + delinearize_@TYPE@_matrix(args[2], + eigh_params.A, + &eigenvectors_out_ld); + } + } else { + /* lapack fail, set result to nan */ + error_occurred = 1; + nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld); + if ('V' == eigh_params.JOBZ) { + nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld); + } + } + update_pointers((npy_uint8**)args, outer_steps, op_count); + } + + release_@lapack_func@(&eigh_params); + } + + set_fp_invalid_or_clear(error_occurred); +} +/**end repeat**/ + + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + */ +static void +@TYPE@_eighlo(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps); +} + +static void +@TYPE@_eighup(char **args, + npy_intp *dimensions, + npy_intp *steps, + void* NPY_UNUSED(func)) +{ + @TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps); +} + +static void +@TYPE@_eigvalshlo(char **args, + npy_intp *dimensions, + npy_intp *steps, + void* NPY_UNUSED(func)) +{ + @TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps); +} + +static void +@TYPE@_eigvalshup(char **args, + npy_intp *dimensions, + 
npy_intp *steps, + void* NPY_UNUSED(func)) +{ + @TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps); +} +/**end repeat**/ + +/* -------------------------------------------------------------------------- */ + /* Solve family (includes inv) */ + +typedef struct gesv_params_struct +{ + void *A; /* A is (N, N) of base type */ + void *B; /* B is (N, NRHS) of base type */ + fortran_int * IPIV; /* IPIV is (N) */ + + fortran_int N; + fortran_int NRHS; + fortran_int LDA; + fortran_int LDB; +} GESV_PARAMS_t; + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + #ftyp = fortran_real, fortran_doublereal, + fortran_complex, fortran_doublecomplex# + #lapack_func = sgesv, dgesv, cgesv, zgesv# +*/ + +static NPY_INLINE fortran_int +call_@lapack_func@(GESV_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->IPIV, + params->B, ¶ms->LDB, + &rv); + return rv; +} + +/* + * Initialize the parameters to use in for the lapack function _heev + * Handles buffer allocation + */ +static NPY_INLINE int +init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *a, *b, *ipiv; + size_t safe_N = N; + size_t safe_NRHS = NRHS; + fortran_int ld = fortran_int_max(N, 1); + mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@) + + safe_N * safe_NRHS*sizeof(@ftyp@) + + safe_N * sizeof(fortran_int)); + if (!mem_buff) { + goto error; + } + a = mem_buff; + b = a + safe_N * safe_N * sizeof(@ftyp@); + ipiv = b + safe_N * safe_NRHS * sizeof(@ftyp@); + + params->A = a; + params->B = b; + params->IPIV = (fortran_int*)ipiv; + params->N = N; + params->NRHS = NRHS; + params->LDA = ld; + params->LDB = ld; + + return 1; + error: + free(mem_buff); + memset(params, 0, sizeof(*params)); + + return 0; +} + +static NPY_INLINE void +release_@lapack_func@(GESV_PARAMS_t *params) +{ + /* memory block base is in A */ + free(params->A); + memset(params, 0, sizeof(*params)); +} + +static void +@TYPE@_solve(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + GESV_PARAMS_t params; + fortran_int n, nrhs; + int error_occurred = get_fp_invalid_and_clear(); + INIT_OUTER_LOOP_3 + + n = (fortran_int)dimensions[0]; + nrhs = (fortran_int)dimensions[1]; + if (init_@lapack_func@(¶ms, n, nrhs)) { + LINEARIZE_DATA_t a_in, b_in, r_out; + + init_linearize_data(&a_in, n, n, steps[1], steps[0]); + init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); + init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); + + BEGIN_OUTER_LOOP_3 + int not_ok; + linearize_@TYPE@_matrix(params.A, args[0], &a_in); + linearize_@TYPE@_matrix(params.B, args[1], &b_in); + not_ok =call_@lapack_func@(¶ms); + if (!not_ok) { + delinearize_@TYPE@_matrix(args[2], params.B, &r_out); + } else { + error_occurred = 1; + nan_@TYPE@_matrix(args[2], &r_out); + } + END_OUTER_LOOP + + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} + +static void +@TYPE@_solve1(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + GESV_PARAMS_t params; + int error_occurred = get_fp_invalid_and_clear(); + fortran_int n; + INIT_OUTER_LOOP_3 + + n = (fortran_int)dimensions[0]; + if (init_@lapack_func@(¶ms, n, 1)) { + LINEARIZE_DATA_t a_in, b_in, r_out; + init_linearize_data(&a_in, n, n, steps[1], steps[0]); + init_linearize_data(&b_in, 1, n, 1, steps[2]); + init_linearize_data(&r_out, 1, n, 1, steps[3]); + + BEGIN_OUTER_LOOP_3 + int not_ok; + 
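+            /*
+             * gesv factorizes A in place and overwrites B with the solution,
+             * so both operands are copied into the scratch buffers before the
+             * call and B is copied back out only on success.
+             */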
linearize_@TYPE@_matrix(params.A, args[0], &a_in); + linearize_@TYPE@_matrix(params.B, args[1], &b_in); + not_ok = call_@lapack_func@(¶ms); + if (!not_ok) { + delinearize_@TYPE@_matrix(args[2], params.B, &r_out); + } else { + error_occurred = 1; + nan_@TYPE@_matrix(args[2], &r_out); + } + END_OUTER_LOOP + + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} + +static void +@TYPE@_inv(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + GESV_PARAMS_t params; + fortran_int n; + int error_occurred = get_fp_invalid_and_clear(); + INIT_OUTER_LOOP_2 + + n = (fortran_int)dimensions[0]; + if (init_@lapack_func@(¶ms, n, n)) { + LINEARIZE_DATA_t a_in, r_out; + init_linearize_data(&a_in, n, n, steps[1], steps[0]); + init_linearize_data(&r_out, n, n, steps[3], steps[2]); + + BEGIN_OUTER_LOOP_2 + int not_ok; + linearize_@TYPE@_matrix(params.A, args[0], &a_in); + identity_@TYPE@_matrix(params.B, n); + not_ok = call_@lapack_func@(¶ms); + if (!not_ok) { + delinearize_@TYPE@_matrix(args[1], params.B, &r_out); + } else { + error_occurred = 1; + nan_@TYPE@_matrix(args[1], &r_out); + } + END_OUTER_LOOP + + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} + +/**end repeat**/ + + +/* -------------------------------------------------------------------------- */ + /* Cholesky decomposition */ + +typedef struct potr_params_struct +{ + void *A; + fortran_int N; + fortran_int LDA; + char UPLO; +} POTR_PARAMS_t; + +/**begin repeat + + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #ftyp = fortran_real, fortran_doublereal, + fortran_complex, fortran_doublecomplex# + #lapack_func = spotrf, dpotrf, cpotrf, zpotrf# + */ + +static NPY_INLINE fortran_int +call_@lapack_func@(POTR_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->UPLO, + ¶ms->N, params->A, ¶ms->LDA, + &rv); + return rv; +} + +static NPY_INLINE int +init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *a; + size_t safe_N = N; + fortran_int lda = fortran_int_max(N, 1); + + mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@)); + if (!mem_buff) { + goto error; + } + + a = mem_buff; + + params->A = a; + params->N = N; + params->LDA = lda; + params->UPLO = UPLO; + + return 1; + error: + free(mem_buff); + memset(params, 0, sizeof(*params)); + + return 0; +} + +static NPY_INLINE void +release_@lapack_func@(POTR_PARAMS_t *params) +{ + /* memory block base in A */ + free(params->A); + memset(params, 0, sizeof(*params)); +} + +static void +@TYPE@_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) +{ + POTR_PARAMS_t params; + int error_occurred = get_fp_invalid_and_clear(); + fortran_int n; + INIT_OUTER_LOOP_2 + + assert(uplo == 'L'); + + n = (fortran_int)dimensions[0]; + if (init_@lapack_func@(¶ms, uplo, n)) { + LINEARIZE_DATA_t a_in, r_out; + init_linearize_data(&a_in, n, n, steps[1], steps[0]); + init_linearize_data(&r_out, n, n, steps[3], steps[2]); + BEGIN_OUTER_LOOP_2 + int not_ok; + linearize_@TYPE@_matrix(params.A, args[0], &a_in); + not_ok = call_@lapack_func@(¶ms); + if (!not_ok) { + triu_@TYPE@_matrix(params.A, params.N); + delinearize_@TYPE@_matrix(args[1], params.A, &r_out); + } else { + error_occurred = 1; + nan_@TYPE@_matrix(args[1], &r_out); + } + END_OUTER_LOOP + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} + +static void +@TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + 
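+    /* only the lower-triangular Cholesky is exposed; @TYPE@_cholesky asserts uplo == 'L' */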
@TYPE@_cholesky('L', args, dimensions, steps); +} + +/**end repeat**/ + +/* -------------------------------------------------------------------------- */ + /* eig family */ + +typedef struct geev_params_struct { + void *A; + void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ + void *WI; + void *VLR; /* REAL VL buffers for _geev where _ is s, d */ + void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */ + void *WORK; + void *W; /* final w */ + void *VL; /* final vl */ + void *VR; /* final vr */ + + fortran_int N; + fortran_int LDA; + fortran_int LDVL; + fortran_int LDVR; + fortran_int LWORK; + + char JOBVL; + char JOBVR; +} GEEV_PARAMS_t; + +static NPY_INLINE void +dump_geev_params(const char *name, GEEV_PARAMS_t* params) +{ + TRACE_TXT("\n%s\n" + + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + "\t%10s: %p\n"\ + + "\t%10s: %d\n"\ + "\t%10s: %d\n"\ + "\t%10s: %d\n"\ + "\t%10s: %d\n"\ + "\t%10s: %d\n"\ + + "\t%10s: %c\n"\ + "\t%10s: %c\n", + + name, + + "A", params->A, + "WR", params->WR, + "WI", params->WI, + "VLR", params->VLR, + "VRR", params->VRR, + "WORK", params->WORK, + "W", params->W, + "VL", params->VL, + "VR", params->VR, + + "N", (int)params->N, + "LDA", (int)params->LDA, + "LDVL", (int)params->LDVL, + "LDVR", (int)params->LDVR, + "LWORK", (int)params->LWORK, + + "JOBVL", params->JOBVL, + "JOBVR", params->JOBVR); +} + +/**begin repeat + #TYPE = FLOAT, DOUBLE# + #CTYPE = CFLOAT, CDOUBLE# + #typ = float, double# + #complextyp = COMPLEX_t, DOUBLECOMPLEX_t# + #lapack_func = sgeev, dgeev# + #zero = 0.0f, 0.0# +*/ + +static NPY_INLINE fortran_int +call_@lapack_func@(GEEV_PARAMS_t* params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, + ¶ms->N, params->A, ¶ms->LDA, + params->WR, params->WI, + params->VLR, ¶ms->LDVL, + params->VRR, ¶ms->LDVR, + params->WORK, ¶ms->LWORK, + &rv); + return rv; +} + +static NPY_INLINE int +init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; + size_t safe_n = n; + size_t a_size = safe_n * safe_n * sizeof(@typ@); + size_t wr_size = safe_n * sizeof(@typ@); + size_t wi_size = safe_n * sizeof(@typ@); + size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(@typ@) : 0; + size_t vrr_size = jobvr=='V' ? 
safe_n * safe_n * sizeof(@typ@) : 0; + size_t w_size = wr_size*2; + size_t vl_size = vlr_size*2; + size_t vr_size = vrr_size*2; + size_t work_count = 0; + fortran_int ld = fortran_int_max(n, 1); + + /* allocate data for known sizes (all but work) */ + mem_buff = malloc(a_size + wr_size + wi_size + + vlr_size + vrr_size + + w_size + vl_size + vr_size); + if (!mem_buff) { + goto error; + } + + a = mem_buff; + wr = a + a_size; + wi = wr + wr_size; + vlr = wi + wi_size; + vrr = vlr + vlr_size; + w = vrr + vrr_size; + vl = w + w_size; + vr = vl + vl_size; + + params->A = a; + params->WR = wr; + params->WI = wi; + params->VLR = vlr; + params->VRR = vrr; + params->W = w; + params->VL = vl; + params->VR = vr; + params->N = n; + params->LDA = ld; + params->LDVL = ld; + params->LDVR = ld; + params->JOBVL = jobvl; + params->JOBVR = jobvr; + + /* Work size query */ + { + @typ@ work_size_query; + + params->LWORK = -1; + params->WORK = &work_size_query; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + work_count = (size_t)work_size_query; + } + + mem_buff2 = malloc(work_count*sizeof(@typ@)); + if (!mem_buff2) { + goto error; + } + work = mem_buff2; + + params->LWORK = (fortran_int)work_count; + params->WORK = work; + + return 1; + error: + free(mem_buff2); + free(mem_buff); + memset(params, 0, sizeof(*params)); + + return 0; +} + +static NPY_INLINE void +mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n) +{ + size_t iter; + for (iter = 0; iter < n; ++iter) { + c[iter].array[0] = re[iter]; + c[iter].array[1] = @zero@; + } +} + +static NPY_INLINE void +mk_@TYPE@_complex_array(@complextyp@ *c, + const @typ@ *re, + const @typ@ *im, + size_t n) +{ + size_t iter; + for (iter = 0; iter < n; ++iter) { + c[iter].array[0] = re[iter]; + c[iter].array[1] = im[iter]; + } +} + +static NPY_INLINE void +mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c, + const @typ@ *r, + size_t n) +{ + size_t iter; + for (iter = 0; iter < n; ++iter) { + @typ@ re = r[iter]; + @typ@ im = r[iter+n]; + c[iter].array[0] = re; + c[iter].array[1] = im; + c[iter+n].array[0] = re; + c[iter+n].array[1] = -im; + } +} + +/* + * make the complex eigenvectors from the real array produced by sgeev/zgeev. + * c is the array where the results will be left. + * r is the source array of reals produced by sgeev/zgeev + * i is the eigenvalue imaginary part produced by sgeev/zgeev + * n is so that the order of the matrix is n by n + */ +static NPY_INLINE void +mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c, + const @typ@ *r, + const @typ@ *i, + size_t n) +{ + size_t iter = 0; + while (iter < n) + { + if (i[iter] == @zero@) { + /* eigenvalue was real, eigenvectors as well... */ + mk_@TYPE@_complex_array_from_real(c, r, n); + c += n; + r += n; + iter ++; + } else { + /* eigenvalue was complex, generate a pair of eigenvectors */ + mk_@TYPE@_complex_array_conjugate_pair(c, r, n); + c += 2*n; + r += 2*n; + iter += 2; + } + } +} + + +static NPY_INLINE void +process_@lapack_func@_results(GEEV_PARAMS_t *params) +{ + /* REAL versions of geev need the results to be translated + * into complex versions. This is the way to deal with imaginary + * results. In our gufuncs we will always return complex arrays! 
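+     * For a conjugate pair, sgeev/dgeev pack the eigenvector as two
+     * consecutive real columns (real part, then imaginary part); the helpers
+     * above rebuild v and conj(v) from that pair.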
+ */ + mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N); + + /* handle the eigenvectors */ + if ('V' == params->JOBVL) { + mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR, + params->WI, params->N); + } + if ('V' == params->JOBVR) { + mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR, + params->WI, params->N); + } +} + +/**end repeat**/ + + +/**begin repeat + #TYPE = CFLOAT, CDOUBLE# + #typ = COMPLEX_t, DOUBLECOMPLEX_t# + #ftyp = fortran_complex, fortran_doublecomplex# + #realtyp = float, double# + #lapack_func = cgeev, zgeev# + */ + +static NPY_INLINE fortran_int +call_@lapack_func@(GEEV_PARAMS_t* params) +{ + fortran_int rv; + + LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, + ¶ms->N, params->A, ¶ms->LDA, + params->W, + params->VL, ¶ms->LDVL, + params->VR, ¶ms->LDVR, + params->WORK, ¶ms->LWORK, + params->WR, /* actually RWORK */ + &rv); + return rv; +} + +static NPY_INLINE int +init_@lapack_func@(GEEV_PARAMS_t* params, + char jobvl, + char jobvr, + fortran_int n) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + npy_uint8 *a, *w, *vl, *vr, *work, *rwork; + size_t safe_n = n; + size_t a_size = safe_n * safe_n * sizeof(@ftyp@); + size_t w_size = safe_n * sizeof(@ftyp@); + size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0; + size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0; + size_t rwork_size = 2 * safe_n * sizeof(@realtyp@); + size_t work_count = 0; + size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; + fortran_int ld = fortran_int_max(n, 1); + + mem_buff = malloc(total_size); + if (!mem_buff) { + goto error; + } + + a = mem_buff; + w = a + a_size; + vl = w + w_size; + vr = vl + vl_size; + rwork = vr + vr_size; + + params->A = a; + params->WR = rwork; + params->WI = NULL; + params->VLR = NULL; + params->VRR = NULL; + params->VL = vl; + params->VR = vr; + params->W = w; + params->N = n; + params->LDA = ld; + params->LDVL = ld; + params->LDVR = ld; + params->JOBVL = jobvl; + params->JOBVR = jobvr; + + /* Work size query */ + { + @typ@ work_size_query; + + params->LWORK = -1; + params->WORK = &work_size_query; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + work_count = (size_t) work_size_query.array[0]; + /* Fix a bug in lapack 3.0.0 */ + if(work_count == 0) work_count = 1; + } + + mem_buff2 = malloc(work_count*sizeof(@ftyp@)); + if (!mem_buff2) { + goto error; + } + + work = mem_buff2; + + params->LWORK = (fortran_int)work_count; + params->WORK = work; + + return 1; + error: + free(mem_buff2); + free(mem_buff); + memset(params, 0, sizeof(*params)); + + return 0; +} + + +static NPY_INLINE void +process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params)) +{ + /* nothing to do here, complex versions are ready to copy out */ +} +/**end repeat**/ + + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CDOUBLE# + #COMPLEXTYPE = CFLOAT, CDOUBLE, CDOUBLE# + #ftype = fortran_real, fortran_doublereal, fortran_doublecomplex# + #lapack_func = sgeev, dgeev, zgeev# + */ + +static NPY_INLINE void +release_@lapack_func@(GEEV_PARAMS_t *params) +{ + free(params->WORK); + free(params->A); + memset(params, 0, sizeof(*params)); +} + +static NPY_INLINE void +@TYPE@_eig_wrapper(char JOBVL, + char JOBVR, + char**args, + npy_intp* dimensions, + npy_intp* steps) +{ + ptrdiff_t outer_steps[4]; + size_t iter; + size_t outer_dim = *dimensions++; + size_t op_count = 2; + int error_occurred = get_fp_invalid_and_clear(); + GEEV_PARAMS_t geev_params; + + assert(JOBVL == 'N'); + + STACK_TRACE; + 
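+    /*
+     * two core operands: the input matrix and the eigenvalue output; each
+     * requested set of eigenvectors (left and/or right) adds one more output.
+     */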
op_count += 'V'==JOBVL?1:0; + op_count += 'V'==JOBVR?1:0; + + for (iter = 0; iter < op_count; ++iter) { + outer_steps[iter] = (ptrdiff_t) steps[iter]; + } + steps += op_count; + + if (init_@lapack_func@(&geev_params, + JOBVL, JOBVR, + (fortran_int)dimensions[0])) { + LINEARIZE_DATA_t a_in; + LINEARIZE_DATA_t w_out; + LINEARIZE_DATA_t vl_out; + LINEARIZE_DATA_t vr_out; + + init_linearize_data(&a_in, + geev_params.N, geev_params.N, + steps[1], steps[0]); + steps += 2; + init_linearize_data(&w_out, + 1, geev_params.N, + 0, steps[0]); + steps += 1; + if ('V' == geev_params.JOBVL) { + init_linearize_data(&vl_out, + geev_params.N, geev_params.N, + steps[1], steps[0]); + steps += 2; + } + if ('V' == geev_params.JOBVR) { + init_linearize_data(&vr_out, + geev_params.N, geev_params.N, + steps[1], steps[0]); + } + + for (iter = 0; iter < outer_dim; ++iter) { + int not_ok; + char **arg_iter = args; + /* copy the matrix in */ + linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in); + not_ok = call_@lapack_func@(&geev_params); + + if (!not_ok) { + process_@lapack_func@_results(&geev_params); + delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, + geev_params.W, + &w_out); + + if ('V' == geev_params.JOBVL) { + delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, + geev_params.VL, + &vl_out); + } + if ('V' == geev_params.JOBVR) { + delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, + geev_params.VR, + &vr_out); + } + } else { + /* geev failed */ + error_occurred = 1; + nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out); + if ('V' == geev_params.JOBVL) { + nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out); + } + if ('V' == geev_params.JOBVR) { + nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out); + } + } + update_pointers((npy_uint8**)args, outer_steps, op_count); + } + + release_@lapack_func@(&geev_params); + } + + set_fp_invalid_or_clear(error_occurred); +} + +static void +@TYPE@_eig(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_eig_wrapper('N', 'V', args, dimensions, steps); +} + +static void +@TYPE@_eigvals(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_eig_wrapper('N', 'N', args, dimensions, steps); +} + +/**end repeat**/ + + +/* -------------------------------------------------------------------------- */ + /* singular value decomposition */ + +typedef struct gessd_params_struct +{ + void *A; + void *S; + void *U; + void *VT; + void *WORK; + void *RWORK; + void *IWORK; + + fortran_int M; + fortran_int N; + fortran_int LDA; + fortran_int LDU; + fortran_int LDVT; + fortran_int LWORK; + char JOBZ; +} GESDD_PARAMS_t; + + +static NPY_INLINE void +dump_gesdd_params(const char *name, + GESDD_PARAMS_t *params) +{ + TRACE_TXT("\n%s:\n"\ + + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + + "%14s: %15c'%c'\n", + + name, + + "A", params->A, + "S", params->S, + "U", params->U, + "VT", params->VT, + "WORK", params->WORK, + "RWORK", params->RWORK, + "IWORK", params->IWORK, + + "M", (int)params->M, + "N", (int)params->N, + "LDA", (int)params->LDA, + "LDU", (int)params->LDU, + "LDVT", (int)params->LDVT, + "LWORK", (int)params->LWORK, + + "JOBZ", ' ', params->JOBZ); +} + +static NPY_INLINE int +compute_urows_vtcolumns(char jobz, + fortran_int m, fortran_int n, + fortran_int *urows, fortran_int *vtcolumns) +{ + fortran_int min_m_n = fortran_int_min(m, n); + 
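+    /*
+     * map the gesdd JOBZ mode onto the number of singular vectors that will
+     * actually be stored: 'N' computes none, 'S' only the leading min(m, n),
+     * 'A' the full square U and VT.
+     */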
switch(jobz) + { + case 'N': + *urows = 0; + *vtcolumns = 0; + break; + case 'A': + *urows = m; + *vtcolumns = n; + break; + case 'S': + { + *urows = min_m_n; + *vtcolumns = min_m_n; + } + break; + default: + return 0; + } + + return 1; +} + + +/**begin repeat + #TYPE = FLOAT, DOUBLE# + #lapack_func = sgesdd, dgesdd# + #ftyp = fortran_real, fortran_doublereal# + */ + +static NPY_INLINE fortran_int +call_@lapack_func@(GESDD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + params->A, ¶ms->LDA, + params->S, + params->U, ¶ms->LDU, + params->VT, ¶ms->LDVT, + params->WORK, ¶ms->LWORK, + params->IWORK, + &rv); + return rv; +} + +static NPY_INLINE int +init_@lapack_func@(GESDD_PARAMS_t *params, + char jobz, + fortran_int m, + fortran_int n) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + npy_uint8 *a, *s, *u, *vt, *work, *iwork; + size_t safe_m = m; + size_t safe_n = n; + size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + fortran_int min_m_n = fortran_int_min(m, n); + size_t safe_min_m_n = min_m_n; + size_t s_size = safe_min_m_n * sizeof(@ftyp@); + fortran_int u_row_count, vt_column_count; + size_t safe_u_row_count, safe_vt_column_count; + size_t u_size, vt_size; + fortran_int work_count; + size_t work_size; + size_t iwork_size = 8 * safe_min_m_n * sizeof(fortran_int); + fortran_int ld = fortran_int_max(m, 1); + + if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) { + goto error; + } + + safe_u_row_count = u_row_count; + safe_vt_column_count = vt_column_count; + + u_size = safe_u_row_count * safe_m * sizeof(@ftyp@); + vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@); + + mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); + + if (!mem_buff) { + goto error; + } + + a = mem_buff; + s = a + a_size; + u = s + s_size; + vt = u + u_size; + iwork = vt + vt_size; + + /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ + vt_column_count = fortran_int_max(1, vt_column_count); + + params->M = m; + params->N = n; + params->A = a; + params->S = s; + params->U = u; + params->VT = vt; + params->RWORK = NULL; + params->IWORK = iwork; + params->M = m; + params->N = n; + params->LDA = ld; + params->LDU = ld; + params->LDVT = vt_column_count; + params->JOBZ = jobz; + + /* Work size query */ + { + @ftyp@ work_size_query; + + params->LWORK = -1; + params->WORK = &work_size_query; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + work_count = (fortran_int)work_size_query; + /* Fix a bug in lapack 3.0.0 */ + if(work_count == 0) work_count = 1; + work_size = (size_t)work_count * sizeof(@ftyp@); + } + + mem_buff2 = malloc(work_size); + if (!mem_buff2) { + goto error; + } + + work = mem_buff2; + + params->LWORK = work_count; + params->WORK = work; + + return 1; + error: + TRACE_TXT("%s failed init\n", __FUNCTION__); + free(mem_buff); + free(mem_buff2); + memset(params, 0, sizeof(*params)); + + return 0; +} + +/**end repeat**/ + +/**begin repeat + #TYPE = CFLOAT, CDOUBLE# + #ftyp = fortran_complex, fortran_doublecomplex# + #frealtyp = fortran_real, fortran_doublereal# + #typ = COMPLEX_t, DOUBLECOMPLEX_t# + #lapack_func = cgesdd, zgesdd# + */ + +static NPY_INLINE fortran_int +call_@lapack_func@(GESDD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + params->A, ¶ms->LDA, + params->S, + params->U, ¶ms->LDU, + params->VT, ¶ms->LDVT, + params->WORK, ¶ms->LWORK, + params->RWORK, + params->IWORK, + &rv); + return rv; +} + +static NPY_INLINE int 
+init_@lapack_func@(GESDD_PARAMS_t *params, + char jobz, + fortran_int m, + fortran_int n) +{ + npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; + npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; + size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; + size_t safe_u_row_count, safe_vt_column_count; + fortran_int u_row_count, vt_column_count, work_count; + size_t safe_m = m; + size_t safe_n = n; + fortran_int min_m_n = fortran_int_min(m, n); + size_t safe_min_m_n = min_m_n; + fortran_int ld = fortran_int_max(m, 1); + + if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) { + goto error; + } + + safe_u_row_count = u_row_count; + safe_vt_column_count = vt_column_count; + + a_size = safe_m * safe_n * sizeof(@ftyp@); + s_size = safe_min_m_n * sizeof(@frealtyp@); + u_size = safe_u_row_count * safe_m * sizeof(@ftyp@); + vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@); + rwork_size = 'N'==jobz? + (7 * safe_min_m_n) : + (5*safe_min_m_n * safe_min_m_n + 5*safe_min_m_n); + rwork_size *= sizeof(@ftyp@); + iwork_size = 8 * safe_min_m_n* sizeof(fortran_int); + + mem_buff = malloc(a_size + + s_size + + u_size + + vt_size + + rwork_size + + iwork_size); + if (!mem_buff) { + goto error; + } + + a = mem_buff; + s = a + a_size; + u = s + s_size; + vt = u + u_size; + rwork = vt + vt_size; + iwork = rwork + rwork_size; + + /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ + vt_column_count = fortran_int_max(1, vt_column_count); + + params->A = a; + params->S = s; + params->U = u; + params->VT = vt; + params->RWORK = rwork; + params->IWORK = iwork; + params->M = m; + params->N = n; + params->LDA = ld; + params->LDU = ld; + params->LDVT = vt_column_count; + params->JOBZ = jobz; + + /* Work size query */ + { + @ftyp@ work_size_query; + + params->LWORK = -1; + params->WORK = &work_size_query; + + if (call_@lapack_func@(params) != 0) { + goto error; + } + + work_count = (fortran_int)((@typ@*)&work_size_query)->array[0]; + /* Fix a bug in lapack 3.0.0 */ + if(work_count == 0) work_count = 1; + work_size = (size_t)work_count * sizeof(@ftyp@); + } + + mem_buff2 = malloc(work_size); + if (!mem_buff2) { + goto error; + } + + work = mem_buff2; + + params->LWORK = work_count; + params->WORK = work; + + return 1; + error: + TRACE_TXT("%s failed init\n", __FUNCTION__); + free(mem_buff2); + free(mem_buff); + memset(params, 0, sizeof(*params)); + + return 0; +} +/**end repeat**/ + + +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + #REALTYPE = FLOAT, DOUBLE, FLOAT, DOUBLE# + #lapack_func = sgesdd, dgesdd, cgesdd, zgesdd# + */ +static NPY_INLINE void +release_@lapack_func@(GESDD_PARAMS_t* params) +{ + /* A and WORK contain allocated blocks */ + free(params->A); + free(params->WORK); + memset(params, 0, sizeof(*params)); +} + +static NPY_INLINE void +@TYPE@_svd_wrapper(char JOBZ, + char **args, + npy_intp* dimensions, + npy_intp* steps) +{ + ptrdiff_t outer_steps[4]; + int error_occurred = get_fp_invalid_and_clear(); + size_t iter; + size_t outer_dim = *dimensions++; + size_t op_count = (JOBZ=='N')?2:4; + GESDD_PARAMS_t params; + + for (iter = 0; iter < op_count; ++iter) { + outer_steps[iter] = (ptrdiff_t) steps[iter]; + } + steps += op_count; + + if (init_@lapack_func@(¶ms, + JOBZ, + (fortran_int)dimensions[0], + (fortran_int)dimensions[1])) { + LINEARIZE_DATA_t a_in, u_out, s_out, v_out; + fortran_int min_m_n = params.M < params.N ? 
params.M : params.N; + + init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); + if ('N' == params.JOBZ) { + /* only the singular values are wanted */ + init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); + } else { + fortran_int u_columns, v_rows; + if ('S' == params.JOBZ) { + u_columns = min_m_n; + v_rows = min_m_n; + } else { /* JOBZ == 'A' */ + u_columns = params.M; + v_rows = params.N; + } + init_linearize_data(&u_out, + u_columns, params.M, + steps[3], steps[2]); + init_linearize_data(&s_out, + 1, min_m_n, + 0, steps[4]); + init_linearize_data(&v_out, + params.N, v_rows, + steps[6], steps[5]); + } + + for (iter = 0; iter < outer_dim; ++iter) { + int not_ok; + /* copy the matrix in */ + linearize_@TYPE@_matrix(params.A, args[0], &a_in); + not_ok = call_@lapack_func@(¶ms); + if (!not_ok) { + if ('N' == params.JOBZ) { + delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out); + } else { + if ('A' == params.JOBZ && min_m_n == 0) { + /* Lapack has betrayed us and left these uninitialized, + * so produce an identity matrix for whichever of u + * and v is not empty. + */ + identity_@TYPE@_matrix(params.U, params.M); + identity_@TYPE@_matrix(params.VT, params.N); + } + + delinearize_@TYPE@_matrix(args[1], params.U, &u_out); + delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out); + delinearize_@TYPE@_matrix(args[3], params.VT, &v_out); + } + } else { + error_occurred = 1; + if ('N' == params.JOBZ) { + nan_@REALTYPE@_matrix(args[1], &s_out); + } else { + nan_@TYPE@_matrix(args[1], &u_out); + nan_@REALTYPE@_matrix(args[2], &s_out); + nan_@TYPE@_matrix(args[3], &v_out); + } + } + update_pointers((npy_uint8**)args, outer_steps, op_count); + } + + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} +/**end repeat*/ + + +/* svd gufunc entry points */ +/**begin repeat + #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + */ +static void +@TYPE@_svd_N(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_svd_wrapper('N', args, dimensions, steps); +} + +static void +@TYPE@_svd_S(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_svd_wrapper('S', args, dimensions, steps); +} + +static void +@TYPE@_svd_A(char **args, + npy_intp *dimensions, + npy_intp *steps, + void *NPY_UNUSED(func)) +{ + @TYPE@_svd_wrapper('A', args, dimensions, steps); +} + +/**end repeat**/ + + +/* -------------------------------------------------------------------------- */ + /* least squares */ + +typedef struct gelsd_params_struct +{ + fortran_int M; + fortran_int N; + fortran_int NRHS; + void *A; + fortran_int LDA; + void *B; + fortran_int LDB; + void *S; + void *RCOND; + fortran_int RANK; + void *WORK; + fortran_int LWORK; + void *RWORK; + void *IWORK; +} GELSD_PARAMS_t; + + +static inline void +dump_gelsd_params(const char *name, + GELSD_PARAMS_t *params) +{ + TRACE_TXT("\n%s:\n"\ + + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + + "%14s: %18p\n", + + name, + + "A", params->A, + "B", params->B, + "S", params->S, + "WORK", params->WORK, + "RWORK", params->RWORK, + "IWORK", params->IWORK, + + "M", (int)params->M, + "N", (int)params->N, + "NRHS", (int)params->NRHS, + "LDA", (int)params->LDA, + "LDB", (int)params->LDB, + "LWORK", (int)params->LWORK, + "RANK", (int)params->RANK, + + "RCOND", params->RCOND); +} + + +/**begin 
repeat + #TYPE=FLOAT,DOUBLE# + #lapack_func=sgelsd,dgelsd# + #ftyp=fortran_real,fortran_doublereal# + */ + +static inline fortran_int +call_@lapack_func@(GELSD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->IWORK, + &rv); + return rv; +} + +static inline int +init_@lapack_func@(GELSD_PARAMS_t *params, + fortran_int m, + fortran_int n, + fortran_int nrhs) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + npy_uint8 *a, *b, *s, *work, *iwork; + fortran_int min_m_n = fortran_int_min(m, n); + fortran_int max_m_n = fortran_int_max(m, n); + size_t safe_min_m_n = min_m_n; + size_t safe_max_m_n = max_m_n; + size_t safe_m = m; + size_t safe_n = n; + size_t safe_nrhs = nrhs; + + size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); + size_t s_size = safe_min_m_n * sizeof(@ftyp@); + + fortran_int work_count; + size_t work_size; + size_t iwork_size; + fortran_int lda = fortran_int_max(1, m); + fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); + + mem_buff = malloc(a_size + b_size + s_size); + + if (!mem_buff) + goto error; + + a = mem_buff; + b = a + a_size; + s = b + b_size; + + + params->M = m; + params->N = n; + params->NRHS = nrhs; + params->A = a; + params->B = b; + params->S = s; + params->LDA = lda; + params->LDB = ldb; + + { + /* compute optimal work size */ + @ftyp@ work_size_query; + fortran_int iwork_size_query; + + params->WORK = &work_size_query; + params->IWORK = &iwork_size_query; + params->RWORK = NULL; + params->LWORK = -1; + + if (call_@lapack_func@(params) != 0) + goto error; + + work_count = (fortran_int)work_size_query; + + work_size = (size_t) work_size_query * sizeof(@ftyp@); + iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); + } + + mem_buff2 = malloc(work_size + iwork_size); + if (!mem_buff2) + goto error; + + work = mem_buff2; + iwork = work + work_size; + + params->WORK = work; + params->RWORK = NULL; + params->IWORK = iwork; + params->LWORK = work_count; + + return 1; + error: + TRACE_TXT("%s failed init\n", __FUNCTION__); + free(mem_buff); + free(mem_buff2); + memset(params, 0, sizeof(*params)); + + return 0; +} + +/**end repeat**/ + +/**begin repeat + #TYPE=CFLOAT,CDOUBLE# + #ftyp=fortran_complex,fortran_doublecomplex# + #frealtyp=fortran_real,fortran_doublereal# + #typ=COMPLEX_t,DOUBLECOMPLEX_t# + #lapack_func=cgelsd,zgelsd# + */ + +static inline fortran_int +call_@lapack_func@(GELSD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->RWORK, params->IWORK, + &rv); + return rv; +} + +static inline int +init_@lapack_func@(GELSD_PARAMS_t *params, + fortran_int m, + fortran_int n, + fortran_int nrhs) +{ + npy_uint8 *mem_buff = NULL; + npy_uint8 *mem_buff2 = NULL; + npy_uint8 *a, *b, *s, *work, *iwork, *rwork; + fortran_int min_m_n = fortran_int_min(m, n); + fortran_int max_m_n = fortran_int_max(m, n); + size_t safe_min_m_n = min_m_n; + size_t safe_max_m_n = max_m_n; + size_t safe_m = m; + size_t safe_n = n; + size_t safe_nrhs = nrhs; + + size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); + size_t s_size = safe_min_m_n * sizeof(@frealtyp@); + + fortran_int work_count; + size_t work_size, rwork_size, 
iwork_size; + fortran_int lda = fortran_int_max(1, m); + fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); + + mem_buff = malloc(a_size + b_size + s_size); + + if (!mem_buff) + goto error; + + a = mem_buff; + b = a + a_size; + s = b + b_size; + + + params->M = m; + params->N = n; + params->NRHS = nrhs; + params->A = a; + params->B = b; + params->S = s; + params->LDA = lda; + params->LDB = ldb; + + { + /* compute optimal work size */ + @ftyp@ work_size_query; + @frealtyp@ rwork_size_query; + fortran_int iwork_size_query; + + params->WORK = &work_size_query; + params->IWORK = &iwork_size_query; + params->RWORK = &rwork_size_query; + params->LWORK = -1; + + if (call_@lapack_func@(params) != 0) + goto error; + + work_count = (fortran_int)work_size_query.r; + + work_size = (size_t )work_size_query.r * sizeof(@ftyp@); + rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@); + iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); + } + + mem_buff2 = malloc(work_size + rwork_size + iwork_size); + if (!mem_buff2) + goto error; + + work = mem_buff2; + rwork = work + work_size; + iwork = rwork + rwork_size; + + params->WORK = work; + params->RWORK = rwork; + params->IWORK = iwork; + params->LWORK = work_count; + + return 1; + error: + TRACE_TXT("%s failed init\n", __FUNCTION__); + free(mem_buff); + free(mem_buff2); + memset(params, 0, sizeof(*params)); + + return 0; +} + +/**end repeat**/ + + +/**begin repeat + #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# + #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# + #lapack_func=sgelsd,dgelsd,cgelsd,zgelsd# + #dot_func=sdot,ddot,cdotc,zdotc# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double, npy_float, npy_double# + #ftyp = fortran_real, fortran_doublereal, + fortran_complex, fortran_doublecomplex# + #cmplx = 0, 0, 1, 1# + */ +static inline void +release_@lapack_func@(GELSD_PARAMS_t* params) +{ + /* A and WORK contain allocated blocks */ + free(params->A); + free(params->WORK); + memset(params, 0, sizeof(*params)); +} + +/** Compute the squared l2 norm of a contiguous vector */ +static @basetyp@ +@TYPE@_abs2(@typ@ *p, npy_intp n) { + npy_intp i; + @basetyp@ res = 0; + for (i = 0; i < n; i++) { + @typ@ el = p[i]; +#if @cmplx@ + res += el.real*el.real + el.imag*el.imag; +#else + res += el*el; +#endif + } + return res; +} + +static void +@TYPE@_lstsq(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + GELSD_PARAMS_t params; + int error_occurred = get_fp_invalid_and_clear(); + fortran_int n, m, nrhs; + fortran_int excess; + + INIT_OUTER_LOOP_7 + + m = (fortran_int)dimensions[0]; + n = (fortran_int)dimensions[1]; + nrhs = (fortran_int)dimensions[2]; + excess = m - n; + + if (init_@lapack_func@(¶ms, m, n, nrhs)) { + LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out; + + init_linearize_data(&a_in, n, m, steps[1], steps[0]); + init_linearize_data_ex(&b_in, nrhs, m, steps[3], steps[2], fortran_int_max(n, m)); + init_linearize_data_ex(&x_out, nrhs, n, steps[5], steps[4], fortran_int_max(n, m)); + init_linearize_data(&r_out, 1, nrhs, 1, steps[6]); + init_linearize_data(&s_out, 1, fortran_int_min(n, m), 1, steps[7]); + + BEGIN_OUTER_LOOP_7 + int not_ok; + linearize_@TYPE@_matrix(params.A, args[0], &a_in); + linearize_@TYPE@_matrix(params.B, args[1], &b_in); + params.RCOND = args[2]; + not_ok = call_@lapack_func@(¶ms); + if (!not_ok) { + delinearize_@TYPE@_matrix(args[3], params.B, &x_out); + *(npy_int*) args[5] = params.RANK; + delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out); + + /* Note 
that linalg.lstsq discards this when excess == 0 */ + if (excess >= 0 && params.RANK == n) { + /* Compute the residuals as the square sum of each column */ + int i; + char *resid = args[4]; + @ftyp@ *components = (@ftyp@ *)params.B + n; + for (i = 0; i < nrhs; i++) { + @ftyp@ *vector = components + i*m; + /* Numpy and fortran floating types are the same size, + * so this cast is safe */ + @basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess); + memcpy( + resid + i*r_out.column_strides, + &abs2, sizeof(abs2)); + } + } + else { + /* Note that this is always discarded by linalg.lstsq */ + nan_@REALTYPE@_matrix(args[4], &r_out); + } + } else { + error_occurred = 1; + nan_@TYPE@_matrix(args[3], &x_out); + nan_@REALTYPE@_matrix(args[4], &r_out); + *(npy_int*) args[5] = -1; + nan_@REALTYPE@_matrix(args[6], &s_out); + } + END_OUTER_LOOP + + release_@lapack_func@(¶ms); + } + + set_fp_invalid_or_clear(error_occurred); +} + +/**end repeat**/ + +#pragma GCC diagnostic pop + +/* -------------------------------------------------------------------------- */ + /* gufunc registration */ + +static void *array_of_nulls[] = { + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL +}; + +#define FUNC_ARRAY_NAME(NAME) NAME ## _funcs + +#define GUFUNC_FUNC_ARRAY_REAL(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + FLOAT_ ## NAME, \ + DOUBLE_ ## NAME \ + } + +#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + FLOAT_ ## NAME, \ + DOUBLE_ ## NAME, \ + CFLOAT_ ## NAME, \ + CDOUBLE_ ## NAME \ + } + +/* There are problems with eig in complex single precision. 
+ * That kernel is disabled + */ +#define GUFUNC_FUNC_ARRAY_EIG(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + FLOAT_ ## NAME, \ + DOUBLE_ ## NAME, \ + CDOUBLE_ ## NAME \ + } + + +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq); +GUFUNC_FUNC_ARRAY_EIG(eig); +GUFUNC_FUNC_ARRAY_EIG(eigvals); + +static char equal_2_types[] = { + NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, + NPY_CDOUBLE, NPY_CDOUBLE +}; + +static char equal_3_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE +}; + +/* second result is logdet, that will always be a REAL */ +static char slogdet_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE +}; + +static char eigh_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, + NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE +}; + +static char eighvals_types[] = { + NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_FLOAT, + NPY_CDOUBLE, NPY_DOUBLE +}; + +static char eig_types[] = { + NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT, + NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE +}; + +static char eigvals_types[] = { + NPY_FLOAT, NPY_CFLOAT, + NPY_DOUBLE, NPY_CDOUBLE, + NPY_CDOUBLE, NPY_CDOUBLE +}; + +static char svd_1_1_types[] = { + NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_FLOAT, + NPY_CDOUBLE, NPY_DOUBLE +}; + +static char svd_1_3_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE +}; + +/* A, b, rcond, x, resid, rank, s, */ +static char lstsq_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, +}; + +typedef struct gufunc_descriptor_struct { + char *name; + char *signature; + char *doc; + int ntypes; + int nin; + int nout; + PyUFuncGenericFunction *funcs; + char *types; +} GUFUNC_DESCRIPTOR_t; + +GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { + { + "slogdet", + "(m,m)->(),()", + "slogdet on the last two dimensions and broadcast on the rest. \n"\ + "Results in two arrays, one with sign and the other with log of the"\ + " determinants. \n"\ + " \"(m,m)->(),()\" \n", + 4, 1, 2, + FUNC_ARRAY_NAME(slogdet), + slogdet_types + }, + { + "det", + "(m,m)->()", + "det of the last two dimensions and broadcast on the rest. 
\n"\ + " \"(m,m)->()\" \n", + 4, 1, 1, + FUNC_ARRAY_NAME(det), + equal_2_types + }, + { + "eigh_lo", + "(m,m)->(m),(m,m)", + "eigh on the last two dimension and broadcast to the rest, using"\ + " lower triangle \n"\ + "Results in a vector of eigenvalues and a matrix with the"\ + "eigenvectors. \n"\ + " \"(m,m)->(m),(m,m)\" \n", + 4, 1, 2, + FUNC_ARRAY_NAME(eighlo), + eigh_types + }, + { + "eigh_up", + "(m,m)->(m),(m,m)", + "eigh on the last two dimension and broadcast to the rest, using"\ + " upper triangle. \n"\ + "Results in a vector of eigenvalues and a matrix with the"\ + " eigenvectors. \n"\ + " \"(m,m)->(m),(m,m)\" \n", + 4, 1, 2, + FUNC_ARRAY_NAME(eighup), + eigh_types + }, + { + "eigvalsh_lo", + "(m,m)->(m)", + "eigh on the last two dimension and broadcast to the rest, using"\ + " lower triangle. \n"\ + "Results in a vector of eigenvalues and a matrix with the"\ + "eigenvectors. \n"\ + " \"(m,m)->(m)\" \n", + 4, 1, 1, + FUNC_ARRAY_NAME(eigvalshlo), + eighvals_types + }, + { + "eigvalsh_up", + "(m,m)->(m)", + "eigvalsh on the last two dimension and broadcast to the rest,"\ + " using upper triangle. \n"\ + "Results in a vector of eigenvalues and a matrix with the"\ + "eigenvectors.\n"\ + " \"(m,m)->(m)\" \n", + 4, 1, 1, + FUNC_ARRAY_NAME(eigvalshup), + eighvals_types + }, + { + "solve", + "(m,m),(m,n)->(m,n)", + "solve the system a x = b, on the last two dimensions, broadcast"\ + " to the rest. \n"\ + "Results in a matrices with the solutions. \n"\ + " \"(m,m),(m,n)->(m,n)\" \n", + 4, 2, 1, + FUNC_ARRAY_NAME(solve), + equal_3_types + }, + { + "solve1", + "(m,m),(m)->(m)", + "solve the system a x = b, for b being a vector, broadcast in"\ + " the outer dimensions. \n"\ + "Results in vectors with the solutions. \n"\ + " \"(m,m),(m)->(m)\" \n", + 4, 2, 1, + FUNC_ARRAY_NAME(solve1), + equal_3_types + }, + { + "inv", + "(m, m)->(m, m)", + "compute the inverse of the last two dimensions and broadcast"\ + " to the rest. \n"\ + "Results in the inverse matrices. \n"\ + " \"(m,m)->(m,m)\" \n", + 4, 1, 1, + FUNC_ARRAY_NAME(inv), + equal_2_types + }, + { + "cholesky_lo", + "(m,m)->(m,m)", + "cholesky decomposition of hermitian positive-definite matrices. \n"\ + "Broadcast to all outer dimensions. \n"\ + " \"(m,m)->(m,m)\" \n", + 4, 1, 1, + FUNC_ARRAY_NAME(cholesky_lo), + equal_2_types + }, + { + "svd_m", + "(m,n)->(m)", + "svd when n>=m. ", + 4, 1, 1, + FUNC_ARRAY_NAME(svd_N), + svd_1_1_types + }, + { + "svd_n", + "(m,n)->(n)", + "svd when n<=m", + 4, 1, 1, + FUNC_ARRAY_NAME(svd_N), + svd_1_1_types + }, + { + "svd_m_s", + "(m,n)->(m,m),(m),(m,n)", + "svd when m<=n", + 4, 1, 3, + FUNC_ARRAY_NAME(svd_S), + svd_1_3_types + }, + { + "svd_n_s", + "(m,n)->(m,n),(n),(n,n)", + "svd when m>=n", + 4, 1, 3, + FUNC_ARRAY_NAME(svd_S), + svd_1_3_types + }, + { + "svd_m_f", + "(m,n)->(m,m),(m),(n,n)", + "svd when m<=n", + 4, 1, 3, + FUNC_ARRAY_NAME(svd_A), + svd_1_3_types + }, + { + "svd_n_f", + "(m,n)->(m,m),(n),(n,n)", + "svd when m>=n", + 4, 1, 3, + FUNC_ARRAY_NAME(svd_A), + svd_1_3_types + }, + { + "eig", + "(m,m)->(m),(m,m)", + "eig on the last two dimension and broadcast to the rest. \n"\ + "Results in a vector with the eigenvalues and a matrix with the"\ + " eigenvectors. \n"\ + " \"(m,m)->(m),(m,m)\" \n", + 3, 1, 2, + FUNC_ARRAY_NAME(eig), + eig_types + }, + { + "eigvals", + "(m,m)->(m)", + "eigvals on the last two dimension and broadcast to the rest. \n"\ + "Results in a vector of eigenvalues. 
\n", + 3, 1, 1, + FUNC_ARRAY_NAME(eigvals), + eigvals_types + }, + { + "lstsq_m", + "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(m)", + "least squares on the last two dimensions and broadcast to the rest. \n"\ + "For m <= n. \n", + 4, 3, 4, + FUNC_ARRAY_NAME(lstsq), + lstsq_types + }, + { + "lstsq_n", + "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(n)", + "least squares on the last two dimensions and broadcast to the rest. \n"\ + "For m >= n, meaning that residuals are produced. \n", + 4, 3, 4, + FUNC_ARRAY_NAME(lstsq), + lstsq_types + } +}; + +static void +addUfuncs(PyObject *dictionary) { + PyObject *f; + int i; + const int gufunc_count = sizeof(gufunc_descriptors)/ + sizeof(gufunc_descriptors[0]); + for (i = 0; i < gufunc_count; i++) { + GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i]; + f = PyUFunc_FromFuncAndDataAndSignature(d->funcs, + array_of_nulls, + d->types, + d->ntypes, + d->nin, + d->nout, + PyUFunc_None, + d->name, + d->doc, + 0, + d->signature); + PyDict_SetItemString(dictionary, d->name, f); +#if 0 + dump_ufunc_object((PyUFuncObject*) f); +#endif + Py_DECREF(f); + } +} + + + +/* -------------------------------------------------------------------------- */ + /* Module initialization stuff */ + +static PyMethodDef UMath_LinAlgMethods[] = { + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +#if defined(NPY_PY3K) +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + UMATH_LINALG_MODULE_NAME, + NULL, + -1, + UMath_LinAlgMethods, + NULL, + NULL, + NULL, + NULL +}; +#endif + +#if defined(NPY_PY3K) +#define RETVAL(x) x +PyObject *PyInit__umath_linalg(void) +#else +#define RETVAL(x) +PyMODINIT_FUNC +init_umath_linalg(void) +#endif +{ + PyObject *m; + PyObject *d; + PyObject *version; + + init_constants(); +#if defined(NPY_PY3K) + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods); +#endif + if (m == NULL) { + return RETVAL(NULL); + } + + import_array(); + import_ufunc(); + + d = PyModule_GetDict(m); + + version = PyString_FromString(umath_linalg_version_string); + PyDict_SetItemString(d, "__version__", version); + Py_DECREF(version); + + /* Load the ufunc operators into the module's namespace */ + addUfuncs(d); + + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "cannot load _umath_linalg module."); + return RETVAL(NULL); + } + + return RETVAL(m); +} diff --git a/contrib/python/numpy/py2/numpy/ma/setup.py b/contrib/python/numpy/py2/numpy/ma/setup.py new file mode 100644 index 0000000000..d1d6c89b51 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/setup.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +from __future__ import division, print_function + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('ma', parent_package, top_path) + config.add_data_dir('tests') + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + config = configuration(top_path='').todict() + setup(**config) diff --git a/contrib/python/numpy/py2/numpy/ma/tests/__init__.py b/contrib/python/numpy/py2/numpy/ma/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_core.py b/contrib/python/numpy/py2/numpy/ma/tests/test_core.py new file mode 100644 index 0000000000..e0dbf1b1af --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_core.py @@ -0,0 +1,5205 @@ +# pylint: disable-msg=W0400,W0511,W0611,W0612,W0614,R0201,E1102 +"""Tests suite for MaskedArray & 
subclassing. + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +""" +from __future__ import division, absolute_import, print_function + +__author__ = "Pierre GF Gerard-Marchant" + +import sys +import warnings +import operator +import itertools +import textwrap +import pytest + +from functools import reduce + + +import numpy as np +import numpy.ma.core +import numpy.core.fromnumeric as fromnumeric +import numpy.core.umath as umath +from numpy.testing import ( + assert_raises, assert_warns, suppress_warnings + ) +from numpy import ndarray +from numpy.compat import asbytes +from numpy.ma.testutils import ( + assert_, assert_array_equal, assert_equal, assert_almost_equal, + assert_equal_records, fail_if_equal, assert_not_equal, + assert_mask_equal + ) +from numpy.ma.core import ( + MAError, MaskError, MaskType, MaskedArray, abs, absolute, add, all, + allclose, allequal, alltrue, angle, anom, arange, arccos, arccosh, arctan2, + arcsin, arctan, argsort, array, asarray, choose, concatenate, + conjugate, cos, cosh, count, default_fill_value, diag, divide, empty, + empty_like, equal, exp, flatten_mask, filled, fix_invalid, + flatten_structured_array, fromflex, getmask, getmaskarray, greater, + greater_equal, identity, inner, isMaskedArray, less, less_equal, log, + log10, make_mask, make_mask_descr, mask_or, masked, masked_array, + masked_equal, masked_greater, masked_greater_equal, masked_inside, + masked_less, masked_less_equal, masked_not_equal, masked_outside, + masked_print_option, masked_values, masked_where, max, maximum, + maximum_fill_value, min, minimum, minimum_fill_value, mod, multiply, + mvoid, nomask, not_equal, ones, outer, power, product, put, putmask, + ravel, repeat, reshape, resize, shape, sin, sinh, sometrue, sort, sqrt, + subtract, sum, take, tan, tanh, transpose, where, zeros, + ) +from numpy.core.numeric import pickle + +pi = np.pi + + +suppress_copy_mask_on_assignment = suppress_warnings() +suppress_copy_mask_on_assignment.filter( + numpy.ma.core.MaskedArrayFutureWarning, + "setting an item on a masked array which has a shared mask will not copy") + + +# For parametrized numeric testing +num_dts = [np.dtype(dt_) for dt_ in '?bhilqBHILQefdgFD'] +num_ids = [dt_.char for dt_ in num_dts] + + +class TestMaskedArray(object): + # Base test class for MaskedArrays. + + def setup(self): + # Base data definition. + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 10. + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + z = np.array([-.5, 0., .5, .8]) + zm = masked_array(z, mask=[0, 1, 0, 0]) + xf = np.where(m1, 1e+20, x) + xm.set_fill_value(1e+20) + self.d = (x, y, a10, m1, m2, xm, ym, z, zm, xf) + + def test_basicattributes(self): + # Tests some basic array attributes. 
+ a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a.ndim, 1) + assert_equal(b.ndim, 1) + assert_equal(a.size, 3) + assert_equal(b.size, 3) + assert_equal(a.shape, (3,)) + assert_equal(b.shape, (3,)) + + def test_basic0d(self): + # Checks masking a scalar + x = masked_array(0) + assert_equal(str(x), '0') + x = masked_array(0, mask=True) + assert_equal(str(x), str(masked_print_option)) + x = masked_array(0, mask=False) + assert_equal(str(x), '0') + x = array(0, mask=1) + assert_(x.filled().dtype is x._data.dtype) + + def test_basic1d(self): + # Test of basic array creation and properties in 1 dimension. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_((xm - ym).filled(0).any()) + fail_if_equal(xm.mask.astype(int), ym.mask.astype(int)) + s = x.shape + assert_equal(np.shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.dtype, x.dtype) + assert_equal(zm.dtype, z.dtype) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), len(m1) - reduce(lambda x, y:x + y, m1)) + assert_array_equal(xm, xf) + assert_array_equal(filled(xm, 1.e20), xf) + assert_array_equal(x, xm) + + def test_basic2d(self): + # Test of basic array creation and properties in 2 dimensions. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + for s in [(4, 3), (6, 2)]: + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_equal(shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), len(m1) - reduce(lambda x, y:x + y, m1)) + assert_equal(xm, xf) + assert_equal(filled(xm, 1.e20), xf) + assert_equal(x, xm) + + def test_concatenate_basic(self): + # Tests concatenations. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + # basic concatenation + assert_equal(np.concatenate((x, y)), concatenate((xm, ym))) + assert_equal(np.concatenate((x, y)), concatenate((x, y))) + assert_equal(np.concatenate((x, y)), concatenate((xm, y))) + assert_equal(np.concatenate((x, y, x)), concatenate((x, ym, x))) + + def test_concatenate_alongaxis(self): + # Tests concatenations. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + # Concatenation along an axis + s = (3, 4) + x.shape = y.shape = xm.shape = ym.shape = s + assert_equal(xm.mask, np.reshape(m1, s)) + assert_equal(ym.mask, np.reshape(m2, s)) + xmym = concatenate((xm, ym), 1) + assert_equal(np.concatenate((x, y), 1), xmym) + assert_equal(np.concatenate((xm.mask, ym.mask), 1), xmym._mask) + + x = zeros(2) + y = array(ones(2), mask=[False, True]) + z = concatenate((x, y)) + assert_array_equal(z, [0, 0, 1, 1]) + assert_array_equal(z.mask, [False, False, False, True]) + z = concatenate((y, x)) + assert_array_equal(z, [1, 1, 0, 0]) + assert_array_equal(z.mask, [False, True, False, False]) + + def test_concatenate_flexible(self): + # Tests the concatenation on flexible arrays. 
+ data = masked_array(list(zip(np.random.rand(10), + np.arange(10))), + dtype=[('a', float), ('b', int)]) + + test = concatenate([data[:5], data[5:]]) + assert_equal_records(test, data) + + def test_creation_ndmin(self): + # Check the use of ndmin + x = array([1, 2, 3], mask=[1, 0, 0], ndmin=2) + assert_equal(x.shape, (1, 3)) + assert_equal(x._data, [[1, 2, 3]]) + assert_equal(x._mask, [[1, 0, 0]]) + + def test_creation_ndmin_from_maskedarray(self): + # Make sure we're not losing the original mask w/ ndmin + x = array([1, 2, 3]) + x[-1] = masked + xx = array(x, ndmin=2, dtype=float) + assert_equal(x.shape, x._mask.shape) + assert_equal(xx.shape, xx._mask.shape) + + def test_creation_maskcreation(self): + # Tests how masks are initialized at the creation of Maskedarrays. + data = arange(24, dtype=float) + data[[3, 6, 15]] = masked + dma_1 = MaskedArray(data) + assert_equal(dma_1.mask, data.mask) + dma_2 = MaskedArray(dma_1) + assert_equal(dma_2.mask, dma_1.mask) + dma_3 = MaskedArray(dma_1, mask=[1, 0, 0, 0] * 6) + fail_if_equal(dma_3.mask, dma_1.mask) + + x = array([1, 2, 3], mask=True) + assert_equal(x._mask, [True, True, True]) + x = array([1, 2, 3], mask=False) + assert_equal(x._mask, [False, False, False]) + y = array([1, 2, 3], mask=x._mask, copy=False) + assert_(np.may_share_memory(x.mask, y.mask)) + y = array([1, 2, 3], mask=x._mask, copy=True) + assert_(not np.may_share_memory(x.mask, y.mask)) + + def test_creation_with_list_of_maskedarrays(self): + # Tests creating a masked array from a list of masked arrays. + x = array(np.arange(5), mask=[1, 0, 0, 0, 0]) + data = array((x, x[::-1])) + assert_equal(data, [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]]) + assert_equal(data._mask, [[1, 0, 0, 0, 0], [0, 0, 0, 0, 1]]) + + x.mask = nomask + data = array((x, x[::-1])) + assert_equal(data, [[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]]) + assert_(data.mask is nomask) + + def test_creation_from_ndarray_with_padding(self): + x = np.array([('A', 0)], dtype={'names':['f0','f1'], + 'formats':['S4','i8'], + 'offsets':[0,8]}) + array(x) # used to fail due to 'V' padding field in x.dtype.descr + + def test_asarray(self): + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + xm.fill_value = -9999 + xm._hardmask = True + xmm = asarray(xm) + assert_equal(xmm._data, xm._data) + assert_equal(xmm._mask, xm._mask) + assert_equal(xmm.fill_value, xm.fill_value) + assert_equal(xmm._hardmask, xm._hardmask) + + def test_asarray_default_order(self): + # See Issue #6646 + m = np.eye(3).T + assert_(not m.flags.c_contiguous) + + new_m = asarray(m) + assert_(new_m.flags.c_contiguous) + + def test_asarray_enforce_order(self): + # See Issue #6646 + m = np.eye(3).T + assert_(not m.flags.c_contiguous) + + new_m = asarray(m, order='C') + assert_(new_m.flags.c_contiguous) + + def test_fix_invalid(self): + # Checks fix_invalid. 
+ with np.errstate(invalid='ignore'): + data = masked_array([np.nan, 0., 1.], mask=[0, 0, 1]) + data_fixed = fix_invalid(data) + assert_equal(data_fixed._data, [data.fill_value, 0., 1.]) + assert_equal(data_fixed._mask, [1., 0., 1.]) + + def test_maskedelement(self): + # Test of masked element + x = arange(6) + x[1] = masked + assert_(str(masked) == '--') + assert_(x[1] is masked) + assert_equal(filled(x[1], 0), 0) + + def test_set_element_as_object(self): + # Tests setting elements with object + a = empty(1, dtype=object) + x = (1, 2, 3, 4, 5) + a[0] = x + assert_equal(a[0], x) + assert_(a[0] is x) + + import datetime + dt = datetime.datetime.now() + a[0] = dt + assert_(a[0] is dt) + + def test_indexing(self): + # Tests conversions and indexing + x1 = np.array([1, 2, 4, 3]) + x2 = array(x1, mask=[1, 0, 0, 0]) + x3 = array(x1, mask=[0, 1, 0, 1]) + x4 = array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises? + assert_equal(np.sort(x1), sort(x2, endwith=False)) + # tests of indexing + assert_(type(x2[1]) is type(x1[1])) + assert_(x1[1] == x2[1]) + assert_(x2[0] is masked) + assert_equal(x1[2], x2[2]) + assert_equal(x1[2:5], x2[2:5]) + assert_equal(x1[:], x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[2] = 9 + x2[2] = 9 + assert_equal(x1, x2) + x1[1:3] = 99 + x2[1:3] = 99 + assert_equal(x1, x2) + x2[1] = masked + assert_equal(x1, x2) + x2[1:3] = masked + assert_equal(x1, x2) + x2[:] = x1 + x2[1] = masked + assert_(allequal(getmask(x2), array([0, 1, 0, 0]))) + x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x3), array([0, 1, 1, 0]))) + x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x4), array([0, 1, 1, 0]))) + assert_(allequal(x4, array([1, 2, 3, 4]))) + x1 = np.arange(5) * 1.0 + x2 = masked_values(x1, 3.0) + assert_equal(x1, x2) + assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask)) + assert_equal(3.0, x2.fill_value) + x1 = array([1, 'hello', 2, 3], object) + x2 = np.array([1, 'hello', 2, 3], object) + s1 = x1[1] + s2 = x2[1] + assert_equal(type(s2), str) + assert_equal(type(s1), str) + assert_equal(s1, s2) + assert_(x1[1:1].shape == (0,)) + + @suppress_copy_mask_on_assignment + def test_copy(self): + # Tests of some subtle points of copying and sizing. + n = [0, 0, 1, 0, 0] + m = make_mask(n) + m2 = make_mask(m) + assert_(m is m2) + m3 = make_mask(m, copy=1) + assert_(m is not m3) + + x1 = np.arange(5) + y1 = array(x1, mask=m) + assert_equal(y1._data.__array_interface__, x1.__array_interface__) + assert_(allequal(x1, y1.data)) + assert_equal(y1._mask.__array_interface__, m.__array_interface__) + + y1a = array(y1) + # Default for masked array is not to copy; see gh-10318. 
+ assert_(y1a._data.__array_interface__ == + y1._data.__array_interface__) + assert_(y1a._mask.__array_interface__ == + y1._mask.__array_interface__) + + y2 = array(x1, mask=m3) + assert_(y2._data.__array_interface__ == x1.__array_interface__) + assert_(y2._mask.__array_interface__ == m3.__array_interface__) + assert_(y2[2] is masked) + y2[2] = 9 + assert_(y2[2] is not masked) + assert_(y2._mask.__array_interface__ == m3.__array_interface__) + assert_(allequal(y2.mask, 0)) + + y2a = array(x1, mask=m, copy=1) + assert_(y2a._data.__array_interface__ != x1.__array_interface__) + #assert_( y2a.mask is not m) + assert_(y2a._mask.__array_interface__ != m.__array_interface__) + assert_(y2a[2] is masked) + y2a[2] = 9 + assert_(y2a[2] is not masked) + #assert_( y2a.mask is not m) + assert_(y2a._mask.__array_interface__ != m.__array_interface__) + assert_(allequal(y2a.mask, 0)) + + y3 = array(x1 * 1.0, mask=m) + assert_(filled(y3).dtype is (x1 * 1.0).dtype) + + x4 = arange(4) + x4[2] = masked + y4 = resize(x4, (8,)) + assert_equal(concatenate([x4, x4]), y4) + assert_equal(getmask(y4), [0, 0, 1, 0, 0, 0, 1, 0]) + y5 = repeat(x4, (2, 2, 2, 2), axis=0) + assert_equal(y5, [0, 0, 1, 1, 2, 2, 3, 3]) + y6 = repeat(x4, 2, axis=0) + assert_equal(y5, y6) + y7 = x4.repeat((2, 2, 2, 2), axis=0) + assert_equal(y5, y7) + y8 = x4.repeat(2, 0) + assert_equal(y5, y8) + + y9 = x4.copy() + assert_equal(y9._data, x4._data) + assert_equal(y9._mask, x4._mask) + + x = masked_array([1, 2, 3], mask=[0, 1, 0]) + # Copy is False by default + y = masked_array(x) + assert_equal(y._data.ctypes.data, x._data.ctypes.data) + assert_equal(y._mask.ctypes.data, x._mask.ctypes.data) + y = masked_array(x, copy=True) + assert_not_equal(y._data.ctypes.data, x._data.ctypes.data) + assert_not_equal(y._mask.ctypes.data, x._mask.ctypes.data) + + def test_copy_0d(self): + # gh-9430 + x = np.ma.array(43, mask=True) + xc = x.copy() + assert_equal(xc.mask, True) + + def test_copy_on_python_builtins(self): + # Tests copy works on python builtins (issue#8019) + assert_(isMaskedArray(np.ma.copy([1,2,3]))) + assert_(isMaskedArray(np.ma.copy((1,2,3)))) + + def test_copy_immutable(self): + # Tests that the copy method is immutable, GitHub issue #5247 + a = np.ma.array([1, 2, 3]) + b = np.ma.array([4, 5, 6]) + a_copy_method = a.copy + b.copy + assert_equal(a_copy_method(), [1, 2, 3]) + + def test_deepcopy(self): + from copy import deepcopy + a = array([0, 1, 2], mask=[False, True, False]) + copied = deepcopy(a) + assert_equal(copied.mask, a.mask) + assert_not_equal(id(a._mask), id(copied._mask)) + + copied[1] = 1 + assert_equal(copied.mask, [0, 0, 0]) + assert_equal(a.mask, [0, 1, 0]) + + copied = deepcopy(a) + assert_equal(copied.mask, a.mask) + copied.mask[1] = False + assert_equal(copied.mask, [0, 0, 0]) + assert_equal(a.mask, [0, 1, 0]) + + def test_str_repr(self): + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(str(a), '[0 -- 2]') + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999)''') + ) + + # arrays with a continuation + a = np.ma.arange(2000) + a[1:50] = np.ma.masked + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[0, --, --, ..., 1997, 1998, 1999], + mask=[False, True, True, ..., False, False, False], + fill_value=999999)''') + ) + + # line-wrapped 1d arrays are correctly aligned + a = np.ma.arange(20) + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 
15, 16, 17, 18, 19], + mask=False, + fill_value=999999)''') + ) + + # 2d arrays cause wrapping + a = array([[1, 2, 3], [4, 5, 6]], dtype=np.int8) + a[1,1] = np.ma.masked + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999, + dtype=int8)''') + ) + + # but not it they're a row vector + assert_equal( + repr(a[:1]), + textwrap.dedent('''\ + masked_array(data=[[1, 2, 3]], + mask=[[False, False, False]], + fill_value=999999, + dtype=int8)''') + ) + + # dtype=int is implied, so not shown + assert_equal( + repr(a.astype(int)), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999)''') + ) + + def test_str_repr_legacy(self): + oldopts = np.get_printoptions() + np.set_printoptions(legacy='1.13') + try: + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(str(a), '[0 -- 2]') + assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n' + ' mask = [False True False],\n' + ' fill_value = 999999)\n') + + a = np.ma.arange(2000) + a[1:50] = np.ma.masked + assert_equal( + repr(a), + 'masked_array(data = [0 -- -- ..., 1997 1998 1999],\n' + ' mask = [False True True ..., False False False],\n' + ' fill_value = 999999)\n' + ) + finally: + np.set_printoptions(**oldopts) + + def test_0d_unicode(self): + u = u'caf\xe9' + utype = type(u) + + arr_nomask = np.ma.array(u) + arr_masked = np.ma.array(u, mask=True) + + assert_equal(utype(arr_nomask), u) + assert_equal(utype(arr_masked), u'--') + + def test_pickling(self): + # Tests pickling + for dtype in (int, float, str, object): + a = arange(10).astype(dtype) + a.fill_value = 999 + + masks = ([0, 0, 0, 1, 0, 1, 0, 1, 0, 1], # partially masked + True, # Fully masked + False) # Fully unmasked + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + for mask in masks: + a.mask = mask + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled._data, a._data) + if dtype in (object, int): + assert_equal(a_pickled.fill_value, 999) + else: + assert_equal(a_pickled.fill_value, dtype(999)) + assert_array_equal(a_pickled.mask, mask) + + def test_pickling_subbaseclass(self): + # Test pickling w/ a subclass of ndarray + x = np.array([(1.0, 2), (3.0, 4)], + dtype=[('x', float), ('y', int)]).view(np.recarray) + a = masked_array(x, mask=[(True, False), (False, True)]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + assert_(isinstance(a_pickled._data, np.recarray)) + + def test_pickling_maskedconstant(self): + # Test pickling MaskedConstant + mc = np.ma.masked + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + mc_pickled = pickle.loads(pickle.dumps(mc, protocol=proto)) + assert_equal(mc_pickled._baseclass, mc._baseclass) + assert_equal(mc_pickled._mask, mc._mask) + assert_equal(mc_pickled._data, mc._data) + + def test_pickling_wstructured(self): + # Tests pickling w/ structured array + a = array([(1, 1.), (2, 2.)], mask=[(0, 0), (0, 1)], + dtype=[('a', int), ('b', float)]) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + + def test_pickling_keepalignment(self): + # Tests pickling w/ F_CONTIGUOUS arrays + a = arange(10) 
+ a.shape = (-1, 2) + b = a.T + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + test = pickle.loads(pickle.dumps(b, protocol=proto)) + assert_equal(test, b) + + def test_single_element_subscript(self): + # Tests single element subscripts of Maskedarrays. + a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a[0].shape, ()) + assert_equal(b[0].shape, ()) + assert_equal(b[1].shape, ()) + + def test_topython(self): + # Tests some communication issues with Python. + assert_equal(1, int(array(1))) + assert_equal(1.0, float(array(1))) + assert_equal(1, int(array([[[1]]]))) + assert_equal(1.0, float(array([[1]]))) + assert_raises(TypeError, float, array([1, 1])) + + with suppress_warnings() as sup: + sup.filter(UserWarning, 'Warning: converting a masked element') + assert_(np.isnan(float(array([1], mask=[1])))) + + a = array([1, 2, 3], mask=[1, 0, 0]) + assert_raises(TypeError, lambda: float(a)) + assert_equal(float(a[-1]), 3.) + assert_(np.isnan(float(a[0]))) + assert_raises(TypeError, int, a) + assert_equal(int(a[-1]), 3) + assert_raises(MAError, lambda:int(a[0])) + + def test_oddfeatures_1(self): + # Test of other odd features + x = arange(20) + x = x.reshape(4, 5) + x.flat[5] = 12 + assert_(x[1, 0] == 12) + z = x + 10j * x + assert_equal(z.real, x) + assert_equal(z.imag, 10 * x) + assert_equal((z * conjugate(z)).real, 101 * x * x) + z.imag[...] = 0.0 + + x = arange(10) + x[3] = masked + assert_(str(x[3]) == str(masked)) + c = x >= 8 + assert_(count(where(c, masked, masked)) == 0) + assert_(shape(where(c, masked, masked)) == c.shape) + + z = masked_where(c, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + assert_equal(x, z) + + def test_oddfeatures_2(self): + # Tests some more features. 
+ x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + + @suppress_copy_mask_on_assignment + def test_oddfeatures_3(self): + # Tests some generic features + atest = array([10], mask=True) + btest = array([20]) + idx = atest.mask + atest[idx] = btest[idx] + assert_equal(atest, [20]) + + def test_filled_with_object_dtype(self): + a = np.ma.masked_all(1, dtype='O') + assert_equal(a.filled('x')[0], 'x') + + def test_filled_with_flexible_dtype(self): + # Test filled w/ flexible dtype + flexi = array([(1, 1, 1)], + dtype=[('i', int), ('s', '|S8'), ('f', float)]) + flexi[0] = masked + assert_equal(flexi.filled(), + np.array([(default_fill_value(0), + default_fill_value('0'), + default_fill_value(0.),)], dtype=flexi.dtype)) + flexi[0] = masked + assert_equal(flexi.filled(1), + np.array([(1, '1', 1.)], dtype=flexi.dtype)) + + def test_filled_with_mvoid(self): + # Test filled w/ mvoid + ndtype = [('a', int), ('b', float)] + a = mvoid((1, 2.), mask=[(0, 1)], dtype=ndtype) + # Filled using default + test = a.filled() + assert_equal(tuple(test), (1, default_fill_value(1.))) + # Explicit fill_value + test = a.filled((-1, -1)) + assert_equal(tuple(test), (1, -1)) + # Using predefined filling values + a.fill_value = (-999, -999) + assert_equal(tuple(a.filled()), (1, -999)) + + def test_filled_with_nested_dtype(self): + # Test filled w/ nested dtype + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([(1, (1, 1)), (2, (2, 2))], + mask=[(0, (1, 0)), (0, (0, 1))], dtype=ndtype) + test = a.filled(0) + control = np.array([(1, (0, 1)), (2, (2, 0))], dtype=ndtype) + assert_equal(test, control) + + test = a['B'].filled(0) + control = np.array([(0, 1), (2, 0)], dtype=a['B'].dtype) + assert_equal(test, control) + + # test if mask gets set correctly (see #6760) + Z = numpy.ma.zeros(2, numpy.dtype([("A", "(2,2)i1,(2,2)i1", (2,2))])) + assert_equal(Z.data.dtype, numpy.dtype([('A', [('f0', 'i1', (2, 2)), + ('f1', 'i1', (2, 2))], (2, 2))])) + assert_equal(Z.mask.dtype, numpy.dtype([('A', [('f0', '?', (2, 2)), + ('f1', '?', (2, 2))], (2, 2))])) + + def test_filled_with_f_order(self): + # Test filled w/ F-contiguous array + a = array(np.array([(0, 1, 2), (4, 5, 6)], order='F'), + mask=np.array([(0, 0, 1), (1, 0, 0)], order='F'), + order='F') # this is currently ignored + assert_(a.flags['F_CONTIGUOUS']) + assert_(a.filled(0).flags['F_CONTIGUOUS']) + + def test_optinfo_propagation(self): + # Checks that _optinfo dictionary isn't back-propagated + x = array([1, 2, 3, ], dtype=float) + x._optinfo['info'] = '???' + y = x.copy() + assert_equal(y._optinfo['info'], '???') + y._optinfo['info'] = '!!!' 
+ assert_equal(x._optinfo['info'], '???') + + def test_optinfo_forward_propagation(self): + a = array([1,2,2,4]) + a._optinfo["key"] = "value" + assert_equal(a._optinfo["key"], (a == 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a != 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a > 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a >= 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a <= 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a + 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a - 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a * 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], (a / 2)._optinfo["key"]) + assert_equal(a._optinfo["key"], a[:2]._optinfo["key"]) + assert_equal(a._optinfo["key"], a[[0,0,2]]._optinfo["key"]) + assert_equal(a._optinfo["key"], np.exp(a)._optinfo["key"]) + assert_equal(a._optinfo["key"], np.abs(a)._optinfo["key"]) + assert_equal(a._optinfo["key"], array(a, copy=True)._optinfo["key"]) + assert_equal(a._optinfo["key"], np.zeros_like(a)._optinfo["key"]) + + def test_fancy_printoptions(self): + # Test printing a masked array w/ fancy dtype. + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = array([(1, (2, 3.0)), (4, (5, 6.0))], + mask=[(1, (0, 1)), (0, (1, 0))], + dtype=fancydtype) + control = "[(--, (2, --)) (4, (--, 6.0))]" + assert_equal(str(test), control) + + # Test 0-d array with multi-dimensional dtype + t_2d0 = masked_array(data = (0, [[0.0, 0.0, 0.0], + [0.0, 0.0, 0.0]], + 0.0), + mask = (False, [[True, False, True], + [False, False, True]], + False), + dtype = "int, (2,3)float, float") + control = "(0, [[--, 0.0, --], [0.0, 0.0, --]], 0.0)" + assert_equal(str(t_2d0), control) + + def test_flatten_structured_array(self): + # Test flatten_structured_array on arrays + # On ndarray + ndtype = [('a', int), ('b', float)] + a = np.array([(1, 1), (2, 2)], dtype=ndtype) + test = flatten_structured_array(a) + control = np.array([[1., 1.], [2., 2.]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + # On masked_array + a = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = flatten_structured_array(a) + control = array([[1., 1.], [2., 2.]], + mask=[[0, 1], [1, 0]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # On masked array with nested structure + ndtype = [('a', int), ('b', [('ba', int), ('bb', float)])] + a = array([(1, (1, 1.1)), (2, (2, 2.2))], + mask=[(0, (1, 0)), (1, (0, 1))], dtype=ndtype) + test = flatten_structured_array(a) + control = array([[1., 1., 1.1], [2., 2., 2.2]], + mask=[[0, 1, 0], [1, 0, 1]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # Keeping the initial shape + ndtype = [('a', int), ('b', float)] + a = np.array([[(1, 1), ], [(2, 2), ]], dtype=ndtype) + test = flatten_structured_array(a) + control = np.array([[[1., 1.], ], [[2., 2.], ]], dtype=float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + + def test_void0d(self): + # Test creating a mvoid object + ndtype = [('a', int), ('b', int)] + a = np.array([(1, 2,)], dtype=ndtype)[0] + f = mvoid(a) + assert_(isinstance(f, mvoid)) + + a = masked_array([(1, 2)], mask=[(1, 0)], dtype=ndtype)[0] + assert_(isinstance(a, mvoid)) + + a = masked_array([(1, 2), (1, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + f = mvoid(a._data[0], a._mask[0]) + assert_(isinstance(f, 
mvoid)) + + def test_mvoid_getitem(self): + # Test mvoid.__getitem__ + ndtype = [('a', int), ('b', int)] + a = masked_array([(1, 2,), (3, 4)], mask=[(0, 0), (1, 0)], + dtype=ndtype) + # w/o mask + f = a[0] + assert_(isinstance(f, mvoid)) + assert_equal((f[0], f['a']), (1, 1)) + assert_equal(f['b'], 2) + # w/ mask + f = a[1] + assert_(isinstance(f, mvoid)) + assert_(f[0] is masked) + assert_(f['a'] is masked) + assert_equal(f[1], 4) + + # exotic dtype + A = masked_array(data=[([0,1],)], + mask=[([True, False],)], + dtype=[("A", ">i2", (2,))]) + assert_equal(A[0]["A"], A["A"][0]) + assert_equal(A[0]["A"], masked_array(data=[0, 1], + mask=[True, False], dtype=">i2")) + + def test_mvoid_iter(self): + # Test iteration on __getitem__ + ndtype = [('a', int), ('b', int)] + a = masked_array([(1, 2,), (3, 4)], mask=[(0, 0), (1, 0)], + dtype=ndtype) + # w/o mask + assert_equal(list(a[0]), [1, 2]) + # w/ mask + assert_equal(list(a[1]), [masked, 4]) + + def test_mvoid_print(self): + # Test printing a mvoid + mx = array([(1, 1), (2, 2)], dtype=[('a', int), ('b', int)]) + assert_equal(str(mx[0]), "(1, 1)") + mx['b'][0] = masked + ini_display = masked_print_option._display + masked_print_option.set_display("-X-") + try: + assert_equal(str(mx[0]), "(1, -X-)") + assert_equal(repr(mx[0]), "(1, -X-)") + finally: + masked_print_option.set_display(ini_display) + + # also check if there are object datatypes (see gh-7493) + mx = array([(1,), (2,)], dtype=[('a', 'O')]) + assert_equal(str(mx[0]), "(1,)") + + def test_mvoid_multidim_print(self): + + # regression test for gh-6019 + t_ma = masked_array(data = [([1, 2, 3],)], + mask = [([False, True, False],)], + fill_value = ([999999, 999999, 999999],), + dtype = [('a', '<i4', (3,))]) + assert_(str(t_ma[0]) == "([1, --, 3],)") + assert_(repr(t_ma[0]) == "([1, --, 3],)") + + # additional tests with structured arrays + + t_2d = masked_array(data = [([[1, 2], [3,4]],)], + mask = [([[False, True], [True, False]],)], + dtype = [('a', '<i4', (2,2))]) + assert_(str(t_2d[0]) == "([[1, --], [--, 4]],)") + assert_(repr(t_2d[0]) == "([[1, --], [--, 4]],)") + + t_0d = masked_array(data = [(1,2)], + mask = [(True,False)], + dtype = [('a', '<i4'), ('b', '<i4')]) + assert_(str(t_0d[0]) == "(--, 2)") + assert_(repr(t_0d[0]) == "(--, 2)") + + t_2d = masked_array(data = [([[1, 2], [3,4]], 1)], + mask = [([[False, True], [True, False]], False)], + dtype = [('a', '<i4', (2,2)), ('b', float)]) + assert_(str(t_2d[0]) == "([[1, --], [--, 4]], 1.0)") + assert_(repr(t_2d[0]) == "([[1, --], [--, 4]], 1.0)") + + t_ne = masked_array(data=[(1, (1, 1))], + mask=[(True, (True, False))], + dtype = [('a', '<i4'), ('b', 'i4,i4')]) + assert_(str(t_ne[0]) == "(--, (--, 1))") + assert_(repr(t_ne[0]) == "(--, (--, 1))") + + def test_object_with_array(self): + mx1 = masked_array([1.], mask=[True]) + mx2 = masked_array([1., 2.]) + mx = masked_array([mx1, mx2], mask=[False, True]) + assert_(mx[0] is mx1) + assert_(mx[1] is not mx2) + assert_(np.all(mx[1].data == mx2.data)) + assert_(np.all(mx[1].mask)) + # check that we return a view. + mx[1].data[0] = 0. + assert_(mx2[0] == 0.) + + +class TestMaskedArrayArithmetic(object): + # Base test class for MaskedArrays. + + def setup(self): + # Base data definition. + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 10. 
+ m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + z = np.array([-.5, 0., .5, .8]) + zm = masked_array(z, mask=[0, 1, 0, 0]) + xf = np.where(m1, 1e+20, x) + xm.set_fill_value(1e+20) + self.d = (x, y, a10, m1, m2, xm, ym, z, zm, xf) + self.err_status = np.geterr() + np.seterr(divide='ignore', invalid='ignore') + + def teardown(self): + np.seterr(**self.err_status) + + def test_basic_arithmetic(self): + # Test of basic arithmetic. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + a2d = array([[1, 2], [0, 4]]) + a2dm = masked_array(a2d, [[0, 0], [1, 0]]) + assert_equal(a2d * a2d, a2d * a2dm) + assert_equal(a2d + a2d, a2d + a2dm) + assert_equal(a2d - a2d, a2d - a2dm) + for s in [(12,), (4, 3), (2, 6)]: + x = x.reshape(s) + y = y.reshape(s) + xm = xm.reshape(s) + ym = ym.reshape(s) + xf = xf.reshape(s) + assert_equal(-x, -xm) + assert_equal(x + y, xm + ym) + assert_equal(x - y, xm - ym) + assert_equal(x * y, xm * ym) + assert_equal(x / y, xm / ym) + assert_equal(a10 + y, a10 + ym) + assert_equal(a10 - y, a10 - ym) + assert_equal(a10 * y, a10 * ym) + assert_equal(a10 / y, a10 / ym) + assert_equal(x + a10, xm + a10) + assert_equal(x - a10, xm - a10) + assert_equal(x * a10, xm * a10) + assert_equal(x / a10, xm / a10) + assert_equal(x ** 2, xm ** 2) + assert_equal(abs(x) ** 2.5, abs(xm) ** 2.5) + assert_equal(x ** y, xm ** ym) + assert_equal(np.add(x, y), add(xm, ym)) + assert_equal(np.subtract(x, y), subtract(xm, ym)) + assert_equal(np.multiply(x, y), multiply(xm, ym)) + assert_equal(np.divide(x, y), divide(xm, ym)) + + def test_divide_on_different_shapes(self): + x = arange(6, dtype=float) + x.shape = (2, 3) + y = arange(3, dtype=float) + + z = x / y + assert_equal(z, [[-1., 1., 1.], [-1., 4., 2.5]]) + assert_equal(z.mask, [[1, 0, 0], [1, 0, 0]]) + + z = x / y[None,:] + assert_equal(z, [[-1., 1., 1.], [-1., 4., 2.5]]) + assert_equal(z.mask, [[1, 0, 0], [1, 0, 0]]) + + y = arange(2, dtype=float) + z = x / y[:, None] + assert_equal(z, [[-1., -1., -1.], [3., 4., 5.]]) + assert_equal(z.mask, [[1, 1, 1], [0, 0, 0]]) + + def test_mixed_arithmetic(self): + # Tests mixed arithmetics. + na = np.array([1]) + ma = array([1]) + assert_(isinstance(na + ma, MaskedArray)) + assert_(isinstance(ma + na, MaskedArray)) + + def test_limits_arithmetic(self): + tiny = np.finfo(float).tiny + a = array([tiny, 1. / tiny, 0.]) + assert_equal(getmaskarray(a / 2), [0, 0, 0]) + assert_equal(getmaskarray(2 / a), [1, 0, 1]) + + def test_masked_singleton_arithmetic(self): + # Tests some scalar arithmetics on MaskedArrays. 
+ # Masked singleton should remain masked no matter what + xm = array(0, mask=1) + assert_((1 / array(0)).mask) + assert_((1 + xm).mask) + assert_((-xm).mask) + assert_(maximum(xm, xm).mask) + assert_(minimum(xm, xm).mask) + + def test_masked_singleton_equality(self): + # Tests (in)equality on masked singleton + a = array([1, 2, 3], mask=[1, 1, 0]) + assert_((a[0] == 0) is masked) + assert_((a[0] != 0) is masked) + assert_equal((a[-1] == 0), False) + assert_equal((a[-1] != 0), True) + + def test_arithmetic_with_masked_singleton(self): + # Checks that there's no collapsing to masked + x = masked_array([1, 2]) + y = x * masked + assert_equal(y.shape, x.shape) + assert_equal(y._mask, [True, True]) + y = x[0] * masked + assert_(y is masked) + y = x + masked + assert_equal(y.shape, x.shape) + assert_equal(y._mask, [True, True]) + + def test_arithmetic_with_masked_singleton_on_1d_singleton(self): + # Check that we're not losing the shape of a singleton + x = masked_array([1, ]) + y = x + masked + assert_equal(y.shape, x.shape) + assert_equal(y.mask, [True, ]) + + def test_scalar_arithmetic(self): + x = array(0, mask=0) + assert_equal(x.filled().ctypes.data, x.ctypes.data) + # Make sure we don't lose the shape in some circumstances + xm = array((0, 0)) / 0. + assert_equal(xm.shape, (2,)) + assert_equal(xm.mask, [1, 1]) + + def test_basic_ufuncs(self): + # Test various functions such as sin, cos. + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(np.cos(x), cos(xm)) + assert_equal(np.cosh(x), cosh(xm)) + assert_equal(np.sin(x), sin(xm)) + assert_equal(np.sinh(x), sinh(xm)) + assert_equal(np.tan(x), tan(xm)) + assert_equal(np.tanh(x), tanh(xm)) + assert_equal(np.sqrt(abs(x)), sqrt(xm)) + assert_equal(np.log(abs(x)), log(xm)) + assert_equal(np.log10(abs(x)), log10(xm)) + assert_equal(np.exp(x), exp(xm)) + assert_equal(np.arcsin(z), arcsin(zm)) + assert_equal(np.arccos(z), arccos(zm)) + assert_equal(np.arctan(z), arctan(zm)) + assert_equal(np.arctan2(x, y), arctan2(xm, ym)) + assert_equal(np.absolute(x), absolute(xm)) + assert_equal(np.angle(x + 1j*y), angle(xm + 1j*ym)) + assert_equal(np.angle(x + 1j*y, deg=True), angle(xm + 1j*ym, deg=True)) + assert_equal(np.equal(x, y), equal(xm, ym)) + assert_equal(np.not_equal(x, y), not_equal(xm, ym)) + assert_equal(np.less(x, y), less(xm, ym)) + assert_equal(np.greater(x, y), greater(xm, ym)) + assert_equal(np.less_equal(x, y), less_equal(xm, ym)) + assert_equal(np.greater_equal(x, y), greater_equal(xm, ym)) + assert_equal(np.conjugate(x), conjugate(xm)) + + def test_count_func(self): + # Tests count + assert_equal(1, count(1)) + assert_equal(0, array(1, mask=[1])) + + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + res = count(ott) + assert_(res.dtype.type is np.intp) + assert_equal(3, res) + + ott = ott.reshape((2, 2)) + res = count(ott) + assert_(res.dtype.type is np.intp) + assert_equal(3, res) + res = count(ott, 0) + assert_(isinstance(res, ndarray)) + assert_equal([1, 2], res) + assert_(getmask(res) is nomask) + + ott = array([0., 1., 2., 3.]) + res = count(ott, 0) + assert_(isinstance(res, ndarray)) + assert_(res.dtype.type is np.intp) + assert_raises(np.AxisError, ott.count, axis=1) + + def test_count_on_python_builtins(self): + # Tests count works on python builtins (issue#8019) + assert_equal(3, count([1,2,3])) + assert_equal(2, count((1,2))) + + def test_minmax_func(self): + # Tests minimum and maximum. 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + # max doesn't work if shaped + xr = np.ravel(x) + xmr = ravel(xm) + # following are true because of careful selection of data + assert_equal(max(xr), maximum.reduce(xmr)) + assert_equal(min(xr), minimum.reduce(xmr)) + + assert_equal(minimum([1, 2, 3], [4, 0, 9]), [1, 0, 3]) + assert_equal(maximum([1, 2, 3], [4, 0, 9]), [4, 2, 9]) + x = arange(5) + y = arange(5) - 2 + x[3] = masked + y[0] = masked + assert_equal(minimum(x, y), where(less(x, y), x, y)) + assert_equal(maximum(x, y), where(greater(x, y), x, y)) + assert_(minimum.reduce(x) == 0) + assert_(maximum.reduce(x) == 4) + + x = arange(4).reshape(2, 2) + x[-1, -1] = masked + assert_equal(maximum.reduce(x, axis=None), 2) + + def test_minimummaximum_func(self): + a = np.ones((2, 2)) + aminimum = minimum(a, a) + assert_(isinstance(aminimum, MaskedArray)) + assert_equal(aminimum, np.minimum(a, a)) + + aminimum = minimum.outer(a, a) + assert_(isinstance(aminimum, MaskedArray)) + assert_equal(aminimum, np.minimum.outer(a, a)) + + amaximum = maximum(a, a) + assert_(isinstance(amaximum, MaskedArray)) + assert_equal(amaximum, np.maximum(a, a)) + + amaximum = maximum.outer(a, a) + assert_(isinstance(amaximum, MaskedArray)) + assert_equal(amaximum, np.maximum.outer(a, a)) + + def test_minmax_reduce(self): + # Test np.min/maximum.reduce on array w/ full False mask + a = array([1, 2, 3], mask=[False, False, False]) + b = np.maximum.reduce(a) + assert_equal(b, 3) + + def test_minmax_funcs_with_output(self): + # Tests the min/max functions with explicit outputs + mask = np.random.rand(12).round() + xm = array(np.random.uniform(0, 10, 12), mask=mask) + xm.shape = (3, 4) + for funcname in ('min', 'max'): + # Initialize + npfunc = getattr(np, funcname) + mafunc = getattr(numpy.ma.core, funcname) + # Use the np version + nout = np.empty((4,), dtype=int) + try: + result = npfunc(xm, axis=0, out=nout) + except MaskError: + pass + nout = np.empty((4,), dtype=float) + result = npfunc(xm, axis=0, out=nout) + assert_(result is nout) + # Use the ma version + nout.fill(-999) + result = mafunc(xm, axis=0, out=nout) + assert_(result is nout) + + def test_minmax_methods(self): + # Additional tests on max/min + (_, _, _, _, _, xm, _, _, _, _) = self.d + xm.shape = (xm.size,) + assert_equal(xm.max(), 10) + assert_(xm[0].max() is masked) + assert_(xm[0].max(0) is masked) + assert_(xm[0].max(-1) is masked) + assert_equal(xm.min(), -10.) + assert_(xm[0].min() is masked) + assert_(xm[0].min(0) is masked) + assert_(xm[0].min(-1) is masked) + assert_equal(xm.ptp(), 20.) + assert_(xm[0].ptp() is masked) + assert_(xm[0].ptp(0) is masked) + assert_(xm[0].ptp(-1) is masked) + + x = array([1, 2, 3], mask=True) + assert_(x.min() is masked) + assert_(x.max() is masked) + assert_(x.ptp() is masked) + + def test_addsumprod(self): + # Tests add, sum, product. 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(np.add.reduce(x), add.reduce(x)) + assert_equal(np.add.accumulate(x), add.accumulate(x)) + assert_equal(4, sum(array(4), axis=0)) + assert_equal(4, sum(array(4), axis=0)) + assert_equal(np.sum(x, axis=0), sum(x, axis=0)) + assert_equal(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0)) + assert_equal(np.sum(x, 0), sum(x, 0)) + assert_equal(np.product(x, axis=0), product(x, axis=0)) + assert_equal(np.product(x, 0), product(x, 0)) + assert_equal(np.product(filled(xm, 1), axis=0), product(xm, axis=0)) + s = (3, 4) + x.shape = y.shape = xm.shape = ym.shape = s + if len(s) > 1: + assert_equal(np.concatenate((x, y), 1), concatenate((xm, ym), 1)) + assert_equal(np.add.reduce(x, 1), add.reduce(x, 1)) + assert_equal(np.sum(x, 1), sum(x, 1)) + assert_equal(np.product(x, 1), product(x, 1)) + + def test_binops_d2D(self): + # Test binary operations on 2D data + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + + test = a * b + control = array([[2., 3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b * a + control = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a * b + control = array([[2, 3], [8, 10], [18, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b * a + control = array([[2, 3], [8, 10], [18, 7]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_domained_binops_d2D(self): + # Test domained binary operations on 2D data + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + + test = a / b + control = array([[1. / 2., 1. / 3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b / a + control = array([[2. / 1., 3. / 1.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a / b + control = array([[1. / 2, 1. / 3], [2. / 4, 2. / 5], [3. / 6, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + test = b / a + control = array([[2 / 1., 3 / 1.], [4 / 2., 5 / 2.], [6 / 3., 7]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_noshrinking(self): + # Check that we don't shrink a mask when not wanted + # Binary operations + a = masked_array([1., 2., 3.], mask=[False, False, False], + shrink=False) + b = a + 1 + assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a += 1 + assert_equal(a.mask, [0, 0, 0]) + # Domained binary operation + b = a / 1. 
+ assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a /= 1. + assert_equal(a.mask, [0, 0, 0]) + + def test_ufunc_nomask(self): + # check the case ufuncs should set the mask to false + m = np.ma.array([1]) + # check we don't get array([False], dtype=bool) + assert_equal(np.true_divide(m, 5).mask.shape, ()) + + def test_noshink_on_creation(self): + # Check that the mask is not shrunk on array creation when not wanted + a = np.ma.masked_values([1., 2.5, 3.1], 1.5, shrink=False) + assert_equal(a.mask, [0, 0, 0]) + + def test_mod(self): + # Tests mod + (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d + assert_equal(mod(x, y), mod(xm, ym)) + test = mod(ym, xm) + assert_equal(test, np.mod(ym, xm)) + assert_equal(test.mask, mask_or(xm.mask, ym.mask)) + test = mod(xm, ym) + assert_equal(test, np.mod(xm, ym)) + assert_equal(test.mask, mask_or(mask_or(xm.mask, ym.mask), (ym == 0))) + + def test_TakeTransposeInnerOuter(self): + # Test of take, transpose, inner, outer products + x = arange(24) + y = np.arange(24) + x[5:6] = masked + x = x.reshape(2, 3, 4) + y = y.reshape(2, 3, 4) + assert_equal(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1))) + assert_equal(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1)) + assert_equal(np.inner(filled(x, 0), filled(y, 0)), + inner(x, y)) + assert_equal(np.outer(filled(x, 0), filled(y, 0)), + outer(x, y)) + y = array(['abc', 1, 'def', 2, 3], object) + y[2] = masked + t = take(y, [0, 3, 4]) + assert_(t[0] == 'abc') + assert_(t[1] == 2) + assert_(t[2] == 3) + + def test_imag_real(self): + # Check complex + xx = array([1 + 10j, 20 + 2j], mask=[1, 0]) + assert_equal(xx.imag, [10, 2]) + assert_equal(xx.imag.filled(), [1e+20, 2]) + assert_equal(xx.imag.dtype, xx._data.imag.dtype) + assert_equal(xx.real, [1, 20]) + assert_equal(xx.real.filled(), [1e+20, 20]) + assert_equal(xx.real.dtype, xx._data.real.dtype) + + def test_methods_with_output(self): + xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + funclist = ('sum', 'prod', 'var', 'std', 'max', 'min', 'ptp', 'mean',) + + for funcname in funclist: + npfunc = getattr(np, funcname) + xmmeth = getattr(xm, funcname) + # A ndarray as explicit input + output = np.empty(4, dtype=float) + output.fill(-9999) + result = npfunc(xm, axis=0, out=output) + # ... 
the result should be the given output + assert_(result is output) + assert_equal(result, xmmeth(axis=0, out=output)) + + output = empty(4, dtype=int) + result = xmmeth(axis=0, out=output) + assert_(result is output) + assert_(output[0] is masked) + + def test_eq_on_structured(self): + # Test the equality of structured arrays + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (a[0] == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] == a) + assert_equal(test.data, [[True, False], [False, False]]) + assert_equal(test.mask, [[False, False], [False, True]]) + assert_(test.fill_value == True) + + def test_ne_on_structured(self): + # Test the equality of structured arrays + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a != b) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + test = (a[0] != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a != b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + assert_(test.fill_value == True) + + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] != a) + assert_equal(test.data, [[False, True], [True, True]]) + assert_equal(test.mask, [[False, False], [False, True]]) + assert_(test.fill_value == True) + + def test_eq_ne_structured_extra(self): + # ensure simple examples are symmetric and make sense. 
+ # from https://github.com/numpy/numpy/pull/8590#discussion_r101126465 + dt = np.dtype('i4,i4') + for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt), + mvoid((1, 2), mask=(0, 1), dtype=dt), + mvoid((1, 2), mask=(1, 0), dtype=dt), + mvoid((1, 2), mask=(1, 1), dtype=dt)): + ma1 = m1.view(MaskedArray) + r1 = ma1.view('2i4') + for m2 in (np.array((1, 1), dtype=dt), + mvoid((1, 1), dtype=dt), + mvoid((1, 0), mask=(0, 1), dtype=dt), + mvoid((3, 2), mask=(0, 1), dtype=dt)): + ma2 = m2.view(MaskedArray) + r2 = ma2.view('2i4') + eq_expected = (r1 == r2).all() + assert_equal(m1 == m2, eq_expected) + assert_equal(m2 == m1, eq_expected) + assert_equal(ma1 == m2, eq_expected) + assert_equal(m1 == ma2, eq_expected) + assert_equal(ma1 == ma2, eq_expected) + # Also check it is the same if we do it element by element. + el_by_el = [m1[name] == m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).all(), eq_expected) + ne_expected = (r1 != r2).any() + assert_equal(m1 != m2, ne_expected) + assert_equal(m2 != m1, ne_expected) + assert_equal(ma1 != m2, ne_expected) + assert_equal(m1 != ma2, ne_expected) + assert_equal(ma1 != ma2, ne_expected) + el_by_el = [m1[name] != m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).any(), ne_expected) + + @pytest.mark.parametrize('dt', ['S', 'U']) + @pytest.mark.parametrize('fill', [None, 'A']) + def test_eq_for_strings(self, dt, fill): + # Test the equality of structured arrays + a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill) + test = (a == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + # test = (a[0] == b) # doesn't work in Python2 + test = (b == a[0]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt', ['S', 'U']) + @pytest.mark.parametrize('fill', [None, 'A']) + def test_ne_for_strings(self, dt, fill): + # Test the equality of structured arrays + a = array(['a', 'b'], dtype=dt, mask=[0, 1], fill_value=fill) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array(['a', 'b'], dtype=dt, mask=[1, 0], fill_value=fill) + test = (a != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + # test = (a[0] != b) # doesn't work in Python2 + test = (b != a[0]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt1', num_dts, ids=num_ids) + @pytest.mark.parametrize('dt2', num_dts, ids=num_ids) + @pytest.mark.parametrize('fill', [None, 1]) + def test_eq_for_numeric(self, dt1, dt2, fill): + # Test the equality of structured arrays + a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill) + + test = (a == a) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + 
test = (a == a[0]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill) + test = (a == b) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + # test = (a[0] == b) # doesn't work in Python2 + test = (b == a[0]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + @pytest.mark.parametrize('dt1', num_dts, ids=num_ids) + @pytest.mark.parametrize('dt2', num_dts, ids=num_ids) + @pytest.mark.parametrize('fill', [None, 1]) + def test_ne_for_numeric(self, dt1, dt2, fill): + # Test the equality of structured arrays + a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill) + + test = (a != a) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + test = (a != a[0]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [False, True]) + assert_(test.fill_value == True) + + b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill) + test = (a != b) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, True]) + assert_(test.fill_value == True) + + # test = (a[0] != b) # doesn't work in Python2 + test = (b != a[0]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [True, False]) + assert_(test.fill_value == True) + + def test_eq_with_None(self): + # Really, comparisons with None should not be done, but check them + # anyway. Note that pep8 will flag these tests. + # Deprecation is in place for arrays, and when it happens this + # test will fail (and have to be changed accordingly). + + # With partial mask + with suppress_warnings() as sup: + sup.filter(FutureWarning, "Comparison to `None`") + a = array([None, 1], mask=[0, 1]) + assert_equal(a == None, array([True, False], mask=[0, 1])) + assert_equal(a.data == None, [True, False]) + assert_equal(a != None, array([False, True], mask=[0, 1])) + # With nomask + a = array([None, 1], mask=False) + assert_equal(a == None, [True, False]) + assert_equal(a != None, [False, True]) + # With complete mask + a = array([None, 2], mask=True) + assert_equal(a == None, array([False, True], mask=True)) + assert_equal(a != None, array([True, False], mask=True)) + # Fully masked, even comparison to None should return "masked" + a = masked + assert_equal(a == None, masked) + + def test_eq_with_scalar(self): + a = array(1) + assert_equal(a == 1, True) + assert_equal(a == 0, False) + assert_equal(a != 1, False) + assert_equal(a != 0, True) + b = array(1, mask=True) + assert_equal(b == 0, masked) + assert_equal(b == 1, masked) + assert_equal(b != 0, masked) + assert_equal(b != 1, masked) + + def test_eq_different_dimensions(self): + m1 = array([1, 1], mask=[0, 1]) + # test comparison with both masked and regular arrays. 
+ for m2 in (array([[0, 1], [1, 2]]), + np.array([[0, 1], [1, 2]])): + test = (m1 == m2) + assert_equal(test.data, [[False, False], + [True, False]]) + assert_equal(test.mask, [[False, True], + [False, True]]) + + def test_numpyarithmetics(self): + # Check that the mask is not back-propagated when using numpy functions + a = masked_array([-1, 0, 1, 2, 3], mask=[0, 0, 0, 0, 1]) + control = masked_array([np.nan, np.nan, 0, np.log(2), -1], + mask=[1, 1, 0, 0, 1]) + + test = log(a) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(a.mask, [0, 0, 0, 0, 1]) + + test = np.log(a) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(a.mask, [0, 0, 0, 0, 1]) + + +class TestMaskedArrayAttributes(object): + + def test_keepmask(self): + # Tests the keep mask flag + x = masked_array([1, 2, 3], mask=[1, 0, 0]) + mx = masked_array(x) + assert_equal(mx.mask, x.mask) + mx = masked_array(x, mask=[0, 1, 0], keep_mask=False) + assert_equal(mx.mask, [0, 1, 0]) + mx = masked_array(x, mask=[0, 1, 0], keep_mask=True) + assert_equal(mx.mask, [1, 1, 0]) + # We default to true + mx = masked_array(x, mask=[0, 1, 0]) + assert_equal(mx.mask, [1, 1, 0]) + + def test_hardmask(self): + # Test hard_mask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d, mask=m, hard_mask=True) + # We need to copy, to avoid updating d in xh ! + xs = array(d, mask=m, hard_mask=False, copy=True) + xh[[1, 4]] = [10, 40] + xs[[1, 4]] = [10, 40] + assert_equal(xh._data, [0, 10, 2, 3, 4]) + assert_equal(xs._data, [0, 10, 2, 3, 40]) + assert_equal(xs.mask, [0, 0, 0, 1, 0]) + assert_(xh._hardmask) + assert_(not xs._hardmask) + xh[1:4] = [10, 20, 30] + xs[1:4] = [10, 20, 30] + assert_equal(xh._data, [0, 10, 20, 3, 4]) + assert_equal(xs._data, [0, 10, 20, 30, 40]) + assert_equal(xs.mask, nomask) + xh[0] = masked + xs[0] = masked + assert_equal(xh.mask, [1, 0, 0, 1, 1]) + assert_equal(xs.mask, [1, 0, 0, 0, 0]) + xh[:] = 1 + xs[:] = 1 + assert_equal(xh._data, [0, 1, 1, 3, 4]) + assert_equal(xs._data, [1, 1, 1, 1, 1]) + assert_equal(xh.mask, [1, 0, 0, 1, 1]) + assert_equal(xs.mask, nomask) + # Switch to soft mask + xh.soften_mask() + xh[:] = arange(5) + assert_equal(xh._data, [0, 1, 2, 3, 4]) + assert_equal(xh.mask, nomask) + # Switch back to hard mask + xh.harden_mask() + xh[xh < 3] = masked + assert_equal(xh._data, [0, 1, 2, 3, 4]) + assert_equal(xh._mask, [1, 1, 1, 0, 0]) + xh[filled(xh > 1, False)] = 5 + assert_equal(xh._data, [0, 1, 2, 5, 5]) + assert_equal(xh._mask, [1, 1, 1, 0, 0]) + + xh = array([[1, 2], [3, 4]], mask=[[1, 0], [0, 0]], hard_mask=True) + xh[0] = 0 + assert_equal(xh._data, [[1, 0], [3, 4]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + xh[-1, -1] = 5 + assert_equal(xh._data, [[1, 0], [3, 5]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + xh[filled(xh < 5, False)] = 2 + assert_equal(xh._data, [[1, 2], [2, 5]]) + assert_equal(xh._mask, [[1, 0], [0, 0]]) + + def test_hardmask_again(self): + # Another test of hardmask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d, mask=m, hard_mask=True) + xh[4:5] = 999 + xh[0:1] = 999 + assert_equal(xh._data, [999, 1, 2, 3, 4]) + + def test_hardmask_oncemore_yay(self): + # OK, yet another test of hardmask + # Make sure that harden_mask/soften_mask//unshare_mask returns self + a = array([1, 2, 3], mask=[1, 0, 0]) + b = a.harden_mask() + assert_equal(a, b) + b[0] = 0 + assert_equal(a, b) + assert_equal(b, array([1, 2, 3], mask=[1, 0, 0])) + a = b.soften_mask() + a[0] = 0 + assert_equal(a, b) 
+ assert_equal(b, array([0, 2, 3], mask=[0, 0, 0])) + + def test_smallmask(self): + # Checks the behaviour of _smallmask + a = arange(10) + a[1] = masked + a[1] = 1 + assert_equal(a._mask, nomask) + a = arange(10) + a._smallmask = False + a[1] = masked + a[1] = 1 + assert_equal(a._mask, zeros(10)) + + def test_shrink_mask(self): + # Tests .shrink_mask() + a = array([1, 2, 3], mask=[0, 0, 0]) + b = a.shrink_mask() + assert_equal(a, b) + assert_equal(a.mask, nomask) + + # Mask cannot be shrunk on structured types, so is a no-op + a = np.ma.array([(1, 2.0)], [('a', int), ('b', float)]) + b = a.copy() + a.shrink_mask() + assert_equal(a.mask, b.mask) + + def test_flat(self): + # Test that flat can return all types of items [#4585, #4615] + # test 2-D record array + # ... on structured array w/ masked records + x = array([[(1, 1.1, 'one'), (2, 2.2, 'two'), (3, 3.3, 'thr')], + [(4, 4.4, 'fou'), (5, 5.5, 'fiv'), (6, 6.6, 'six')]], + dtype=[('a', int), ('b', float), ('c', '|S8')]) + x['a'][0, 1] = masked + x['b'][1, 0] = masked + x['c'][0, 2] = masked + x[-1, -1] = masked + xflat = x.flat + assert_equal(xflat[0], x[0, 0]) + assert_equal(xflat[1], x[0, 1]) + assert_equal(xflat[2], x[0, 2]) + assert_equal(xflat[:3], x[0]) + assert_equal(xflat[3], x[1, 0]) + assert_equal(xflat[4], x[1, 1]) + assert_equal(xflat[5], x[1, 2]) + assert_equal(xflat[3:], x[1]) + assert_equal(xflat[-1], x[-1, -1]) + i = 0 + j = 0 + for xf in xflat: + assert_equal(xf, x[j, i]) + i += 1 + if i >= x.shape[-1]: + i = 0 + j += 1 + + def test_assign_dtype(self): + # check that the mask's dtype is updated when dtype is changed + a = np.zeros(4, dtype='f4,i4') + + m = np.ma.array(a) + m.dtype = np.dtype('f4') + repr(m) # raises? + assert_equal(m.dtype, np.dtype('f4')) + + # check that dtype changes that change shape of mask too much + # are not allowed + def assign(): + m = np.ma.array(a) + m.dtype = np.dtype('f8') + assert_raises(ValueError, assign) + + b = a.view(dtype='f4', type=np.ma.MaskedArray) # raises? 
+ assert_equal(b.dtype, np.dtype('f4')) + + # check that nomask is preserved + a = np.zeros(4, dtype='f4') + m = np.ma.array(a) + m.dtype = np.dtype('f4,i4') + assert_equal(m.dtype, np.dtype('f4,i4')) + assert_equal(m._mask, np.ma.nomask) + + +class TestFillingValues(object): + + def test_check_on_scalar(self): + # Test _check_fill_value set to valid and invalid values + _check_fill_value = np.ma.core._check_fill_value + + fval = _check_fill_value(0, int) + assert_equal(fval, 0) + fval = _check_fill_value(None, int) + assert_equal(fval, default_fill_value(0)) + + fval = _check_fill_value(0, "|S3") + assert_equal(fval, b"0") + fval = _check_fill_value(None, "|S3") + assert_equal(fval, default_fill_value(b"camelot!")) + assert_raises(TypeError, _check_fill_value, 1e+20, int) + assert_raises(TypeError, _check_fill_value, 'stuff', int) + + def test_check_on_fields(self): + # Tests _check_fill_value with records + _check_fill_value = np.ma.core._check_fill_value + ndtype = [('a', int), ('b', float), ('c', "|S3")] + # A check on a list should return a single record + fval = _check_fill_value([-999, -12345678.9, "???"], ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + # A check on None should output the defaults + fval = _check_fill_value(None, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [default_fill_value(0), + default_fill_value(0.), + asbytes(default_fill_value("0"))]) + #.....Using a structured type as fill_value should work + fill_val = np.array((-999, -12345678.9, "???"), dtype=ndtype) + fval = _check_fill_value(fill_val, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + + #.....Using a flexible type w/ a different type shouldn't matter + # BEHAVIOR in 1.5 and earlier, and 1.13 and later: match structured + # types by position + fill_val = np.array((-999, -12345678.9, "???"), + dtype=[("A", int), ("B", float), ("C", "|S3")]) + fval = _check_fill_value(fill_val, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + + #.....Using an object-array shouldn't matter either + fill_val = np.ndarray(shape=(1,), dtype=object) + fill_val[0] = (-999, -12345678.9, b"???") + fval = _check_fill_value(fill_val, object) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + # NOTE: This test was never run properly as "fill_value" rather than + # "fill_val" was assigned. Written properly, it fails. 
+ #fill_val = np.array((-999, -12345678.9, "???")) + #fval = _check_fill_value(fill_val, ndtype) + #assert_(isinstance(fval, ndarray)) + #assert_equal(fval.item(), [-999, -12345678.9, b"???"]) + #.....One-field-only flexible type should work as well + ndtype = [("a", int)] + fval = _check_fill_value(-999999999, ndtype) + assert_(isinstance(fval, ndarray)) + assert_equal(fval.item(), (-999999999,)) + + def test_fillvalue_conversion(self): + # Tests the behavior of fill_value during conversion + # We had a tailored comment to make sure special attributes are + # properly dealt with + a = array([b'3', b'4', b'5']) + a._optinfo.update({'comment':"updated!"}) + + b = array(a, dtype=int) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0)) + + b = array(a, dtype=float) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0.)) + + b = a.astype(int) + assert_equal(b._data, [3, 4, 5]) + assert_equal(b.fill_value, default_fill_value(0)) + assert_equal(b._optinfo['comment'], "updated!") + + b = a.astype([('a', '|S3')]) + assert_equal(b['a']._data, a._data) + assert_equal(b['a'].fill_value, a.fill_value) + + def test_default_fill_value(self): + # check all calling conventions + f1 = default_fill_value(1.) + f2 = default_fill_value(np.array(1.)) + f3 = default_fill_value(np.array(1.).dtype) + assert_equal(f1, f2) + assert_equal(f1, f3) + + def test_default_fill_value_structured(self): + fields = array([(1, 1, 1)], + dtype=[('i', int), ('s', '|S8'), ('f', float)]) + + f1 = default_fill_value(fields) + f2 = default_fill_value(fields.dtype) + expected = np.array((default_fill_value(0), + default_fill_value('0'), + default_fill_value(0.)), dtype=fields.dtype) + assert_equal(f1, expected) + assert_equal(f2, expected) + + def test_default_fill_value_void(self): + dt = np.dtype([('v', 'V7')]) + f = default_fill_value(dt) + assert_equal(f['v'], np.array(default_fill_value(dt['v']), dt['v'])) + + def test_fillvalue(self): + # Yet more fun with the fill_value + data = masked_array([1, 2, 3], fill_value=-999) + series = data[[0, 2, 1]] + assert_equal(series._fill_value, data._fill_value) + + mtype = [('f', float), ('s', '|S3')] + x = array([(1, 'a'), (2, 'b'), (pi, 'pi')], dtype=mtype) + x.fill_value = 999 + assert_equal(x.fill_value.item(), [999., b'999']) + assert_equal(x['f'].fill_value, 999) + assert_equal(x['s'].fill_value, b'999') + + x.fill_value = (9, '???') + assert_equal(x.fill_value.item(), (9, b'???')) + assert_equal(x['f'].fill_value, 9) + assert_equal(x['s'].fill_value, b'???') + + x = array([1, 2, 3.1]) + x.fill_value = 999 + assert_equal(np.asarray(x.fill_value).dtype, float) + assert_equal(x.fill_value, 999.) 
+ assert_equal(x._fill_value, np.array(999.)) + + def test_subarray_fillvalue(self): + # gh-10483 test multi-field index fill value + fields = array([(1, 1, 1)], + dtype=[('i', int), ('s', '|S8'), ('f', float)]) + with suppress_warnings() as sup: + sup.filter(FutureWarning, "Numpy has detected") + subfields = fields[['i', 'f']] + assert_equal(tuple(subfields.fill_value), (999999, 1.e+20)) + # test comparison does not raise: + subfields[1:] == subfields[:-1] + + def test_fillvalue_exotic_dtype(self): + # Tests yet more exotic flexible dtypes + _check_fill_value = np.ma.core._check_fill_value + ndtype = [('i', int), ('s', '|S8'), ('f', float)] + control = np.array((default_fill_value(0), + default_fill_value('0'), + default_fill_value(0.),), + dtype=ndtype) + assert_equal(_check_fill_value(None, ndtype), control) + # The shape shouldn't matter + ndtype = [('f0', float, (2, 2))] + control = np.array((default_fill_value(0.),), + dtype=[('f0', float)]).astype(ndtype) + assert_equal(_check_fill_value(None, ndtype), control) + control = np.array((0,), dtype=[('f0', float)]).astype(ndtype) + assert_equal(_check_fill_value(0, ndtype), control) + + ndtype = np.dtype("int, (2,3)float, float") + control = np.array((default_fill_value(0), + default_fill_value(0.), + default_fill_value(0.),), + dtype="int, float, float").astype(ndtype) + test = _check_fill_value(None, ndtype) + assert_equal(test, control) + control = np.array((0, 0, 0), dtype="int, float, float").astype(ndtype) + assert_equal(_check_fill_value(0, ndtype), control) + # but when indexing, fill value should become scalar not tuple + # See issue #6723 + M = masked_array(control) + assert_equal(M["f1"].fill_value.ndim, 0) + + def test_fillvalue_datetime_timedelta(self): + # Test default fillvalue for datetime64 and timedelta64 types. + # See issue #4476, this would return '?' which would cause errors + # elsewhere + + for timecode in ("as", "fs", "ps", "ns", "us", "ms", "s", "m", + "h", "D", "W", "M", "Y"): + control = numpy.datetime64("NaT", timecode) + test = default_fill_value(numpy.dtype("<M8[" + timecode + "]")) + np.testing.assert_equal(test, control) + + control = numpy.timedelta64("NaT", timecode) + test = default_fill_value(numpy.dtype("<m8[" + timecode + "]")) + np.testing.assert_equal(test, control) + + def test_extremum_fill_value(self): + # Tests extremum fill values for flexible type. 
+ a = array([(1, (2, 3)), (4, (5, 6))], + dtype=[('A', int), ('B', [('BA', int), ('BB', int)])]) + test = a.fill_value + assert_equal(test.dtype, a.dtype) + assert_equal(test['A'], default_fill_value(a['A'])) + assert_equal(test['B']['BA'], default_fill_value(a['B']['BA'])) + assert_equal(test['B']['BB'], default_fill_value(a['B']['BB'])) + + test = minimum_fill_value(a) + assert_equal(test.dtype, a.dtype) + assert_equal(test[0], minimum_fill_value(a['A'])) + assert_equal(test[1][0], minimum_fill_value(a['B']['BA'])) + assert_equal(test[1][1], minimum_fill_value(a['B']['BB'])) + assert_equal(test[1], minimum_fill_value(a['B'])) + + test = maximum_fill_value(a) + assert_equal(test.dtype, a.dtype) + assert_equal(test[0], maximum_fill_value(a['A'])) + assert_equal(test[1][0], maximum_fill_value(a['B']['BA'])) + assert_equal(test[1][1], maximum_fill_value(a['B']['BB'])) + assert_equal(test[1], maximum_fill_value(a['B'])) + + def test_extremum_fill_value_subdtype(self): + a = array(([2, 3, 4],), dtype=[('value', np.int8, 3)]) + + test = minimum_fill_value(a) + assert_equal(test.dtype, a.dtype) + assert_equal(test[0], np.full(3, minimum_fill_value(a['value']))) + + test = maximum_fill_value(a) + assert_equal(test.dtype, a.dtype) + assert_equal(test[0], np.full(3, maximum_fill_value(a['value']))) + + def test_fillvalue_individual_fields(self): + # Test setting fill_value on individual fields + ndtype = [('a', int), ('b', int)] + # Explicit fill_value + a = array(list(zip([1, 2, 3], [4, 5, 6])), + fill_value=(-999, -999), dtype=ndtype) + aa = a['a'] + aa.set_fill_value(10) + assert_equal(aa._fill_value, np.array(10)) + assert_equal(tuple(a.fill_value), (10, -999)) + a.fill_value['b'] = -10 + assert_equal(tuple(a.fill_value), (10, -10)) + # Implicit fill_value + t = array(list(zip([1, 2, 3], [4, 5, 6])), dtype=ndtype) + tt = t['a'] + tt.set_fill_value(10) + assert_equal(tt._fill_value, np.array(10)) + assert_equal(tuple(t.fill_value), (10, default_fill_value(0))) + + def test_fillvalue_implicit_structured_array(self): + # Check that fill_value is always defined for structured arrays + ndtype = ('b', float) + adtype = ('a', float) + a = array([(1.,), (2.,)], mask=[(False,), (False,)], + fill_value=(np.nan,), dtype=np.dtype([adtype])) + b = empty(a.shape, dtype=[adtype, ndtype]) + b['a'] = a['a'] + b['a'].set_fill_value(a['a'].fill_value) + f = b._fill_value[()] + assert_(np.isnan(f[0])) + assert_equal(f[-1], default_fill_value(1.)) + + def test_fillvalue_as_arguments(self): + # Test adding a fill_value parameter to empty/ones/zeros + a = empty(3, fill_value=999.) + assert_equal(a.fill_value, 999.) + + a = ones(3, fill_value=999., dtype=float) + assert_equal(a.fill_value, 999.) + + a = zeros(3, fill_value=0., dtype=complex) + assert_equal(a.fill_value, 0.) + + a = identity(3, fill_value=0., dtype=complex) + assert_equal(a.fill_value, 0.) 
+ + def test_shape_argument(self): + # Test that shape can be provides as an argument + # GH issue 6106 + a = empty(shape=(3, )) + assert_equal(a.shape, (3, )) + + a = ones(shape=(3, ), dtype=float) + assert_equal(a.shape, (3, )) + + a = zeros(shape=(3, ), dtype=complex) + assert_equal(a.shape, (3, )) + + def test_fillvalue_in_view(self): + # Test the behavior of fill_value in view + + # Create initial masked array + x = array([1, 2, 3], fill_value=1, dtype=np.int64) + + # Check that fill_value is preserved by default + y = x.view() + assert_(y.fill_value == 1) + + # Check that fill_value is preserved if dtype is specified and the + # dtype is an ndarray sub-class and has a _fill_value attribute + y = x.view(MaskedArray) + assert_(y.fill_value == 1) + + # Check that fill_value is preserved if type is specified and the + # dtype is an ndarray sub-class and has a _fill_value attribute (by + # default, the first argument is dtype, not type) + y = x.view(type=MaskedArray) + assert_(y.fill_value == 1) + + # Check that code does not crash if passed an ndarray sub-class that + # does not have a _fill_value attribute + y = x.view(np.ndarray) + y = x.view(type=np.ndarray) + + # Check that fill_value can be overridden with view + y = x.view(MaskedArray, fill_value=2) + assert_(y.fill_value == 2) + + # Check that fill_value can be overridden with view (using type=) + y = x.view(type=MaskedArray, fill_value=2) + assert_(y.fill_value == 2) + + # Check that fill_value gets reset if passed a dtype but not a + # fill_value. This is because even though in some cases one can safely + # cast the fill_value, e.g. if taking an int64 view of an int32 array, + # in other cases, this cannot be done (e.g. int32 view of an int64 + # array with a large fill_value). + y = x.view(dtype=np.int32) + assert_(y.fill_value == 999999) + + def test_fillvalue_bytes_or_str(self): + # Test whether fill values work as expected for structured dtypes + # containing bytes or str. See issue #7259. + a = empty(shape=(3, ), dtype="(2)3S,(2)3U") + assert_equal(a["f0"].fill_value, default_fill_value(b"spam")) + assert_equal(a["f1"].fill_value, default_fill_value("eggs")) + + +class TestUfuncs(object): + # Test class for the application of ufuncs on MaskedArrays. + + def setup(self): + # Base data definition. + self.d = (array([1.0, 0, -1, pi / 2] * 2, mask=[0, 1] + [0] * 6), + array([1.0, 0, -1, pi / 2] * 2, mask=[1, 0] + [0] * 6),) + self.err_status = np.geterr() + np.seterr(divide='ignore', invalid='ignore') + + def teardown(self): + np.seterr(**self.err_status) + + def test_testUfuncRegression(self): + # Tests new ufuncs on MaskedArrays. + for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', + 'sin', 'cos', 'tan', + 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'absolute', 'fabs', 'negative', + 'floor', 'ceil', + 'logical_not', + 'add', 'subtract', 'multiply', + 'divide', 'true_divide', 'floor_divide', + 'remainder', 'fmod', 'hypot', 'arctan2', + 'equal', 'not_equal', 'less_equal', 'greater_equal', + 'less', 'greater', + 'logical_and', 'logical_or', 'logical_xor', + ]: + try: + uf = getattr(umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(numpy.ma.core, f) + args = self.d[:uf.nin] + ur = uf(*args) + mr = mf(*args) + assert_equal(ur.filled(0), mr.filled(0), f) + assert_mask_equal(ur.mask, mr.mask, err_msg=f) + + def test_reduce(self): + # Tests reduce on MaskedArrays. 
+ a = self.d[0] + assert_(not alltrue(a, axis=0)) + assert_(sometrue(a, axis=0)) + assert_equal(sum(a[:3], axis=0), 0) + assert_equal(product(a, axis=0), 0) + assert_equal(add.reduce(a), pi) + + def test_minmax(self): + # Tests extrema on MaskedArrays. + a = arange(1, 13).reshape(3, 4) + amask = masked_where(a < 5, a) + assert_equal(amask.max(), a.max()) + assert_equal(amask.min(), 5) + assert_equal(amask.max(0), a.max(0)) + assert_equal(amask.min(0), [5, 6, 7, 8]) + assert_(amask.max(1)[0].mask) + assert_(amask.min(1)[0].mask) + + def test_ndarray_mask(self): + # Check that the mask of the result is a ndarray (not a MaskedArray...) + a = masked_array([-1, 0, 1, 2, 3], mask=[0, 0, 0, 0, 1]) + test = np.sqrt(a) + control = masked_array([-1, 0, 1, np.sqrt(2), -1], + mask=[1, 0, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_(not isinstance(test.mask, MaskedArray)) + + def test_treatment_of_NotImplemented(self): + # Check that NotImplemented is returned at appropriate places + + a = masked_array([1., 2.], mask=[1, 0]) + assert_raises(TypeError, operator.mul, a, "abc") + assert_raises(TypeError, operator.truediv, a, "abc") + + class MyClass(object): + __array_priority__ = a.__array_priority__ + 1 + + def __mul__(self, other): + return "My mul" + + def __rmul__(self, other): + return "My rmul" + + me = MyClass() + assert_(me * a == "My mul") + assert_(a * me == "My rmul") + + # and that __array_priority__ is respected + class MyClass2(object): + __array_priority__ = 100 + + def __mul__(self, other): + return "Me2mul" + + def __rmul__(self, other): + return "Me2rmul" + + def __rdiv__(self, other): + return "Me2rdiv" + + __rtruediv__ = __rdiv__ + + me_too = MyClass2() + assert_(a.__mul__(me_too) is NotImplemented) + assert_(all(multiply.outer(a, me_too) == "Me2rmul")) + assert_(a.__truediv__(me_too) is NotImplemented) + assert_(me_too * a == "Me2mul") + assert_(a * me_too == "Me2rmul") + assert_(a / me_too == "Me2rdiv") + + def test_no_masked_nan_warnings(self): + # check that a nan in masked position does not + # cause ufunc warnings + + m = np.ma.array([0.5, np.nan], mask=[0,1]) + + with warnings.catch_warnings(): + warnings.filterwarnings("error") + + # test unary and binary ufuncs + exp(m) + add(m, 1) + m > 0 + + # test different unary domains + sqrt(m) + log(m) + tan(m) + arcsin(m) + arccos(m) + arccosh(m) + + # test binary domains + divide(m, 2) + + # also check that allclose uses ma ufuncs, to avoid warning + allclose(m, 0.5) + +class TestMaskedArrayInPlaceArithmetics(object): + # Test MaskedArray Arithmetics + + def setup(self): + x = arange(10) + y = arange(10) + xm = arange(10) + xm[2] = masked + self.intdata = (x, y, xm) + self.floatdata = (x.astype(float), y.astype(float), xm.astype(float)) + self.othertypes = np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + self.othertypes = [np.dtype(_).type for _ in self.othertypes] + self.uint8data = ( + x.astype(np.uint8), + y.astype(np.uint8), + xm.astype(np.uint8) + ) + + def test_inplace_addition_scalar(self): + # Test of inplace additions + (x, y, xm) = self.intdata + xm[2] = masked + x += 1 + assert_equal(x, y + 1) + xm += 1 + assert_equal(xm, y + 1) + + (x, _, xm) = self.floatdata + id1 = x.data.ctypes.data + x += 1. + assert_(id1 == x.data.ctypes.data) + assert_equal(x, y + 1.) 
+ + def test_inplace_addition_array(self): + # Test of inplace additions + (x, y, xm) = self.intdata + m = xm.mask + a = arange(10, dtype=np.int16) + a[-1] = masked + x += a + xm += a + assert_equal(x, y + a) + assert_equal(xm, y + a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_subtraction_scalar(self): + # Test of inplace subtractions + (x, y, xm) = self.intdata + x -= 1 + assert_equal(x, y - 1) + xm -= 1 + assert_equal(xm, y - 1) + + def test_inplace_subtraction_array(self): + # Test of inplace subtractions + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x -= a + xm -= a + assert_equal(x, y - a) + assert_equal(xm, y - a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_multiplication_scalar(self): + # Test of inplace multiplication + (x, y, xm) = self.floatdata + x *= 2.0 + assert_equal(x, y * 2) + xm *= 2.0 + assert_equal(xm, y * 2) + + def test_inplace_multiplication_array(self): + # Test of inplace multiplication + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x *= a + xm *= a + assert_equal(x, y * a) + assert_equal(xm, y * a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + def test_inplace_division_scalar_int(self): + # Test of inplace division + (x, y, xm) = self.intdata + x = arange(10) * 2 + xm = arange(10) * 2 + xm[2] = masked + x //= 2 + assert_equal(x, y) + xm //= 2 + assert_equal(xm, y) + + def test_inplace_division_scalar_float(self): + # Test of inplace division + (x, y, xm) = self.floatdata + x /= 2.0 + assert_equal(x, y / 2.0) + xm /= arange(10) + assert_equal(xm, ones((10,))) + + def test_inplace_division_array_float(self): + # Test of inplace division + (x, y, xm) = self.floatdata + m = xm.mask + a = arange(10, dtype=float) + a[-1] = masked + x /= a + xm /= a + assert_equal(x, y / a) + assert_equal(xm, y / a) + assert_equal(xm.mask, mask_or(mask_or(m, a.mask), (a == 0))) + + def test_inplace_division_misc(self): + + x = [1., 1., 1., -2., pi / 2., 4., 5., -10., 10., 1., 2., 3.] + y = [5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.] 
+ m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + + z = xm / ym + assert_equal(z._mask, [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]) + assert_equal(z._data, + [1., 1., 1., -1., -pi / 2., 4., 5., 1., 1., 1., 2., 3.]) + + xm = xm.copy() + xm /= ym + assert_equal(xm._mask, [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]) + assert_equal(z._data, + [1., 1., 1., -1., -pi / 2., 4., 5., 1., 1., 1., 2., 3.]) + + def test_datafriendly_add(self): + # Test keeping data w/ (inplace) addition + x = array([1, 2, 3], mask=[0, 0, 1]) + # Test add w/ scalar + xx = x + 1 + assert_equal(xx.data, [2, 3, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test iadd w/ scalar + x += 1 + assert_equal(x.data, [2, 3, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test add w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x + array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 4, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test iadd w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x += array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(x.data, [1, 4, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_sub(self): + # Test keeping data w/ (inplace) subtraction + # Test sub w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x - 1 + assert_equal(xx.data, [0, 1, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test isub w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + x -= 1 + assert_equal(x.data, [0, 1, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test sub w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x - array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 0, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test isub w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x -= array([1, 2, 3], mask=[1, 0, 0]) + assert_equal(x.data, [1, 0, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_mul(self): + # Test keeping data w/ (inplace) multiplication + # Test mul w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x * 2 + assert_equal(xx.data, [2, 4, 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test imul w/ scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + x *= 2 + assert_equal(x.data, [2, 4, 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test mul w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x * array([10, 20, 30], mask=[1, 0, 0]) + assert_equal(xx.data, [1, 40, 3]) + assert_equal(xx.mask, [1, 0, 1]) + # Test imul w/ array + x = array([1, 2, 3], mask=[0, 0, 1]) + x *= array([10, 20, 30], mask=[1, 0, 0]) + assert_equal(x.data, [1, 40, 3]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_div(self): + # Test keeping data w/ (inplace) division + # Test div on scalar + x = array([1, 2, 3], mask=[0, 0, 1]) + xx = x / 2. + assert_equal(xx.data, [1 / 2., 2 / 2., 3]) + assert_equal(xx.mask, [0, 0, 1]) + # Test idiv on scalar + x = array([1., 2., 3.], mask=[0, 0, 1]) + x /= 2. + assert_equal(x.data, [1 / 2., 2 / 2., 3]) + assert_equal(x.mask, [0, 0, 1]) + # Test div on array + x = array([1., 2., 3.], mask=[0, 0, 1]) + xx = x / array([10., 20., 30.], mask=[1, 0, 0]) + assert_equal(xx.data, [1., 2. 
/ 20., 3.]) + assert_equal(xx.mask, [1, 0, 1]) + # Test idiv on array + x = array([1., 2., 3.], mask=[0, 0, 1]) + x /= array([10., 20., 30.], mask=[1, 0, 0]) + assert_equal(x.data, [1., 2 / 20., 3.]) + assert_equal(x.mask, [1, 0, 1]) + + def test_datafriendly_pow(self): + # Test keeping data w/ (inplace) power + # Test pow on scalar + x = array([1., 2., 3.], mask=[0, 0, 1]) + xx = x ** 2.5 + assert_equal(xx.data, [1., 2. ** 2.5, 3.]) + assert_equal(xx.mask, [0, 0, 1]) + # Test ipow on scalar + x **= 2.5 + assert_equal(x.data, [1., 2. ** 2.5, 3]) + assert_equal(x.mask, [0, 0, 1]) + + def test_datafriendly_add_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_datafriendly_sub_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_datafriendly_mul_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + def test_inplace_addition_scalar_type(self): + # Test of inplace additions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + xm[2] = masked + x += t(1) + assert_equal(x, y + t(1)) + xm += t(1) + assert_equal(xm, y + t(1)) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_addition_array_type(self): + # Test of inplace additions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x += a + xm += a + assert_equal(x, y + a) + assert_equal(xm, y + a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_subtraction_scalar_type(self): + # Test of inplace subtractions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x -= t(1) + assert_equal(x, y - t(1)) + xm -= t(1) + assert_equal(xm, y - t(1)) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_subtraction_array_type(self): + # Test of inplace subtractions + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x -= a + xm -= a + assert_equal(x, y - a) + assert_equal(xm, y - a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, "Failed on type=%s." 
% t) + + def test_inplace_multiplication_scalar_type(self): + # Test of inplace multiplication + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x *= t(2) + assert_equal(x, y * t(2)) + xm *= t(2) + assert_equal(xm, y * t(2)) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_multiplication_array_type(self): + # Test of inplace multiplication + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x *= a + xm *= a + assert_equal(x, y * a) + assert_equal(xm, y * a) + assert_equal(xm.mask, mask_or(m, a.mask)) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_floor_division_scalar_type(self): + # Test of inplace division + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x = arange(10, dtype=t) * t(2) + xm = arange(10, dtype=t) * t(2) + xm[2] = masked + x //= t(2) + xm //= t(2) + assert_equal(x, y) + assert_equal(xm, y) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_floor_division_array_type(self): + # Test of inplace division + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + x //= a + xm //= a + assert_equal(x, y // a) + assert_equal(xm, y // a) + assert_equal( + xm.mask, + mask_or(mask_or(m, a.mask), (a == t(0))) + ) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + def test_inplace_division_scalar_type(self): + # Test of inplace division + for t in self.othertypes: + with suppress_warnings() as sup: + sup.record(UserWarning) + + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + x = arange(10, dtype=t) * t(2) + xm = arange(10, dtype=t) * t(2) + xm[2] = masked + + # May get a DeprecationWarning or a TypeError. + # + # This is a consequence of the fact that this is true divide + # and will require casting to float for calculation and + # casting back to the original type. This will only be raised + # with integers. Whether it is an error or warning is only + # dependent on how stringent the casting rules are. + # + # Will handle the same way. + try: + x /= t(2) + assert_equal(x, y) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + try: + xm /= t(2) + assert_equal(xm, y) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + + if issubclass(t, np.integer): + assert_equal(len(sup.log), 2, "Failed on type=%s." % t) + else: + assert_equal(len(sup.log), 0, "Failed on type=%s." % t) + + def test_inplace_division_array_type(self): + # Test of inplace division + for t in self.othertypes: + with suppress_warnings() as sup: + sup.record(UserWarning) + (x, y, xm) = (_.astype(t) for _ in self.uint8data) + m = xm.mask + a = arange(10, dtype=t) + a[-1] = masked + + # May get a DeprecationWarning or a TypeError. + # + # This is a consequence of the fact that this is true divide + # and will require casting to float for calculation and + # casting back to the original type. This will only be raised + # with integers. 
Whether it is an error or warning is only + # dependent on how stringent the casting rules are. + # + # Will handle the same way. + try: + x /= a + assert_equal(x, y / a) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + try: + xm /= a + assert_equal(xm, y / a) + assert_equal( + xm.mask, + mask_or(mask_or(m, a.mask), (a == t(0))) + ) + except (DeprecationWarning, TypeError) as e: + warnings.warn(str(e), stacklevel=1) + + if issubclass(t, np.integer): + assert_equal(len(sup.log), 2, "Failed on type=%s." % t) + else: + assert_equal(len(sup.log), 0, "Failed on type=%s." % t) + + def test_inplace_pow_type(self): + # Test keeping data w/ (inplace) power + for t in self.othertypes: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always") + # Test pow on scalar + x = array([1, 2, 3], mask=[0, 0, 1], dtype=t) + xx = x ** t(2) + xx_r = array([1, 2 ** 2, 3], mask=[0, 0, 1], dtype=t) + assert_equal(xx.data, xx_r.data) + assert_equal(xx.mask, xx_r.mask) + # Test ipow on scalar + x **= t(2) + assert_equal(x.data, xx_r.data) + assert_equal(x.mask, xx_r.mask) + + assert_equal(len(w), 0, "Failed on type=%s." % t) + + +class TestMaskedArrayMethods(object): + # Test class for miscellaneous MaskedArrays methods. + def setup(self): + # Base data definition. + x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_generic_methods(self): + # Tests some MaskedArray methods. 
+ a = array([1, 3, 2]) + assert_equal(a.any(), a._data.any()) + assert_equal(a.all(), a._data.all()) + assert_equal(a.argmax(), a._data.argmax()) + assert_equal(a.argmin(), a._data.argmin()) + assert_equal(a.choose(0, 1, 2, 3, 4), a._data.choose(0, 1, 2, 3, 4)) + assert_equal(a.compress([1, 0, 1]), a._data.compress([1, 0, 1])) + assert_equal(a.conj(), a._data.conj()) + assert_equal(a.conjugate(), a._data.conjugate()) + + m = array([[1, 2], [3, 4]]) + assert_equal(m.diagonal(), m._data.diagonal()) + assert_equal(a.sum(), a._data.sum()) + assert_equal(a.take([1, 2]), a._data.take([1, 2])) + assert_equal(m.transpose(), m._data.transpose()) + + def test_allclose(self): + # Tests allclose on arrays + a = np.random.rand(10) + b = a + np.random.rand(10) * 1e-8 + assert_(allclose(a, b)) + # Test allclose w/ infs + a[0] = np.inf + assert_(not allclose(a, b)) + b[0] = np.inf + assert_(allclose(a, b)) + # Test allclose w/ masked + a = masked_array(a) + a[-1] = masked + assert_(allclose(a, b, masked_equal=True)) + assert_(not allclose(a, b, masked_equal=False)) + # Test comparison w/ scalar + a *= 1e-8 + a[0] = 0 + assert_(allclose(a, 0, masked_equal=True)) + + # Test that the function works for MIN_INT integer typed arrays + a = masked_array([np.iinfo(np.int_).min], dtype=np.int_) + assert_(allclose(a, a)) + + def test_allany(self): + # Checks the any/all methods/functions. + x = np.array([[0.13, 0.26, 0.90], + [0.28, 0.33, 0.63], + [0.31, 0.87, 0.70]]) + m = np.array([[True, False, False], + [False, False, False], + [True, True, False]], dtype=np.bool_) + mx = masked_array(x, mask=m) + mxbig = (mx > 0.5) + mxsmall = (mx < 0.5) + + assert_(not mxbig.all()) + assert_(mxbig.any()) + assert_equal(mxbig.all(0), [False, False, True]) + assert_equal(mxbig.all(1), [False, False, True]) + assert_equal(mxbig.any(0), [False, False, True]) + assert_equal(mxbig.any(1), [True, True, True]) + + assert_(not mxsmall.all()) + assert_(mxsmall.any()) + assert_equal(mxsmall.all(0), [True, True, False]) + assert_equal(mxsmall.all(1), [False, False, False]) + assert_equal(mxsmall.any(0), [True, True, False]) + assert_equal(mxsmall.any(1), [True, True, False]) + + def test_allany_oddities(self): + # Some fun with all and any + store = empty((), dtype=bool) + full = array([1, 2, 3], mask=True) + + assert_(full.all() is masked) + full.all(out=store) + assert_(store) + assert_(store._mask, True) + assert_(store is not masked) + + store = empty((), dtype=bool) + assert_(full.any() is masked) + full.any(out=store) + assert_(not store) + assert_(store._mask, True) + assert_(store is not masked) + + def test_argmax_argmin(self): + # Tests argmin & argmax on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + + assert_equal(mx.argmin(), 35) + assert_equal(mX.argmin(), 35) + assert_equal(m2x.argmin(), 4) + assert_equal(m2X.argmin(), 4) + assert_equal(mx.argmax(), 28) + assert_equal(mX.argmax(), 28) + assert_equal(m2x.argmax(), 31) + assert_equal(m2X.argmax(), 31) + + assert_equal(mX.argmin(0), [2, 2, 2, 5, 0, 5]) + assert_equal(m2X.argmin(0), [2, 2, 4, 5, 0, 4]) + assert_equal(mX.argmax(0), [0, 5, 0, 5, 4, 0]) + assert_equal(m2X.argmax(0), [5, 5, 0, 5, 1, 0]) + + assert_equal(mX.argmin(1), [4, 1, 0, 0, 5, 5, ]) + assert_equal(m2X.argmin(1), [4, 4, 0, 0, 5, 3]) + assert_equal(mX.argmax(1), [2, 4, 1, 1, 4, 1]) + assert_equal(m2X.argmax(1), [2, 4, 1, 1, 1, 1]) + + def test_clip(self): + # Tests clip on MaskedArrays. 
+ x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + m = np.array([0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0]) + mx = array(x, mask=m) + clipped = mx.clip(2, 8) + assert_equal(clipped.mask, mx.mask) + assert_equal(clipped._data, x.clip(2, 8)) + assert_equal(clipped._data, mx._data.clip(2, 8)) + + def test_compress(self): + # test compress + a = masked_array([1., 2., 3., 4., 5.], fill_value=9999) + condition = (a > 1.5) & (a < 3.5) + assert_equal(a.compress(condition), [2., 3.]) + + a[[2, 3]] = masked + b = a.compress(condition) + assert_equal(b._data, [2., 3.]) + assert_equal(b._mask, [0, 1]) + assert_equal(b.fill_value, 9999) + assert_equal(b, a[condition]) + + condition = (a < 4.) + b = a.compress(condition) + assert_equal(b._data, [1., 2., 3.]) + assert_equal(b._mask, [0, 0, 1]) + assert_equal(b.fill_value, 9999) + assert_equal(b, a[condition]) + + a = masked_array([[10, 20, 30], [40, 50, 60]], + mask=[[0, 0, 1], [1, 0, 0]]) + b = a.compress(a.ravel() >= 22) + assert_equal(b._data, [30, 40, 50, 60]) + assert_equal(b._mask, [1, 1, 0, 0]) + + x = np.array([3, 1, 2]) + b = a.compress(x >= 2, axis=1) + assert_equal(b._data, [[10, 30], [40, 60]]) + assert_equal(b._mask, [[0, 1], [1, 0]]) + + def test_compressed(self): + # Tests compressed + a = array([1, 2, 3, 4], mask=[0, 0, 0, 0]) + b = a.compressed() + assert_equal(b, a) + a[0] = masked + b = a.compressed() + assert_equal(b, [2, 3, 4]) + + def test_empty(self): + # Tests empty/like + datatype = [('a', int), ('b', float), ('c', '|S8')] + a = masked_array([(1, 1.1, '1.1'), (2, 2.2, '2.2'), (3, 3.3, '3.3')], + dtype=datatype) + assert_equal(len(a.fill_value.item()), len(datatype)) + + b = empty_like(a) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + b = empty(len(a), dtype=datatype) + assert_equal(b.shape, a.shape) + assert_equal(b.fill_value, a.fill_value) + + # check empty_like mask handling + a = masked_array([1, 2, 3], mask=[False, True, False]) + b = empty_like(a) + assert_(not np.may_share_memory(a.mask, b.mask)) + b = a.view(masked_array) + assert_(np.may_share_memory(a.mask, b.mask)) + + @suppress_copy_mask_on_assignment + def test_put(self): + # Tests put. 
+ d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + x = array(d, mask=m) + assert_(x[3] is masked) + assert_(x[4] is masked) + x[[1, 4]] = [10, 40] + assert_(x[3] is masked) + assert_(x[4] is not masked) + assert_equal(x, [0, 10, 2, -1, 40]) + + x = masked_array(arange(10), mask=[1, 0, 0, 0, 0] * 2) + i = [0, 2, 4, 6] + x.put(i, [6, 4, 2, 0]) + assert_equal(x, asarray([6, 1, 4, 3, 2, 5, 0, 7, 8, 9, ])) + assert_equal(x.mask, [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]) + x.put(i, masked_array([0, 2, 4, 6], [1, 0, 1, 0])) + assert_array_equal(x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ]) + assert_equal(x.mask, [1, 0, 0, 0, 1, 1, 0, 0, 0, 0]) + + x = masked_array(arange(10), mask=[1, 0, 0, 0, 0] * 2) + put(x, i, [6, 4, 2, 0]) + assert_equal(x, asarray([6, 1, 4, 3, 2, 5, 0, 7, 8, 9, ])) + assert_equal(x.mask, [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]) + put(x, i, masked_array([0, 2, 4, 6], [1, 0, 1, 0])) + assert_array_equal(x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ]) + assert_equal(x.mask, [1, 0, 0, 0, 1, 1, 0, 0, 0, 0]) + + def test_put_nomask(self): + # GitHub issue 6425 + x = zeros(10) + z = array([3., -1.], mask=[False, True]) + + x.put([1, 2], z) + assert_(x[0] is not masked) + assert_equal(x[0], 0) + assert_(x[1] is not masked) + assert_equal(x[1], 3) + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_equal(x[3], 0) + + def test_put_hardmask(self): + # Tests put on hardmask + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + xh = array(d + 1, mask=m, hard_mask=True, copy=True) + xh.put([4, 2, 0, 1, 3], [1, 2, 3, 4, 5]) + assert_equal(xh._data, [3, 4, 2, 4, 5]) + + def test_putmask(self): + x = arange(6) + 1 + mx = array(x, mask=[0, 0, 0, 1, 1, 1]) + mask = [0, 0, 1, 0, 0, 1] + # w/o mask, w/o masked values + xx = x.copy() + putmask(xx, mask, 99) + assert_equal(xx, [1, 2, 99, 4, 5, 99]) + # w/ mask, w/o masked values + mxx = mx.copy() + putmask(mxx, mask, 99) + assert_equal(mxx._data, [1, 2, 99, 4, 5, 99]) + assert_equal(mxx._mask, [0, 0, 0, 1, 1, 0]) + # w/o mask, w/ masked values + values = array([10, 20, 30, 40, 50, 60], mask=[1, 1, 1, 0, 0, 0]) + xx = x.copy() + putmask(xx, mask, values) + assert_equal(xx._data, [1, 2, 30, 4, 5, 60]) + assert_equal(xx._mask, [0, 0, 1, 0, 0, 0]) + # w/ mask, w/ masked values + mxx = mx.copy() + putmask(mxx, mask, values) + assert_equal(mxx._data, [1, 2, 30, 4, 5, 60]) + assert_equal(mxx._mask, [0, 0, 1, 1, 1, 0]) + # w/ mask, w/ masked values + hardmask + mxx = mx.copy() + mxx.harden_mask() + putmask(mxx, mask, values) + assert_equal(mxx, [1, 2, 30, 4, 5, 60]) + + def test_ravel(self): + # Tests ravel + a = array([[1, 2, 3, 4, 5]], mask=[[0, 1, 0, 0, 0]]) + aravel = a.ravel() + assert_equal(aravel._mask.shape, aravel.shape) + a = array([0, 0], mask=[1, 1]) + aravel = a.ravel() + assert_equal(aravel._mask.shape, a.shape) + # Checks that small_mask is preserved + a = array([1, 2, 3, 4], mask=[0, 0, 0, 0], shrink=False) + assert_equal(a.ravel()._mask, [0, 0, 0, 0]) + # Test that the fill_value is preserved + a.fill_value = -99 + a.shape = (2, 2) + ar = a.ravel() + assert_equal(ar._mask, [0, 0, 0, 0]) + assert_equal(ar._data, [1, 2, 3, 4]) + assert_equal(ar.fill_value, -99) + # Test index ordering + assert_equal(a.ravel(order='C'), [1, 2, 3, 4]) + assert_equal(a.ravel(order='F'), [1, 3, 2, 4]) + + def test_reshape(self): + # Tests reshape + x = arange(4) + x[0] = masked + y = x.reshape(2, 2) + assert_equal(y.shape, (2, 2,)) + assert_equal(y._mask.shape, (2, 2,)) + assert_equal(x.shape, (4,)) + assert_equal(x._mask.shape, (4,)) + + def test_sort(self): + # Test sort 
+ x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8) + + sortedx = sort(x) + assert_equal(sortedx._data, [1, 2, 3, 4]) + assert_equal(sortedx._mask, [0, 0, 0, 1]) + + sortedx = sort(x, endwith=False) + assert_equal(sortedx._data, [4, 1, 2, 3]) + assert_equal(sortedx._mask, [1, 0, 0, 0]) + + x.sort() + assert_equal(x._data, [1, 2, 3, 4]) + assert_equal(x._mask, [0, 0, 0, 1]) + + x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8) + x.sort(endwith=False) + assert_equal(x._data, [4, 1, 2, 3]) + assert_equal(x._mask, [1, 0, 0, 0]) + + x = [1, 4, 2, 3] + sortedx = sort(x) + assert_(not isinstance(sorted, MaskedArray)) + + x = array([0, 1, -1, -2, 2], mask=nomask, dtype=np.int8) + sortedx = sort(x, endwith=False) + assert_equal(sortedx._data, [-2, -1, 0, 1, 2]) + x = array([0, 1, -1, -2, 2], mask=[0, 1, 0, 0, 1], dtype=np.int8) + sortedx = sort(x, endwith=False) + assert_equal(sortedx._data, [1, 2, -2, -1, 0]) + assert_equal(sortedx._mask, [1, 1, 0, 0, 0]) + + def test_stable_sort(self): + x = array([1, 2, 3, 1, 2, 3], dtype=np.uint8) + expected = array([0, 3, 1, 4, 2, 5]) + computed = argsort(x, kind='stable') + assert_equal(computed, expected) + + def test_argsort_matches_sort(self): + x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8) + + for kwargs in [dict(), + dict(endwith=True), + dict(endwith=False), + dict(fill_value=2), + dict(fill_value=2, endwith=True), + dict(fill_value=2, endwith=False)]: + sortedx = sort(x, **kwargs) + argsortedx = x[argsort(x, **kwargs)] + assert_equal(sortedx._data, argsortedx._data) + assert_equal(sortedx._mask, argsortedx._mask) + + def test_sort_2d(self): + # Check sort of 2D array. + # 2D array w/o mask + a = masked_array([[8, 4, 1], [2, 0, 9]]) + a.sort(0) + assert_equal(a, [[2, 0, 1], [8, 4, 9]]) + a = masked_array([[8, 4, 1], [2, 0, 9]]) + a.sort(1) + assert_equal(a, [[1, 4, 8], [0, 2, 9]]) + # 2D array w/mask + a = masked_array([[8, 4, 1], [2, 0, 9]], mask=[[1, 0, 0], [0, 0, 1]]) + a.sort(0) + assert_equal(a, [[2, 0, 1], [8, 4, 9]]) + assert_equal(a._mask, [[0, 0, 0], [1, 0, 1]]) + a = masked_array([[8, 4, 1], [2, 0, 9]], mask=[[1, 0, 0], [0, 0, 1]]) + a.sort(1) + assert_equal(a, [[1, 4, 8], [0, 2, 9]]) + assert_equal(a._mask, [[0, 0, 1], [0, 0, 1]]) + # 3D + a = masked_array([[[7, 8, 9], [4, 5, 6], [1, 2, 3]], + [[1, 2, 3], [7, 8, 9], [4, 5, 6]], + [[7, 8, 9], [1, 2, 3], [4, 5, 6]], + [[4, 5, 6], [1, 2, 3], [7, 8, 9]]]) + a[a % 4 == 0] = masked + am = a.copy() + an = a.filled(99) + am.sort(0) + an.sort(0) + assert_equal(am, an) + am = a.copy() + an = a.filled(99) + am.sort(1) + an.sort(1) + assert_equal(am, an) + am = a.copy() + an = a.filled(99) + am.sort(2) + an.sort(2) + assert_equal(am, an) + + def test_sort_flexible(self): + # Test sort on structured dtype. 
+ a = array( + data=[(3, 3), (3, 2), (2, 2), (2, 1), (1, 0), (1, 1), (1, 2)], + mask=[(0, 0), (0, 1), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0)], + dtype=[('A', int), ('B', int)]) + mask_last = array( + data=[(1, 1), (1, 2), (2, 1), (2, 2), (3, 3), (3, 2), (1, 0)], + mask=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (1, 0)], + dtype=[('A', int), ('B', int)]) + mask_first = array( + data=[(1, 0), (1, 1), (1, 2), (2, 1), (2, 2), (3, 2), (3, 3)], + mask=[(1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (0, 0)], + dtype=[('A', int), ('B', int)]) + + test = sort(a) + assert_equal(test, mask_last) + assert_equal(test.mask, mask_last.mask) + + test = sort(a, endwith=False) + assert_equal(test, mask_first) + assert_equal(test.mask, mask_first.mask) + + # Test sort on dtype with subarray (gh-8069) + # Just check that the sort does not error, structured array subarrays + # are treated as byte strings and that leads to differing behavior + # depending on endianess and `endwith`. + dt = np.dtype([('v', int, 2)]) + a = a.view(dt) + test = sort(a) + test = sort(a, endwith=False) + + def test_argsort(self): + # Test argsort + a = array([1, 5, 2, 4, 3], mask=[1, 0, 0, 1, 0]) + assert_equal(np.argsort(a), argsort(a)) + + def test_squeeze(self): + # Check squeeze + data = masked_array([[1, 2, 3]]) + assert_equal(data.squeeze(), [1, 2, 3]) + data = masked_array([[1, 2, 3]], mask=[[1, 1, 1]]) + assert_equal(data.squeeze(), [1, 2, 3]) + assert_equal(data.squeeze()._mask, [1, 1, 1]) + + # normal ndarrays return a view + arr = np.array([[1]]) + arr_sq = arr.squeeze() + assert_equal(arr_sq, 1) + arr_sq[...] = 2 + assert_equal(arr[0,0], 2) + + # so maskedarrays should too + m_arr = masked_array([[1]], mask=True) + m_arr_sq = m_arr.squeeze() + assert_(m_arr_sq is not np.ma.masked) + assert_equal(m_arr_sq.mask, True) + m_arr_sq[...] = 2 + assert_equal(m_arr[0,0], 2) + + def test_swapaxes(self): + # Tests swapaxes on MaskedArrays. 
+ x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mX = array(x, mask=m).reshape(6, 6) + mXX = mX.reshape(3, 2, 2, 3) + + mXswapped = mX.swapaxes(0, 1) + assert_equal(mXswapped[-1], mX[:, -1]) + + mXXswapped = mXX.swapaxes(0, 2) + assert_equal(mXXswapped.shape, (2, 2, 3, 3)) + + def test_take(self): + # Tests take + x = masked_array([10, 20, 30, 40], [0, 1, 0, 1]) + assert_equal(x.take([0, 0, 3]), masked_array([10, 10, 40], [0, 0, 1])) + assert_equal(x.take([0, 0, 3]), x[[0, 0, 3]]) + assert_equal(x.take([[0, 1], [0, 1]]), + masked_array([[10, 20], [10, 20]], [[0, 1], [0, 1]])) + + # assert_equal crashes when passed np.ma.mask + assert_(x[1] is np.ma.masked) + assert_(x.take(1) is np.ma.masked) + + x = array([[10, 20, 30], [40, 50, 60]], mask=[[0, 0, 1], [1, 0, 0, ]]) + assert_equal(x.take([0, 2], axis=1), + array([[10, 30], [40, 60]], mask=[[0, 1], [1, 0]])) + assert_equal(take(x, [0, 2], axis=1), + array([[10, 30], [40, 60]], mask=[[0, 1], [1, 0]])) + + def test_take_masked_indices(self): + # Test take w/ masked indices + a = np.array((40, 18, 37, 9, 22)) + indices = np.arange(3)[None,:] + np.arange(5)[:, None] + mindices = array(indices, mask=(indices >= len(a))) + # No mask + test = take(a, mindices, mode='clip') + ctrl = array([[40, 18, 37], + [18, 37, 9], + [37, 9, 22], + [9, 22, 22], + [22, 22, 22]]) + assert_equal(test, ctrl) + # Masked indices + test = take(a, mindices) + ctrl = array([[40, 18, 37], + [18, 37, 9], + [37, 9, 22], + [9, 22, 40], + [22, 40, 40]]) + ctrl[3, 2] = ctrl[4, 1] = ctrl[4, 2] = masked + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + # Masked input + masked indices + a = array((40, 18, 37, 9, 22), mask=(0, 1, 0, 0, 0)) + test = take(a, mindices) + ctrl[0, 1] = ctrl[1, 0] = masked + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + + def test_tolist(self): + # Tests to list + # ... on 1D + x = array(np.arange(12)) + x[[1, -2]] = masked + xlist = x.tolist() + assert_(xlist[1] is None) + assert_(xlist[-2] is None) + # ... on 2D + x.shape = (3, 4) + xlist = x.tolist() + ctrl = [[0, None, 2, 3], [4, 5, 6, 7], [8, 9, None, 11]] + assert_equal(xlist[0], [0, None, 2, 3]) + assert_equal(xlist[1], [4, 5, 6, 7]) + assert_equal(xlist[2], [8, 9, None, 11]) + assert_equal(xlist, ctrl) + # ... on structured array w/ masked records + x = array(list(zip([1, 2, 3], + [1.1, 2.2, 3.3], + ['one', 'two', 'thr'])), + dtype=[('a', int), ('b', float), ('c', '|S8')]) + x[-1] = masked + assert_equal(x.tolist(), + [(1, 1.1, b'one'), + (2, 2.2, b'two'), + (None, None, None)]) + # ... on structured array w/ masked fields + a = array([(1, 2,), (3, 4)], mask=[(0, 1), (0, 0)], + dtype=[('a', int), ('b', int)]) + test = a.tolist() + assert_equal(test, [[1, None], [3, 4]]) + # ... 
on mvoid + a = a[0] + test = a.tolist() + assert_equal(test, [1, None]) + + def test_tolist_specialcase(self): + # Test mvoid.tolist: make sure we return a standard Python object + a = array([(0, 1), (2, 3)], dtype=[('a', int), ('b', int)]) + # w/o mask: each entry is a np.void whose elements are standard Python + for entry in a: + for item in entry.tolist(): + assert_(not isinstance(item, np.generic)) + # w/ mask: each entry is a ma.void whose elements should be + # standard Python + a.mask[0] = (0, 1) + for entry in a: + for item in entry.tolist(): + assert_(not isinstance(item, np.generic)) + + def test_toflex(self): + # Test the conversion to records + data = arange(10) + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + ndtype = [('i', int), ('s', '|S3'), ('f', float)] + data = array([(i, s, f) for (i, s, f) in zip(np.arange(10), + 'ABCDEFGHIJKLM', + np.random.rand(10))], + dtype=ndtype) + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal(record['_data'], data._data) + assert_equal(record['_mask'], data._mask) + + ndtype = np.dtype("int, (2,3)float, float") + data = array([(i, f, ff) for (i, f, ff) in zip(np.arange(10), + np.random.rand(10), + np.random.rand(10))], + dtype=ndtype) + data[[0, 1, 2, -1]] = masked + record = data.toflex() + assert_equal_records(record['_data'], data._data) + assert_equal_records(record['_mask'], data._mask) + + def test_fromflex(self): + # Test the reconstruction of a masked_array from a record + a = array([1, 2, 3]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + + a = array([1, 2, 3], mask=[0, 0, 1]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + + a = array([(1, 1.), (2, 2.), (3, 3.)], mask=[(1, 0), (0, 0), (0, 1)], + dtype=[('A', int), ('B', float)]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.data, a.data) + + def test_arraymethod(self): + # Test a _arraymethod w/ n argument + marray = masked_array([[1, 2, 3, 4, 5]], mask=[0, 0, 1, 0, 0]) + control = masked_array([[1], [2], [3], [4], [5]], + mask=[0, 0, 1, 0, 0]) + assert_equal(marray.T, control) + assert_equal(marray.transpose(), control) + + assert_equal(MaskedArray.cumsum(marray.T, 0), control.cumsum(0)) + + def test_arraymethod_0d(self): + # gh-9430 + x = np.ma.array(42, mask=True) + assert_equal(x.T.mask, x.mask) + assert_equal(x.T.data, x.data) + + def test_transpose_view(self): + x = np.ma.array([[1, 2, 3], [4, 5, 6]]) + x[0,1] = np.ma.masked + xt = x.T + + xt[1,0] = 10 + xt[0,1] = np.ma.masked + + assert_equal(x.data, xt.T.data) + assert_equal(x.mask, xt.T.mask) + + def test_diagonal_view(self): + x = np.ma.zeros((3,3)) + x[0,0] = 10 + x[1,1] = np.ma.masked + x[2,2] = 20 + xd = x.diagonal() + x[1,1] = 15 + assert_equal(xd.mask, x.diagonal().mask) + assert_equal(xd.data, x.diagonal().data) + + +class TestMaskedArrayMathMethods(object): + + def setup(self): + # Base data definition. 
+ x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_cumsumprod(self): + # Tests cumsum & cumprod on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + mXcp = mX.cumsum(0) + assert_equal(mXcp._data, mX.filled(0).cumsum(0)) + mXcp = mX.cumsum(1) + assert_equal(mXcp._data, mX.filled(0).cumsum(1)) + + mXcp = mX.cumprod(0) + assert_equal(mXcp._data, mX.filled(1).cumprod(0)) + mXcp = mX.cumprod(1) + assert_equal(mXcp._data, mX.filled(1).cumprod(1)) + + def test_cumsumprod_with_output(self): + # Tests cumsum/cumprod w/ output + xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + for funcname in ('cumsum', 'cumprod'): + npfunc = getattr(np, funcname) + xmmeth = getattr(xm, funcname) + + # A ndarray as explicit input + output = np.empty((3, 4), dtype=float) + output.fill(-9999) + result = npfunc(xm, axis=0, out=output) + # ... the result should be the given output + assert_(result is output) + assert_equal(result, xmmeth(axis=0, out=output)) + + output = empty((3, 4), dtype=int) + result = xmmeth(axis=0, out=output) + assert_(result is output) + + def test_ptp(self): + # Tests ptp on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + (n, m) = X.shape + assert_equal(mx.ptp(), mx.compressed().ptp()) + rows = np.zeros(n, float) + cols = np.zeros(m, float) + for k in range(m): + cols[k] = mX[:, k].compressed().ptp() + for k in range(n): + rows[k] = mX[k].compressed().ptp() + assert_equal(mX.ptp(0), cols) + assert_equal(mX.ptp(1), rows) + + def test_add_object(self): + x = masked_array(['a', 'b'], mask=[1, 0], dtype=object) + y = x + 'x' + assert_equal(y[1], 'bx') + assert_(y.mask[0]) + + def test_sum_object(self): + # Test sum on object dtype + a = masked_array([1, 2, 3], mask=[1, 0, 0], dtype=object) + assert_equal(a.sum(), 5) + a = masked_array([[1, 2, 3], [4, 5, 6]], dtype=object) + assert_equal(a.sum(axis=0), [5, 7, 9]) + + def test_prod_object(self): + # Test prod on object dtype + a = masked_array([1, 2, 3], mask=[1, 0, 0], dtype=object) + assert_equal(a.prod(), 2 * 3) + a = masked_array([[1, 2, 3], [4, 5, 6]], dtype=object) + assert_equal(a.prod(axis=0), [4, 10, 18]) + + def test_meananom_object(self): + # Test mean/anom on object dtype + a = masked_array([1, 2, 3], dtype=object) + assert_equal(a.mean(), 2) + assert_equal(a.anom(), [-1, 0, 1]) + + def test_trace(self): + # Tests trace on MaskedArrays. 
+ (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + mXdiag = mX.diagonal() + assert_equal(mX.trace(), mX.diagonal().compressed().sum()) + assert_almost_equal(mX.trace(), + X.trace() - sum(mXdiag.mask * X.diagonal(), + axis=0)) + assert_equal(np.trace(mX), mX.trace()) + + # gh-5560 + arr = np.arange(2*4*4).reshape(2,4,4) + m_arr = np.ma.masked_array(arr, False) + assert_equal(arr.trace(axis1=1, axis2=2), m_arr.trace(axis1=1, axis2=2)) + + def test_dot(self): + # Tests dot on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + fx = mx.filled(0) + r = mx.dot(mx) + assert_almost_equal(r.filled(0), fx.dot(fx)) + assert_(r.mask is nomask) + + fX = mX.filled(0) + r = mX.dot(mX) + assert_almost_equal(r.filled(0), fX.dot(fX)) + assert_(r.mask[1,3]) + r1 = empty_like(r) + mX.dot(mX, out=r1) + assert_almost_equal(r, r1) + + mYY = mXX.swapaxes(-1, -2) + fXX, fYY = mXX.filled(0), mYY.filled(0) + r = mXX.dot(mYY) + assert_almost_equal(r.filled(0), fXX.dot(fYY)) + r1 = empty_like(r) + mXX.dot(mYY, out=r1) + assert_almost_equal(r, r1) + + def test_dot_shape_mismatch(self): + # regression test + x = masked_array([[1,2],[3,4]], mask=[[0,1],[0,0]]) + y = masked_array([[1,2],[3,4]], mask=[[0,1],[0,0]]) + z = masked_array([[0,1],[3,3]]) + x.dot(y, out=z) + assert_almost_equal(z.filled(0), [[1, 0], [15, 16]]) + assert_almost_equal(z.mask, [[0, 1], [0, 0]]) + + def test_varmean_nomask(self): + # gh-5769 + foo = array([1,2,3,4], dtype='f8') + bar = array([1,2,3,4], dtype='f8') + assert_equal(type(foo.mean()), np.float64) + assert_equal(type(foo.var()), np.float64) + assert((foo.mean() == bar.mean()) is np.bool_(True)) + + # check array type is preserved and out works + foo = array(np.arange(16).reshape((4,4)), dtype='f8') + bar = empty(4, dtype='f4') + assert_equal(type(foo.mean(axis=1)), MaskedArray) + assert_equal(type(foo.var(axis=1)), MaskedArray) + assert_(foo.mean(axis=1, out=bar) is bar) + assert_(foo.var(axis=1, out=bar) is bar) + + def test_varstd(self): + # Tests var & std on MaskedArrays. 
+ (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + assert_almost_equal(mX.var(axis=None), mX.compressed().var()) + assert_almost_equal(mX.std(axis=None), mX.compressed().std()) + assert_almost_equal(mX.std(axis=None, ddof=1), + mX.compressed().std(ddof=1)) + assert_almost_equal(mX.var(axis=None, ddof=1), + mX.compressed().var(ddof=1)) + assert_equal(mXX.var(axis=3).shape, XX.var(axis=3).shape) + assert_equal(mX.var().shape, X.var().shape) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + assert_almost_equal(mX.var(axis=None, ddof=2), + mX.compressed().var(ddof=2)) + assert_almost_equal(mX.std(axis=None, ddof=2), + mX.compressed().std(ddof=2)) + for k in range(6): + assert_almost_equal(mXvar1[k], mX[k].compressed().var()) + assert_almost_equal(mXvar0[k], mX[:, k].compressed().var()) + assert_almost_equal(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std()) + + @pytest.mark.skipif(sys.platform=='win32' and sys.version_info < (3, 6), + reason='Fails on Python < 3.6 on Windows, gh-9671') + @suppress_copy_mask_on_assignment + def test_varstd_specialcases(self): + # Test a special case for var + nout = np.array(-1, dtype=float) + mout = array(-1, dtype=float) + + x = array(arange(10), mask=True) + for methodname in ('var', 'std'): + method = getattr(x, methodname) + assert_(method() is masked) + assert_(method(0) is masked) + assert_(method(-1) is masked) + # Using a masked array as explicit output + method(out=mout) + assert_(mout is not masked) + assert_equal(mout.mask, True) + # Using a ndarray as explicit output + method(out=nout) + assert_(np.isnan(nout)) + + x = array(arange(10), mask=True) + x[-1] = 9 + for methodname in ('var', 'std'): + method = getattr(x, methodname) + assert_(method(ddof=1) is masked) + assert_(method(0, ddof=1) is masked) + assert_(method(-1, ddof=1) is masked) + # Using a masked array as explicit output + method(out=mout, ddof=1) + assert_(mout is not masked) + assert_equal(mout.mask, True) + # Using a ndarray as explicit output + method(out=nout, ddof=1) + assert_(np.isnan(nout)) + + def test_varstd_ddof(self): + a = array([[1, 1, 0], [1, 1, 0]], mask=[[0, 0, 1], [0, 0, 1]]) + test = a.std(axis=0, ddof=0) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [0, 0, 1]) + test = a.std(axis=0, ddof=1) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [0, 0, 1]) + test = a.std(axis=0, ddof=2) + assert_equal(test.filled(0), [0, 0, 0]) + assert_equal(test.mask, [1, 1, 1]) + + def test_diag(self): + # Test diag + x = arange(9).reshape((3, 3)) + x[1, 1] = masked + out = np.diag(x) + assert_equal(out, [0, 4, 8]) + out = diag(x) + assert_equal(out, [0, 4, 8]) + assert_equal(out.mask, [0, 1, 0]) + out = diag(out) + control = array([[0, 0, 0], [0, 4, 0], [0, 0, 8]], + mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(out, control) + + def test_axis_methods_nomask(self): + # Test the combination nomask & methods w/ axis + a = array([[1, 2, 3], [4, 5, 6]]) + + assert_equal(a.sum(0), [5, 7, 9]) + assert_equal(a.sum(-1), [6, 15]) + assert_equal(a.sum(1), [6, 15]) + + assert_equal(a.prod(0), [4, 10, 18]) + assert_equal(a.prod(-1), [6, 120]) + assert_equal(a.prod(1), [6, 120]) + + assert_equal(a.min(0), [1, 2, 3]) + assert_equal(a.min(-1), [1, 4]) + assert_equal(a.min(1), [1, 4]) + + assert_equal(a.max(0), [4, 5, 6]) + assert_equal(a.max(-1), [3, 6]) + assert_equal(a.max(1), [3, 6]) + + +class TestMaskedArrayMathMethodsComplex(object): + # Test class for miscellaneous MaskedArrays methods. + def setup(self): + # Base data definition. 
+ x = np.array([8.375j, 7.545j, 8.828j, 8.5j, 1.757j, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479j, + 7.189j, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993j]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + m2 = np.array([1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 0, 1, + 0, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 1]) + m2x = array(data=x, mask=m2) + m2X = array(data=X, mask=m2.reshape(X.shape)) + m2XX = array(data=XX, mask=m2.reshape(XX.shape)) + self.d = (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) + + def test_varstd(self): + # Tests var & std on MaskedArrays. + (x, X, XX, m, mx, mX, mXX, m2x, m2X, m2XX) = self.d + assert_almost_equal(mX.var(axis=None), mX.compressed().var()) + assert_almost_equal(mX.std(axis=None), mX.compressed().std()) + assert_equal(mXX.var(axis=3).shape, XX.var(axis=3).shape) + assert_equal(mX.var().shape, X.var().shape) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + assert_almost_equal(mX.var(axis=None, ddof=2), + mX.compressed().var(ddof=2)) + assert_almost_equal(mX.std(axis=None, ddof=2), + mX.compressed().std(ddof=2)) + for k in range(6): + assert_almost_equal(mXvar1[k], mX[k].compressed().var()) + assert_almost_equal(mXvar0[k], mX[:, k].compressed().var()) + assert_almost_equal(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std()) + + +class TestMaskedArrayFunctions(object): + # Test class for miscellaneous functions. + + def setup(self): + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + xm.set_fill_value(1e+20) + self.info = (xm, ym) + + def test_masked_where_bool(self): + x = [1, 2] + y = masked_where(False, x) + assert_equal(y, [1, 2]) + assert_equal(y[1], 2) + + def test_masked_equal_wlist(self): + x = [1, 2, 3] + mx = masked_equal(x, 3) + assert_equal(mx, x) + assert_equal(mx._mask, [0, 0, 1]) + mx = masked_not_equal(x, 3) + assert_equal(mx, x) + assert_equal(mx._mask, [1, 1, 0]) + + def test_masked_equal_fill_value(self): + x = [1, 2, 3] + mx = masked_equal(x, 3) + assert_equal(mx._mask, [0, 0, 1]) + assert_equal(mx.fill_value, 3) + + def test_masked_where_condition(self): + # Tests masking functions. + x = array([1., 2., 3., 4., 5.]) + x[2] = masked + assert_equal(masked_where(greater(x, 2), x), masked_greater(x, 2)) + assert_equal(masked_where(greater_equal(x, 2), x), + masked_greater_equal(x, 2)) + assert_equal(masked_where(less(x, 2), x), masked_less(x, 2)) + assert_equal(masked_where(less_equal(x, 2), x), + masked_less_equal(x, 2)) + assert_equal(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2)) + assert_equal(masked_where(equal(x, 2), x), masked_equal(x, 2)) + assert_equal(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2)) + assert_equal(masked_where([1, 1, 0, 0, 0], [1, 2, 3, 4, 5]), + [99, 99, 3, 4, 5]) + + def test_masked_where_oddities(self): + # Tests some generic features. 
+ atest = ones((10, 10, 10), dtype=float) + btest = zeros(atest.shape, MaskType) + ctest = masked_where(btest, atest) + assert_equal(atest, ctest) + + def test_masked_where_shape_constraint(self): + a = arange(10) + with assert_raises(IndexError): + masked_equal(1, a) + test = masked_equal(a, 1) + assert_equal(test.mask, [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]) + + def test_masked_where_structured(self): + # test that masked_where on a structured array sets a structured + # mask (see issue #2972) + a = np.zeros(10, dtype=[("A", "<f2"), ("B", "<f4")]) + am = np.ma.masked_where(a["A"] < 5, a) + assert_equal(am.mask.dtype.names, am.dtype.names) + assert_equal(am["A"], + np.ma.masked_array(np.zeros(10), np.ones(10))) + + def test_masked_where_mismatch(self): + # gh-4520 + x = np.arange(10) + y = np.arange(5) + assert_raises(IndexError, np.ma.masked_where, y > 6, x) + + def test_masked_otherfunctions(self): + assert_equal(masked_inside(list(range(5)), 1, 3), + [0, 199, 199, 199, 4]) + assert_equal(masked_outside(list(range(5)), 1, 3), [199, 1, 2, 3, 199]) + assert_equal(masked_inside(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 1, 3).mask, + [1, 1, 1, 1, 0]) + assert_equal(masked_outside(array(list(range(5)), + mask=[0, 1, 0, 0, 0]), 1, 3).mask, + [1, 1, 0, 0, 1]) + assert_equal(masked_equal(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 0]) + assert_equal(masked_not_equal(array([2, 2, 1, 2, 1], + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 1]) + + def test_round(self): + a = array([1.23456, 2.34567, 3.45678, 4.56789, 5.67890], + mask=[0, 1, 0, 0, 0]) + assert_equal(a.round(), [1., 2., 3., 5., 6.]) + assert_equal(a.round(1), [1.2, 2.3, 3.5, 4.6, 5.7]) + assert_equal(a.round(3), [1.235, 2.346, 3.457, 4.568, 5.679]) + b = empty_like(a) + a.round(out=b) + assert_equal(b, [1., 2., 3., 5., 6.]) + + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + + def test_round_with_output(self): + # Testing round with an explicit output + + xm = array(np.random.uniform(0, 10, 12)).reshape(3, 4) + xm[:, 0] = xm[0] = xm[-1, -1] = masked + + # A ndarray as explicit input + output = np.empty((3, 4), dtype=float) + output.fill(-9999) + result = np.round(xm, decimals=2, out=output) + # ... 
the result should be the given output + assert_(result is output) + assert_equal(result, xm.round(decimals=2, out=output)) + + output = empty((3, 4), dtype=float) + result = xm.round(decimals=2, out=output) + assert_(result is output) + + def test_round_with_scalar(self): + # Testing round with scalar/zero dimension input + # GH issue 2244 + a = array(1.1, mask=[False]) + assert_equal(a.round(), 1) + + a = array(1.1, mask=[True]) + assert_(a.round() is masked) + + a = array(1.1, mask=[False]) + output = np.empty(1, dtype=float) + output.fill(-9999) + a.round(out=output) + assert_equal(output, 1) + + a = array(1.1, mask=[False]) + output = array(-9999., mask=[True]) + a.round(out=output) + assert_equal(output[()], 1) + + a = array(1.1, mask=[True]) + output = array(-9999., mask=[False]) + a.round(out=output) + assert_(output[()] is masked) + + def test_identity(self): + a = identity(5) + assert_(isinstance(a, MaskedArray)) + assert_equal(a, np.identity(5)) + + def test_power(self): + x = -1.1 + assert_almost_equal(power(x, 2.), 1.21) + assert_(power(x, masked) is masked) + x = array([-1.1, -1.1, 1.1, 1.1, 0.]) + b = array([0.5, 2., 0.5, 2., -1.], mask=[0, 0, 0, 0, 1]) + y = power(x, b) + assert_almost_equal(y, [0, 1.21, 1.04880884817, 1.21, 0.]) + assert_equal(y._mask, [1, 0, 0, 0, 1]) + b.mask = nomask + y = power(x, b) + assert_equal(y._mask, [1, 0, 0, 0, 1]) + z = x ** b + assert_equal(z._mask, y._mask) + assert_almost_equal(z, y) + assert_almost_equal(z._data, y._data) + x **= b + assert_equal(x._mask, y._mask) + assert_almost_equal(x, y) + assert_almost_equal(x._data, y._data) + + def test_power_with_broadcasting(self): + # Test power w/ broadcasting + a2 = np.array([[1., 2., 3.], [4., 5., 6.]]) + a2m = array(a2, mask=[[1, 0, 0], [0, 0, 1]]) + b1 = np.array([2, 4, 3]) + b2 = np.array([b1, b1]) + b2m = array(b2, mask=[[0, 1, 0], [0, 1, 0]]) + + ctrl = array([[1 ** 2, 2 ** 4, 3 ** 3], [4 ** 2, 5 ** 4, 6 ** 3]], + mask=[[1, 1, 0], [0, 1, 1]]) + # No broadcasting, base & exp w/ mask + test = a2m ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + # No broadcasting, base w/ mask, exp w/o mask + test = a2m ** b2 + assert_equal(test, ctrl) + assert_equal(test.mask, a2m.mask) + # No broadcasting, base w/o mask, exp w/ mask + test = a2 ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, b2m.mask) + + ctrl = array([[2 ** 2, 4 ** 4, 3 ** 3], [2 ** 2, 4 ** 4, 3 ** 3]], + mask=[[0, 1, 0], [0, 1, 0]]) + test = b1 ** b2m + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + test = b2m ** b1 + assert_equal(test, ctrl) + assert_equal(test.mask, ctrl.mask) + + def test_where(self): + # Test the where function + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = masked_array(x, mask=m1) + ym = masked_array(y, mask=m2) + xm.set_fill_value(1e+20) + + d = where(xm > 2, xm, -9) + assert_equal(d, [-9., -9., -9., -9., -9., 4., + -9., -9., 10., -9., -9., 3.]) + assert_equal(d._mask, xm._mask) + d = where(xm > 2, -9, ym) + assert_equal(d, [5., 0., 3., 2., -1., -9., + -9., -10., -9., 1., 0., -9.]) + assert_equal(d._mask, [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]) + d = where(xm > 2, xm, masked) + assert_equal(d, [-9., -9., -9., -9., -9., 4., + -9., -9., 10., -9., -9., 3.]) + tmp = xm._mask.copy() + tmp[(xm <= 2).filled(True)] = True + assert_equal(d._mask, tmp) + + ixm = xm.astype(int) + d = 
where(ixm > 2, ixm, masked) + assert_equal(d, [-9, -9, -9, -9, -9, 4, -9, -9, 10, -9, -9, 3]) + assert_equal(d.dtype, ixm.dtype) + + def test_where_object(self): + a = np.array(None) + b = masked_array(None) + r = b.copy() + assert_equal(np.ma.where(True, a, a), r) + assert_equal(np.ma.where(True, b, b), r) + + def test_where_with_masked_choice(self): + x = arange(10) + x[3] = masked + c = x >= 8 + # Set False to masked + z = where(c, x, masked) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is masked) + assert_(z[7] is masked) + assert_(z[8] is not masked) + assert_(z[9] is not masked) + assert_equal(x, z) + # Set True to masked + z = where(c, masked, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + + def test_where_with_masked_condition(self): + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + c[0] = masked + z = where(c, x, -x) + assert_equal(z, [1., 2., 0., -4., -5]) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + + x = arange(1, 6) + x[-1] = masked + y = arange(1, 6) * 10 + y[2] = masked + c = array([1, 1, 1, 0, 0], mask=[1, 0, 0, 0, 0]) + cm = c.filled(1) + z = where(c, x, y) + zm = where(cm, x, y) + assert_equal(z, zm) + assert_(getmask(zm) is nomask) + assert_equal(zm, [1, 2, 3, 40, 50]) + z = where(c, masked, 1) + assert_equal(z, [99, 99, 99, 1, 1]) + z = where(c, 1, masked) + assert_equal(z, [99, 1, 1, 99, 99]) + + def test_where_type(self): + # Test the type conservation with where + x = np.arange(4, dtype=np.int32) + y = np.arange(4, dtype=np.float32) * 2.2 + test = where(x > 1.5, y, x).dtype + control = np.find_common_type([np.int32, np.float32], []) + assert_equal(test, control) + + def test_where_broadcast(self): + # Issue 8599 + x = np.arange(9).reshape(3, 3) + y = np.zeros(3) + core = np.where([1, 0, 1], x, y) + ma = where([1, 0, 1], x, y) + + assert_equal(core, ma) + assert_equal(core.dtype, ma.dtype) + + def test_where_structured(self): + # Issue 8600 + dt = np.dtype([('a', int), ('b', int)]) + x = np.array([(1, 2), (3, 4), (5, 6)], dtype=dt) + y = np.array((10, 20), dtype=dt) + core = np.where([0, 1, 1], x, y) + ma = np.where([0, 1, 1], x, y) + + assert_equal(core, ma) + assert_equal(core.dtype, ma.dtype) + + def test_where_structured_masked(self): + dt = np.dtype([('a', int), ('b', int)]) + x = np.array([(1, 2), (3, 4), (5, 6)], dtype=dt) + + ma = where([0, 1, 1], x, masked) + expected = masked_where([1, 0, 0], x) + + assert_equal(ma.dtype, expected.dtype) + assert_equal(ma, expected) + assert_equal(ma.mask, expected.mask) + + def test_choose(self): + # Test choose + choices = [[0, 1, 2, 3], [10, 11, 12, 13], + [20, 21, 22, 23], [30, 31, 32, 33]] + chosen = choose([2, 3, 1, 0], choices) + assert_equal(chosen, array([20, 31, 12, 3])) + chosen = choose([2, 4, 1, 0], choices, mode='clip') + assert_equal(chosen, array([20, 31, 12, 3])) + chosen = choose([2, 4, 1, 0], choices, mode='wrap') + assert_equal(chosen, array([20, 1, 12, 3])) + # Check with some masked indices + indices_ = array([2, 4, 1, 0], mask=[1, 0, 0, 1]) + chosen = choose(indices_, choices, mode='wrap') + assert_equal(chosen, array([99, 1, 12, 99])) + assert_equal(chosen.mask, [1, 0, 0, 1]) + # Check with some masked choices + choices = array(choices, mask=[[0, 0, 0, 1], [1, 1, 0, 1], + [1, 0, 0, 0], [0, 0, 0, 0]]) + indices_ = [2, 3, 
1, 0] + chosen = choose(indices_, choices, mode='wrap') + assert_equal(chosen, array([20, 31, 12, 3])) + assert_equal(chosen.mask, [1, 0, 0, 1]) + + def test_choose_with_out(self): + # Test choose with an explicit out keyword + choices = [[0, 1, 2, 3], [10, 11, 12, 13], + [20, 21, 22, 23], [30, 31, 32, 33]] + store = empty(4, dtype=int) + chosen = choose([2, 3, 1, 0], choices, out=store) + assert_equal(store, array([20, 31, 12, 3])) + assert_(store is chosen) + # Check with some masked indices + out + store = empty(4, dtype=int) + indices_ = array([2, 3, 1, 0], mask=[1, 0, 0, 1]) + chosen = choose(indices_, choices, mode='wrap', out=store) + assert_equal(store, array([99, 31, 12, 99])) + assert_equal(store.mask, [1, 0, 0, 1]) + # Check with some masked choices + out ina ndarray ! + choices = array(choices, mask=[[0, 0, 0, 1], [1, 1, 0, 1], + [1, 0, 0, 0], [0, 0, 0, 0]]) + indices_ = [2, 3, 1, 0] + store = empty(4, dtype=int).view(ndarray) + chosen = choose(indices_, choices, mode='wrap', out=store) + assert_equal(store, array([999999, 31, 12, 999999])) + + def test_reshape(self): + a = arange(10) + a[0] = masked + # Try the default + b = a.reshape((5, 2)) + assert_equal(b.shape, (5, 2)) + assert_(b.flags['C']) + # Try w/ arguments as list instead of tuple + b = a.reshape(5, 2) + assert_equal(b.shape, (5, 2)) + assert_(b.flags['C']) + # Try w/ order + b = a.reshape((5, 2), order='F') + assert_equal(b.shape, (5, 2)) + assert_(b.flags['F']) + # Try w/ order + b = a.reshape(5, 2, order='F') + assert_equal(b.shape, (5, 2)) + assert_(b.flags['F']) + + c = np.reshape(a, (2, 5)) + assert_(isinstance(c, MaskedArray)) + assert_equal(c.shape, (2, 5)) + assert_(c[0, 0] is masked) + assert_(c.flags['C']) + + def test_make_mask_descr(self): + # Flexible + ntype = [('a', float), ('b', float)] + test = make_mask_descr(ntype) + assert_equal(test, [('a', bool), ('b', bool)]) + assert_(test is make_mask_descr(test)) + + # Standard w/ shape + ntype = (float, 2) + test = make_mask_descr(ntype) + assert_equal(test, (bool, 2)) + assert_(test is make_mask_descr(test)) + + # Standard standard + ntype = float + test = make_mask_descr(ntype) + assert_equal(test, np.dtype(bool)) + assert_(test is make_mask_descr(test)) + + # Nested + ntype = [('a', float), ('b', [('ba', float), ('bb', float)])] + test = make_mask_descr(ntype) + control = np.dtype([('a', 'b1'), ('b', [('ba', 'b1'), ('bb', 'b1')])]) + assert_equal(test, control) + assert_(test is make_mask_descr(test)) + + # Named+ shape + ntype = [('a', (float, 2))] + test = make_mask_descr(ntype) + assert_equal(test, np.dtype([('a', (bool, 2))])) + assert_(test is make_mask_descr(test)) + + # 2 names + ntype = [(('A', 'a'), float)] + test = make_mask_descr(ntype) + assert_equal(test, np.dtype([(('A', 'a'), bool)])) + assert_(test is make_mask_descr(test)) + + # nested boolean types should preserve identity + base_type = np.dtype([('a', int, 3)]) + base_mtype = make_mask_descr(base_type) + sub_type = np.dtype([('a', int), ('b', base_mtype)]) + test = make_mask_descr(sub_type) + assert_equal(test, np.dtype([('a', bool), ('b', [('a', bool, 3)])])) + assert_(test.fields['b'][0] is base_mtype) + + def test_make_mask(self): + # Test make_mask + # w/ a list as an input + mask = [0, 1] + test = make_mask(mask) + assert_equal(test.dtype, MaskType) + assert_equal(test, [0, 1]) + # w/ a ndarray as an input + mask = np.array([0, 1], dtype=bool) + test = make_mask(mask) + assert_equal(test.dtype, MaskType) + assert_equal(test, [0, 1]) + # w/ a flexible-type ndarray as an input - 
use default + mdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask) + assert_equal(test.dtype, MaskType) + assert_equal(test, [1, 1]) + # w/ a flexible-type ndarray as an input - use input dtype + mdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test.dtype, mdtype) + assert_equal(test, mask) + # w/ a flexible-type ndarray as an input - use input dtype + mdtype = [('a', float), ('b', float)] + bdtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1)], dtype=mdtype) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test.dtype, bdtype) + assert_equal(test, np.array([(0, 0), (0, 1)], dtype=bdtype)) + # Ensure this also works for void + mask = np.array((False, True), dtype='?,?')[()] + assert_(isinstance(mask, np.void)) + test = make_mask(mask, dtype=mask.dtype) + assert_equal(test, mask) + assert_(test is not mask) + mask = np.array((0, 1), dtype='i4,i4')[()] + test2 = make_mask(mask, dtype=mask.dtype) + assert_equal(test2, test) + # test that nomask is returned when m is nomask. + bools = [True, False] + dtypes = [MaskType, float] + msgformat = 'copy=%s, shrink=%s, dtype=%s' + for cpy, shr, dt in itertools.product(bools, bools, dtypes): + res = make_mask(nomask, copy=cpy, shrink=shr, dtype=dt) + assert_(res is nomask, msgformat % (cpy, shr, dt)) + + def test_mask_or(self): + # Initialize + mtype = [('a', bool), ('b', bool)] + mask = np.array([(0, 0), (0, 1), (1, 0), (0, 0)], dtype=mtype) + # Test using nomask as input + test = mask_or(mask, nomask) + assert_equal(test, mask) + test = mask_or(nomask, mask) + assert_equal(test, mask) + # Using False as input + test = mask_or(mask, False) + assert_equal(test, mask) + # Using another array w / the same dtype + other = np.array([(0, 1), (0, 1), (0, 1), (0, 1)], dtype=mtype) + test = mask_or(mask, other) + control = np.array([(0, 1), (0, 1), (1, 1), (0, 1)], dtype=mtype) + assert_equal(test, control) + # Using another array w / a different dtype + othertype = [('A', bool), ('B', bool)] + other = np.array([(0, 1), (0, 1), (0, 1), (0, 1)], dtype=othertype) + try: + test = mask_or(mask, other) + except ValueError: + pass + # Using nested arrays + dtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])] + amask = np.array([(0, (1, 0)), (0, (1, 0))], dtype=dtype) + bmask = np.array([(1, (0, 1)), (0, (0, 0))], dtype=dtype) + cntrl = np.array([(1, (1, 1)), (0, (1, 0))], dtype=dtype) + assert_equal(mask_or(amask, bmask), cntrl) + + def test_flatten_mask(self): + # Tests flatten mask + # Standard dtype + mask = np.array([0, 0, 1], dtype=bool) + assert_equal(flatten_mask(mask), mask) + # Flexible dtype + mask = np.array([(0, 0), (0, 1)], dtype=[('a', bool), ('b', bool)]) + test = flatten_mask(mask) + control = np.array([0, 0, 0, 1], dtype=bool) + assert_equal(test, control) + + mdtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])] + data = [(0, (0, 0)), (0, (0, 1))] + mask = np.array(data, dtype=mdtype) + test = flatten_mask(mask) + control = np.array([0, 0, 0, 0, 0, 1], dtype=bool) + assert_equal(test, control) + + def test_on_ndarray(self): + # Test functions on ndarrays + a = np.array([1, 2, 3, 4]) + m = array(a, mask=False) + test = anom(a) + assert_equal(test, m.anom()) + test = reshape(a, (2, 2)) + assert_equal(test, m.reshape(2, 2)) + + def test_compress(self): + # Test compress function on ndarray and masked array + # Address Github #2495. 
+ arr = np.arange(8) + arr.shape = 4, 2 + cond = np.array([True, False, True, True]) + control = arr[[0, 2, 3]] + test = np.ma.compress(cond, arr, axis=0) + assert_equal(test, control) + marr = np.ma.array(arr) + test = np.ma.compress(cond, marr, axis=0) + assert_equal(test, control) + + def test_compressed(self): + # Test ma.compressed function. + # Address gh-4026 + a = np.ma.array([1, 2]) + test = np.ma.compressed(a) + assert_(type(test) is np.ndarray) + + # Test case when input data is ndarray subclass + class A(np.ndarray): + pass + + a = np.ma.array(A(shape=0)) + test = np.ma.compressed(a) + assert_(type(test) is A) + + # Test that compress flattens + test = np.ma.compressed([[1],[2]]) + assert_equal(test.ndim, 1) + test = np.ma.compressed([[[[[1]]]]]) + assert_equal(test.ndim, 1) + + # Test case when input is MaskedArray subclass + class M(MaskedArray): + pass + + test = np.ma.compressed(M(shape=(0,1,2))) + assert_equal(test.ndim, 1) + + # with .compressed() overridden + class M(MaskedArray): + def compressed(self): + return 42 + + test = np.ma.compressed(M(shape=(0,1,2))) + assert_equal(test, 42) + + def test_convolve(self): + a = masked_equal(np.arange(5), 2) + b = np.array([1, 1]) + test = np.ma.convolve(a, b) + assert_equal(test, masked_equal([0, 1, -1, -1, 7, 4], -1)) + + test = np.ma.convolve(a, b, propagate_mask=False) + assert_equal(test, masked_equal([0, 1, 1, 3, 7, 4], -1)) + + test = np.ma.convolve([1, 1], [1, 1, 1]) + assert_equal(test, masked_equal([1, 2, 2, 1], -1)) + + a = [1, 1] + b = masked_equal([1, -1, -1, 1], -1) + test = np.ma.convolve(a, b, propagate_mask=False) + assert_equal(test, masked_equal([1, 1, -1, 1, 1], -1)) + test = np.ma.convolve(a, b, propagate_mask=True) + assert_equal(test, masked_equal([-1, -1, -1, -1, -1], -1)) + + +class TestMaskedFields(object): + + def setup(self): + ilist = [1, 2, 3, 4, 5] + flist = [1.1, 2.2, 3.3, 4.4, 5.5] + slist = ['one', 'two', 'three', 'four', 'five'] + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mdtype = [('a', bool), ('b', bool), ('c', bool)] + mask = [0, 1, 0, 0, 1] + base = array(list(zip(ilist, flist, slist)), mask=mask, dtype=ddtype) + self.data = dict(base=base, mask=mask, ddtype=ddtype, mdtype=mdtype) + + def test_set_records_masks(self): + base = self.data['base'] + mdtype = self.data['mdtype'] + # Set w/ nomask or masked + base.mask = nomask + assert_equal_records(base._mask, np.zeros(base.shape, dtype=mdtype)) + base.mask = masked + assert_equal_records(base._mask, np.ones(base.shape, dtype=mdtype)) + # Set w/ simple boolean + base.mask = False + assert_equal_records(base._mask, np.zeros(base.shape, dtype=mdtype)) + base.mask = True + assert_equal_records(base._mask, np.ones(base.shape, dtype=mdtype)) + # Set w/ list + base.mask = [0, 0, 0, 1, 1] + assert_equal_records(base._mask, + np.array([(x, x, x) for x in [0, 0, 0, 1, 1]], + dtype=mdtype)) + + def test_set_record_element(self): + # Check setting an element of a record) + base = self.data['base'] + (base_a, base_b, base_c) = (base['a'], base['b'], base['c']) + base[0] = (pi, pi, 'pi') + + assert_equal(base_a.dtype, int) + assert_equal(base_a._data, [3, 2, 3, 4, 5]) + + assert_equal(base_b.dtype, float) + assert_equal(base_b._data, [pi, 2.2, 3.3, 4.4, 5.5]) + + assert_equal(base_c.dtype, '|S8') + assert_equal(base_c._data, + [b'pi', b'two', b'three', b'four', b'five']) + + def test_set_record_slice(self): + base = self.data['base'] + (base_a, base_b, base_c) = (base['a'], base['b'], base['c']) + base[:3] = (pi, pi, 'pi') + + 
assert_equal(base_a.dtype, int) + assert_equal(base_a._data, [3, 3, 3, 4, 5]) + + assert_equal(base_b.dtype, float) + assert_equal(base_b._data, [pi, pi, pi, 4.4, 5.5]) + + assert_equal(base_c.dtype, '|S8') + assert_equal(base_c._data, + [b'pi', b'pi', b'pi', b'four', b'five']) + + def test_mask_element(self): + "Check record access" + base = self.data['base'] + base[0] = masked + + for n in ('a', 'b', 'c'): + assert_equal(base[n].mask, [1, 1, 0, 0, 1]) + assert_equal(base[n]._data, base._data[n]) + + def test_getmaskarray(self): + # Test getmaskarray on flexible dtype + ndtype = [('a', int), ('b', float)] + test = empty(3, dtype=ndtype) + assert_equal(getmaskarray(test), + np.array([(0, 0), (0, 0), (0, 0)], + dtype=[('a', '|b1'), ('b', '|b1')])) + test[:] = masked + assert_equal(getmaskarray(test), + np.array([(1, 1), (1, 1), (1, 1)], + dtype=[('a', '|b1'), ('b', '|b1')])) + + def test_view(self): + # Test view w/ flexible dtype + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + controlmask = np.array([1] + 19 * [0], dtype=bool) + # Transform globally to simple dtype + test = a.view(float) + assert_equal(test, data.ravel()) + assert_equal(test.mask, controlmask) + # Transform globally to dty + test = a.view((float, 2)) + assert_equal(test, data) + assert_equal(test.mask, controlmask.reshape(-1, 2)) + + def test_getitem(self): + ndtype = [('a', float), ('b', float)] + a = array(list(zip(np.random.rand(10), np.arange(10))), dtype=ndtype) + a.mask = np.array(list(zip([0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 1, 0])), + dtype=[('a', bool), ('b', bool)]) + + def _test_index(i): + assert_equal(type(a[i]), mvoid) + assert_equal_records(a[i]._data, a._data[i]) + assert_equal_records(a[i]._mask, a._mask[i]) + + assert_equal(type(a[i, ...]), MaskedArray) + assert_equal_records(a[i,...]._data, a._data[i,...]) + assert_equal_records(a[i,...]._mask, a._mask[i,...]) + + _test_index(1) # No mask + _test_index(0) # One element masked + _test_index(-2) # All element masked + + def test_setitem(self): + # Issue 4866: check that one can set individual items in [record][col] + # and [col][record] order + ndtype = np.dtype([('a', float), ('b', int)]) + ma = np.ma.MaskedArray([(1.0, 1), (2.0, 2)], dtype=ndtype) + ma['a'][1] = 3.0 + assert_equal(ma['a'], np.array([1.0, 3.0])) + ma[1]['a'] = 4.0 + assert_equal(ma['a'], np.array([1.0, 4.0])) + # Issue 2403 + mdtype = np.dtype([('a', bool), ('b', bool)]) + # soft mask + control = np.array([(False, True), (True, True)], dtype=mdtype) + a = np.ma.masked_all((2,), dtype=ndtype) + a['a'][0] = 2 + assert_equal(a.mask, control) + a = np.ma.masked_all((2,), dtype=ndtype) + a[0]['a'] = 2 + assert_equal(a.mask, control) + # hard mask + control = np.array([(True, True), (True, True)], dtype=mdtype) + a = np.ma.masked_all((2,), dtype=ndtype) + a.harden_mask() + a['a'][0] = 2 + assert_equal(a.mask, control) + a = np.ma.masked_all((2,), dtype=ndtype) + a.harden_mask() + a[0]['a'] = 2 + assert_equal(a.mask, control) + + def test_setitem_scalar(self): + # 8510 + mask_0d = np.ma.masked_array(1, mask=True) + arr = np.ma.arange(3) + arr[0] = mask_0d + assert_array_equal(arr.mask, [True, False, False]) + + def test_element_len(self): + # check that len() works for mvoid (Github issue #576) + for rec in self.data['base']: + assert_equal(len(rec), len(self.data['ddtype'])) + + +class TestMaskedObjectArray(object): + + def test_getitem(self): + arr = 
np.ma.array([None, None]) + for dt in [float, object]: + a0 = np.eye(2).astype(dt) + a1 = np.eye(3).astype(dt) + arr[0] = a0 + arr[1] = a1 + + assert_(arr[0] is a0) + assert_(arr[1] is a1) + assert_(isinstance(arr[0,...], MaskedArray)) + assert_(isinstance(arr[1,...], MaskedArray)) + assert_(arr[0,...][()] is a0) + assert_(arr[1,...][()] is a1) + + arr[0] = np.ma.masked + + assert_(arr[1] is a1) + assert_(isinstance(arr[0,...], MaskedArray)) + assert_(isinstance(arr[1,...], MaskedArray)) + assert_equal(arr[0,...].mask, True) + assert_(arr[1,...][()] is a1) + + # gh-5962 - object arrays of arrays do something special + assert_equal(arr[0].data, a0) + assert_equal(arr[0].mask, True) + assert_equal(arr[0,...][()].data, a0) + assert_equal(arr[0,...][()].mask, True) + + def test_nested_ma(self): + + arr = np.ma.array([None, None]) + # set the first object to be an unmasked masked constant. A little fiddly + arr[0,...] = np.array([np.ma.masked], object)[0,...] + + # check the above line did what we were aiming for + assert_(arr.data[0] is np.ma.masked) + + # test that getitem returned the value by identity + assert_(arr[0] is np.ma.masked) + + # now mask the masked value! + arr[0] = np.ma.masked + assert_(arr[0] is np.ma.masked) + + +class TestMaskedView(object): + + def setup(self): + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + controlmask = np.array([1] + 19 * [0], dtype=bool) + self.data = (data, a, controlmask) + + def test_view_to_nothing(self): + (data, a, controlmask) = self.data + test = a.view() + assert_(isinstance(test, MaskedArray)) + assert_equal(test._data, a._data) + assert_equal(test._mask, a._mask) + + def test_view_to_type(self): + (data, a, controlmask) = self.data + test = a.view(np.ndarray) + assert_(not isinstance(test, MaskedArray)) + assert_equal(test, a._data) + assert_equal_records(test, data.view(a.dtype).squeeze()) + + def test_view_to_simple_dtype(self): + (data, a, controlmask) = self.data + # View globally + test = a.view(float) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data.ravel()) + assert_equal(test.mask, controlmask) + + def test_view_to_flexible_dtype(self): + (data, a, controlmask) = self.data + + test = a.view([('A', float), ('B', float)]) + assert_equal(test.mask.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a']) + assert_equal(test['B'], a['b']) + + test = a[0].view([('A', float), ('B', float)]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.mask.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a'][0]) + assert_equal(test['B'], a['b'][0]) + + test = a[-1].view([('A', float), ('B', float)]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.dtype.names, ('A', 'B')) + assert_equal(test['A'], a['a'][-1]) + assert_equal(test['B'], a['b'][-1]) + + def test_view_to_subdtype(self): + (data, a, controlmask) = self.data + # View globally + test = a.view((float, 2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data) + assert_equal(test.mask, controlmask.reshape(-1, 2)) + # View on 1 masked element + test = a[0].view((float, 2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data[0]) + assert_equal(test.mask, (1, 0)) + # View on 1 unmasked element + test = a[-1].view((float, 2)) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, data[-1]) + + def test_view_to_dtype_and_type(self): + (data, a, controlmask) = self.data + + test = a.view((float, 2), 
np.recarray) + assert_equal(test, data) + assert_(isinstance(test, np.recarray)) + assert_(not isinstance(test, MaskedArray)) + + +class TestOptionalArgs(object): + def test_ndarrayfuncs(self): + # test axis arg behaves the same as ndarray (including multiple axes) + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + # mask out last element of last dimension + m[:,:,-1] = True + a = np.ma.array(d, mask=m) + + def testaxis(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test axis arg + assert_equal(ma_f(a, axis=1)[...,:-1], numpy_f(d[...,:-1], axis=1)) + assert_equal(ma_f(a, axis=(0,1))[...,:-1], + numpy_f(d[...,:-1], axis=(0,1))) + + def testkeepdims(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test keepdims arg + assert_equal(ma_f(a, keepdims=True).shape, + numpy_f(d, keepdims=True).shape) + assert_equal(ma_f(a, keepdims=False).shape, + numpy_f(d, keepdims=False).shape) + + # test both at once + assert_equal(ma_f(a, axis=1, keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=1, keepdims=True)) + assert_equal(ma_f(a, axis=(0,1), keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=(0,1), keepdims=True)) + + for f in ['sum', 'prod', 'mean', 'var', 'std']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + for f in ['min', 'max']: + testaxis(f, a, d) + + d = (np.arange(24).reshape((2,3,4))%2 == 0) + a = np.ma.array(d, mask=m) + for f in ['all', 'any']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + def test_count(self): + # test np.ma.count specially + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + m[:,0,:] = True + a = np.ma.array(d, mask=m) + + assert_equal(count(a), 16) + assert_equal(count(a, axis=1), 2*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 4*ones((4,))) + assert_equal(count(a, keepdims=True), 16*ones((1,1,1))) + assert_equal(count(a, axis=1, keepdims=True), 2*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 4*ones((1,1,4))) + assert_equal(count(a, axis=-2), 2*ones((2,4))) + assert_raises(ValueError, count, a, axis=(1,1)) + assert_raises(np.AxisError, count, a, axis=3) + + # check the 'nomask' path + a = np.ma.array(d, mask=nomask) + + assert_equal(count(a), 24) + assert_equal(count(a, axis=1), 3*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 6*ones((4,))) + assert_equal(count(a, keepdims=True), 24*ones((1,1,1))) + assert_equal(np.ndim(count(a, keepdims=True)), 3) + assert_equal(count(a, axis=1, keepdims=True), 3*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 6*ones((1,1,4))) + assert_equal(count(a, axis=-2), 3*ones((2,4))) + assert_raises(ValueError, count, a, axis=(1,1)) + assert_raises(np.AxisError, count, a, axis=3) + + # check the 'masked' singleton + assert_equal(count(np.ma.masked), 0) + + # check 0-d arrays do not allow axis > 0 + assert_raises(np.AxisError, count, np.ma.array(1), axis=1) + + +class TestMaskedConstant(object): + def _do_add_test(self, add): + # sanity check + assert_(add(np.ma.masked, 1) is np.ma.masked) + + # now try with a vector + vector = np.array([1, 2, 3]) + result = add(np.ma.masked, vector) + + # lots of things could go wrong here + assert_(result is not np.ma.masked) + assert_(not isinstance(result, np.ma.core.MaskedConstant)) + assert_equal(result.shape, vector.shape) + assert_equal(np.ma.getmask(result), np.ones(vector.shape, dtype=bool)) + + def test_ufunc(self): + self._do_add_test(np.add) + + def test_operator(self): + 
self._do_add_test(lambda a, b: a + b) + + def test_ctor(self): + m = np.ma.array(np.ma.masked) + + # most importantly, we do not want to create a new MaskedConstant + # instance + assert_(not isinstance(m, np.ma.core.MaskedConstant)) + assert_(m is not np.ma.masked) + + def test_repr(self): + # copies should not exist, but if they do, it should be obvious that + # something is wrong + assert_equal(repr(np.ma.masked), 'masked') + + # create a new instance in a weird way + masked2 = np.ma.MaskedArray.__new__(np.ma.core.MaskedConstant) + assert_not_equal(repr(masked2), 'masked') + + def test_pickle(self): + from io import BytesIO + + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + with BytesIO() as f: + pickle.dump(np.ma.masked, f, protocol=proto) + f.seek(0) + res = pickle.load(f) + assert_(res is np.ma.masked) + + def test_copy(self): + # gh-9328 + # copy is a no-op, like it is with np.True_ + assert_equal( + np.ma.masked.copy() is np.ma.masked, + np.True_.copy() is np.True_) + + def test__copy(self): + import copy + assert_( + copy.copy(np.ma.masked) is np.ma.masked) + + def test_deepcopy(self): + import copy + assert_( + copy.deepcopy(np.ma.masked) is np.ma.masked) + + def test_immutable(self): + orig = np.ma.masked + assert_raises(np.ma.core.MaskError, operator.setitem, orig, (), 1) + assert_raises(ValueError,operator.setitem, orig.data, (), 1) + assert_raises(ValueError, operator.setitem, orig.mask, (), False) + + view = np.ma.masked.view(np.ma.MaskedArray) + assert_raises(ValueError, operator.setitem, view, (), 1) + assert_raises(ValueError, operator.setitem, view.data, (), 1) + assert_raises(ValueError, operator.setitem, view.mask, (), False) + + def test_coercion_int(self): + a_i = np.zeros((), int) + assert_raises(MaskError, operator.setitem, a_i, (), np.ma.masked) + assert_raises(MaskError, int, np.ma.masked) + + @pytest.mark.skipif(sys.version_info.major == 3, + reason="long doesn't exist in Python 3") + def test_coercion_long(self): + assert_raises(MaskError, long, np.ma.masked) + + def test_coercion_float(self): + a_f = np.zeros((), float) + assert_warns(UserWarning, operator.setitem, a_f, (), np.ma.masked) + assert_(np.isnan(a_f[()])) + + @pytest.mark.xfail(reason="See gh-9750") + def test_coercion_unicode(self): + a_u = np.zeros((), 'U10') + a_u[()] = np.ma.masked + assert_equal(a_u[()], u'--') + + @pytest.mark.xfail(reason="See gh-9750") + def test_coercion_bytes(self): + a_b = np.zeros((), 'S10') + a_b[()] = np.ma.masked + assert_equal(a_b[()], b'--') + + def test_subclass(self): + # https://github.com/astropy/astropy/issues/6645 + class Sub(type(np.ma.masked)): pass + + a = Sub() + assert_(a is Sub()) + assert_(a is not np.ma.masked) + assert_not_equal(repr(a), 'masked') + + def test_attributes_readonly(self): + assert_raises(AttributeError, setattr, np.ma.masked, 'shape', (1,)) + assert_raises(AttributeError, setattr, np.ma.masked, 'dtype', np.int64) + + +class TestMaskedWhereAliases(object): + + # TODO: Test masked_object, masked_equal, ... 
+ + def test_masked_values(self): + res = masked_values(np.array([-32768.0]), np.int16(-32768)) + assert_equal(res.mask, [True]) + + res = masked_values(np.inf, np.inf) + assert_equal(res.mask, True) + + res = np.ma.masked_values(np.inf, -np.inf) + assert_equal(res.mask, False) + + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=True) + assert_(res.mask is np.ma.nomask) + + res = np.ma.masked_values([1, 2, 3, 4], 5, shrink=False) + assert_equal(res.mask, [False] * 4) + + +def test_masked_array(): + a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) + assert_equal(np.argwhere(a), [[1], [3]]) + +def test_append_masked_array(): + a = np.ma.masked_equal([1,2,3], value=2) + b = np.ma.masked_equal([4,3,2], value=2) + + result = np.ma.append(a, b) + expected_data = [1, 2, 3, 4, 3, 2] + expected_mask = [False, True, False, False, False, True] + assert_array_equal(result.data, expected_data) + assert_array_equal(result.mask, expected_mask) + + a = np.ma.masked_all((2,2)) + b = np.ma.ones((3,1)) + + result = np.ma.append(a, b) + expected_data = [1] * 3 + expected_mask = [True] * 4 + [False] * 3 + assert_array_equal(result.data[-3], expected_data) + assert_array_equal(result.mask, expected_mask) + + result = np.ma.append(a, b, axis=None) + assert_array_equal(result.data[-3], expected_data) + assert_array_equal(result.mask, expected_mask) + + +def test_append_masked_array_along_axis(): + a = np.ma.masked_equal([1,2,3], value=2) + b = np.ma.masked_values([[4, 5, 6], [7, 8, 9]], 7) + + # When `axis` is specified, `values` must have the correct shape. + assert_raises(ValueError, np.ma.append, a, b, axis=0) + + result = np.ma.append(a[np.newaxis,:], b, axis=0) + expected = np.ma.arange(1, 10) + expected[[1, 6]] = np.ma.masked + expected = expected.reshape((3,3)) + assert_array_equal(result.data, expected.data) + assert_array_equal(result.mask, expected.mask) + + +def test_default_fill_value_complex(): + # regression test for Python 3, where 'unicode' was not defined + assert_(default_fill_value(1 + 1j) == 1.e20 + 0.0j) + + +def test_ufunc_with_output(): + # check that giving an output argument always returns that output. + # Regression test for gh-8416. 
+ x = array([1., 2., 3.], mask=[0, 0, 1]) + y = np.add(x, 1., out=x) + assert_(y is x) + + +def test_ufunc_with_out_varied(): + """ Test that masked arrays are immune to gh-10459 """ + # the mask of the output should not affect the result, however it is passed + a = array([ 1, 2, 3], mask=[1, 0, 0]) + b = array([10, 20, 30], mask=[1, 0, 0]) + out = array([ 0, 0, 0], mask=[0, 0, 1]) + expected = array([11, 22, 33], mask=[1, 0, 0]) + + out_pos = out.copy() + res_pos = np.add(a, b, out_pos) + + out_kw = out.copy() + res_kw = np.add(a, b, out=out_kw) + + out_tup = out.copy() + res_tup = np.add(a, b, out=(out_tup,)) + + assert_equal(res_kw.mask, expected.mask) + assert_equal(res_kw.data, expected.data) + assert_equal(res_tup.mask, expected.mask) + assert_equal(res_tup.data, expected.data) + assert_equal(res_pos.mask, expected.mask) + assert_equal(res_pos.data, expected.data) + + +def test_astype_mask_ordering(): + descr = [('v', int, 3), ('x', [('y', float)])] + x = array([ + [([1, 2, 3], (1.0,)), ([1, 2, 3], (2.0,))], + [([1, 2, 3], (3.0,)), ([1, 2, 3], (4.0,))]], dtype=descr) + x[0]['v'][0] = np.ma.masked + + x_a = x.astype(descr) + assert x_a.dtype.names == np.dtype(descr).names + assert x_a.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a) + + assert_(x is x.astype(x.dtype, copy=False)) + assert_equal(type(x.astype(x.dtype, subok=False)), np.ndarray) + + x_f = x.astype(x.dtype, order='F') + assert_(x_f.flags.f_contiguous) + assert_(x_f.mask.flags.f_contiguous) + + # Also test the same indirectly, via np.array + x_a2 = np.array(x, dtype=descr, subok=True) + assert x_a2.dtype.names == np.dtype(descr).names + assert x_a2.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a2) + + assert_(x is np.array(x, dtype=descr, copy=False, subok=True)) + + x_f2 = np.array(x, dtype=x.dtype, order='F', subok=True) + assert_(x_f2.flags.f_contiguous) + assert_(x_f2.mask.flags.f_contiguous) + + +@pytest.mark.parametrize('dt1', num_dts, ids=num_ids) +@pytest.mark.parametrize('dt2', num_dts, ids=num_ids) +@pytest.mark.filterwarnings('ignore::numpy.ComplexWarning') +def test_astype_basic(dt1, dt2): + # See gh-12070 + src = np.ma.array(ones(3, dt1), fill_value=1) + dst = src.astype(dt2) + + assert_(src.fill_value == 1) + assert_(src.dtype == dt1) + assert_(src.fill_value.dtype == dt1) + + assert_(dst.fill_value == 1) + assert_(dst.dtype == dt2) + assert_(dst.fill_value.dtype == dt2) + + assert_equal(src, dst) + + +def test_fieldless_void(): + dt = np.dtype([]) # a void dtype with no fields + x = np.empty(4, dt) + + # these arrays contain no values, so there's little to test - but this + # shouldn't crash + mx = np.ma.array(x) + assert_equal(mx.dtype, x.dtype) + assert_equal(mx.shape, x.shape) + + mx = np.ma.array(x, mask=x) + assert_equal(mx.dtype, x.dtype) + assert_equal(mx.shape, x.shape) diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_deprecations.py b/contrib/python/numpy/py2/numpy/ma/tests/test_deprecations.py new file mode 100644 index 0000000000..72cc29aa04 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_deprecations.py @@ -0,0 +1,70 @@ +"""Test deprecation and future warnings. 
+ +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_warns +from numpy.ma.testutils import assert_equal +from numpy.ma.core import MaskedArrayFutureWarning + +class TestArgsort(object): + """ gh-8701 """ + def _test_base(self, argsort, cls): + arr_0d = np.array(1).view(cls) + argsort(arr_0d) + + arr_1d = np.array([1, 2, 3]).view(cls) + argsort(arr_1d) + + # argsort has a bad default for >1d arrays + arr_2d = np.array([[1, 2], [3, 4]]).view(cls) + result = assert_warns( + np.ma.core.MaskedArrayFutureWarning, argsort, arr_2d) + assert_equal(result, argsort(arr_2d, axis=None)) + + # should be no warnings for explicitly specifying it + argsort(arr_2d, axis=None) + argsort(arr_2d, axis=-1) + + def test_function_ndarray(self): + return self._test_base(np.ma.argsort, np.ndarray) + + def test_function_maskedarray(self): + return self._test_base(np.ma.argsort, np.ma.MaskedArray) + + def test_method(self): + return self._test_base(np.ma.MaskedArray.argsort, np.ma.MaskedArray) + + +class TestMinimumMaximum(object): + def test_minimum(self): + assert_warns(DeprecationWarning, np.ma.minimum, np.ma.array([1, 2])) + + def test_maximum(self): + assert_warns(DeprecationWarning, np.ma.maximum, np.ma.array([1, 2])) + + def test_axis_default(self): + # NumPy 1.13, 2017-05-06 + + data1d = np.ma.arange(6) + data2d = data1d.reshape(2, 3) + + ma_min = np.ma.minimum.reduce + ma_max = np.ma.maximum.reduce + + # check that the default axis is still None, but warns on 2d arrays + result = assert_warns(MaskedArrayFutureWarning, ma_max, data2d) + assert_equal(result, ma_max(data2d, axis=None)) + + result = assert_warns(MaskedArrayFutureWarning, ma_min, data2d) + assert_equal(result, ma_min(data2d, axis=None)) + + # no warnings on 1d, as both new and old defaults are equivalent + result = ma_min(data1d) + assert_equal(result, ma_min(data1d, axis=None)) + assert_equal(result, ma_min(data1d, axis=0)) + + result = ma_max(data1d) + assert_equal(result, ma_max(data1d, axis=None)) + assert_equal(result, ma_max(data1d, axis=0)) diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_extras.py b/contrib/python/numpy/py2/numpy/ma/tests/test_extras.py new file mode 100644 index 0000000000..5243cf714a --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_extras.py @@ -0,0 +1,1678 @@ +# pylint: disable-msg=W0611, W0612, W0511 +"""Tests suite for MaskedArray. 
+Adapted from the original test_ma by Pierre Gerard-Marchant + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: test_extras.py 3473 2007-10-29 15:18:13Z jarrod.millman $ + +""" +from __future__ import division, absolute_import, print_function + +import warnings +import itertools + +import numpy as np +from numpy.testing import ( + assert_warns, suppress_warnings + ) +from numpy.ma.testutils import ( + assert_, assert_array_equal, assert_equal, assert_almost_equal + ) +from numpy.ma.core import ( + array, arange, masked, MaskedArray, masked_array, getmaskarray, shape, + nomask, ones, zeros, count + ) +from numpy.ma.extras import ( + atleast_1d, atleast_2d, atleast_3d, mr_, dot, polyfit, cov, corrcoef, + median, average, unique, setxor1d, setdiff1d, union1d, intersect1d, in1d, + ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, + mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, + notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, + diagflat, stack, vstack + ) + + +class TestGeneric(object): + # + def test_masked_all(self): + # Tests masked_all + # Standard dtype + test = masked_all((2,), dtype=float) + control = array([1, 1], mask=[1, 1], dtype=float) + assert_equal(test, control) + # Flexible dtype + dt = np.dtype({'names': ['a', 'b'], 'formats': ['f', 'f']}) + test = masked_all((2,), dtype=dt) + control = array([(0, 0), (0, 0)], mask=[(1, 1), (1, 1)], dtype=dt) + assert_equal(test, control) + test = masked_all((2, 2), dtype=dt) + control = array([[(0, 0), (0, 0)], [(0, 0), (0, 0)]], + mask=[[(1, 1), (1, 1)], [(1, 1), (1, 1)]], + dtype=dt) + assert_equal(test, control) + # Nested dtype + dt = np.dtype([('a', 'f'), ('b', [('ba', 'f'), ('bb', 'f')])]) + test = masked_all((2,), dtype=dt) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + assert_equal(test, control) + test = masked_all((2,), dtype=dt) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + assert_equal(test, control) + test = masked_all((1, 1), dtype=dt) + control = array([[(1, (1, 1))]], mask=[[(1, (1, 1))]], dtype=dt) + assert_equal(test, control) + + def test_masked_all_like(self): + # Tests masked_all + # Standard dtype + base = array([1, 2], dtype=float) + test = masked_all_like(base) + control = array([1, 1], mask=[1, 1], dtype=float) + assert_equal(test, control) + # Flexible dtype + dt = np.dtype({'names': ['a', 'b'], 'formats': ['f', 'f']}) + base = array([(0, 0), (0, 0)], mask=[(1, 1), (1, 1)], dtype=dt) + test = masked_all_like(base) + control = array([(10, 10), (10, 10)], mask=[(1, 1), (1, 1)], dtype=dt) + assert_equal(test, control) + # Nested dtype + dt = np.dtype([('a', 'f'), ('b', [('ba', 'f'), ('bb', 'f')])]) + control = array([(1, (1, 1)), (1, (1, 1))], + mask=[(1, (1, 1)), (1, (1, 1))], dtype=dt) + test = masked_all_like(control) + assert_equal(test, control) + + def check_clump(self, f): + for i in range(1, 7): + for j in range(2**i): + k = np.arange(i, dtype=int) + ja = np.full(i, j, dtype=int) + a = masked_array(2**k) + a.mask = (ja & (2**k)) != 0 + s = 0 + for sl in f(a): + s += a.data[sl].sum() + if f == clump_unmasked: + assert_equal(a.compressed().sum(), s) + else: + a.mask = ~a.mask + assert_equal(a.compressed().sum(), s) + + def test_clump_masked(self): + # Test clump_masked + a = masked_array(np.arange(10)) + a[[0, 1, 2, 6, 8, 9]] = masked + # + test = clump_masked(a) + control = [slice(0, 3), slice(6, 7), slice(8, 10)] 
+ assert_equal(test, control) + + self.check_clump(clump_masked) + + def test_clump_unmasked(self): + # Test clump_unmasked + a = masked_array(np.arange(10)) + a[[0, 1, 2, 6, 8, 9]] = masked + test = clump_unmasked(a) + control = [slice(3, 6), slice(7, 8), ] + assert_equal(test, control) + + self.check_clump(clump_unmasked) + + def test_flatnotmasked_contiguous(self): + # Test flatnotmasked_contiguous + a = arange(10) + # No mask + test = flatnotmasked_contiguous(a) + assert_equal(test, [slice(0, a.size)]) + # mask of all false + a.mask = np.zeros(10, dtype=bool) + assert_equal(test, [slice(0, a.size)]) + # Some mask + a[(a < 3) | (a > 8) | (a == 5)] = masked + test = flatnotmasked_contiguous(a) + assert_equal(test, [slice(3, 5), slice(6, 9)]) + # + a[:] = masked + test = flatnotmasked_contiguous(a) + assert_equal(test, []) + + +class TestAverage(object): + # Several tests of average. Why so many ? Good point... + def test_testAverage1(self): + # Test of average. + ott = array([0., 1., 2., 3.], mask=[True, False, False, False]) + assert_equal(2.0, average(ott, axis=0)) + assert_equal(2.0, average(ott, weights=[1., 1., 2., 1.])) + result, wts = average(ott, weights=[1., 1., 2., 1.], returned=1) + assert_equal(2.0, result) + assert_(wts == 4.0) + ott[:] = masked + assert_equal(average(ott, axis=0).mask, [True]) + ott = array([0., 1., 2., 3.], mask=[True, False, False, False]) + ott = ott.reshape(2, 2) + ott[:, 1] = masked + assert_equal(average(ott, axis=0), [2.0, 0.0]) + assert_equal(average(ott, axis=1).mask[0], [True]) + assert_equal([2., 0.], average(ott, axis=0)) + result, wts = average(ott, axis=0, returned=1) + assert_equal(wts, [1., 0.]) + + def test_testAverage2(self): + # More tests of average. + w1 = [0, 1, 1, 1, 1, 0] + w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]] + x = arange(6, dtype=np.float_) + assert_equal(average(x, axis=0), 2.5) + assert_equal(average(x, axis=0, weights=w1), 2.5) + y = array([arange(6, dtype=np.float_), 2.0 * arange(6)]) + assert_equal(average(y, None), np.add.reduce(np.arange(6)) * 3. / 12.) + assert_equal(average(y, axis=0), np.arange(6) * 3. / 2.) + assert_equal(average(y, axis=1), + [average(x, axis=0), average(x, axis=0) * 2.0]) + assert_equal(average(y, None, weights=w2), 20. / 6.) + assert_equal(average(y, axis=0, weights=w2), + [0., 1., 2., 3., 4., 10.]) + assert_equal(average(y, axis=1), + [average(x, axis=0), average(x, axis=0) * 2.0]) + m1 = zeros(6) + m2 = [0, 0, 1, 1, 0, 0] + m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_equal(average(masked_array(x, m1), axis=0), 2.5) + assert_equal(average(masked_array(x, m2), axis=0), 2.5) + assert_equal(average(masked_array(x, m4), axis=0).mask, [True]) + assert_equal(average(masked_array(x, m5), axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4), axis=0)), 0) + z = masked_array(y, m3) + assert_equal(average(z, None), 20. / 6.) + assert_equal(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5]) + assert_equal(average(z, axis=1), [2.5, 5.0]) + assert_equal(average(z, axis=0, weights=w2), + [0., 1., 99., 99., 4.0, 10.0]) + + def test_testAverage3(self): + # Yet more tests of average! 
+ a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a, b], [b, a]], axis=1, returned=1) + assert_equal(shape(r1), shape(w1)) + assert_equal(r1.shape, w1.shape) + r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=1) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), returned=1) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=1) + assert_equal(shape(w2), shape(r2)) + a2d = array([[1, 2], [0, 4]], float) + a2dm = masked_array(a2d, [[False, False], [True, False]]) + a2da = average(a2d, axis=0) + assert_equal(a2da, [0.5, 3.0]) + a2dma = average(a2dm, axis=0) + assert_equal(a2dma, [1.0, 3.0]) + a2dma = average(a2dm, axis=None) + assert_equal(a2dma, 7. / 3.) + a2dma = average(a2dm, axis=1) + assert_equal(a2dma, [1.5, 4.0]) + + def test_onintegers_with_mask(self): + # Test average on integers with mask + a = average(array([1, 2])) + assert_equal(a, 1.5) + a = average(array([1, 2, 3, 4], mask=[False, False, True, True])) + assert_equal(a, 1.5) + + def test_complex(self): + # Test with complex data. + # (Regression test for https://github.com/numpy/numpy/issues/2684) + mask = np.array([[0, 0, 0, 1, 0], + [0, 1, 0, 0, 0]], dtype=bool) + a = masked_array([[0, 1+2j, 3+4j, 5+6j, 7+8j], + [9j, 0+1j, 2+3j, 4+5j, 7+7j]], + mask=mask) + + av = average(a) + expected = np.average(a.compressed()) + assert_almost_equal(av.real, expected.real) + assert_almost_equal(av.imag, expected.imag) + + av0 = average(a, axis=0) + expected0 = average(a.real, axis=0) + average(a.imag, axis=0)*1j + assert_almost_equal(av0.real, expected0.real) + assert_almost_equal(av0.imag, expected0.imag) + + av1 = average(a, axis=1) + expected1 = average(a.real, axis=1) + average(a.imag, axis=1)*1j + assert_almost_equal(av1.real, expected1.real) + assert_almost_equal(av1.imag, expected1.imag) + + # Test with the 'weights' argument. + wts = np.array([[0.5, 1.0, 2.0, 1.0, 0.5], + [1.0, 1.0, 1.0, 1.0, 1.0]]) + wav = average(a, weights=wts) + expected = np.average(a.compressed(), weights=wts[~mask]) + assert_almost_equal(wav.real, expected.real) + assert_almost_equal(wav.imag, expected.imag) + + wav0 = average(a, weights=wts, axis=0) + expected0 = (average(a.real, weights=wts, axis=0) + + average(a.imag, weights=wts, axis=0)*1j) + assert_almost_equal(wav0.real, expected0.real) + assert_almost_equal(wav0.imag, expected0.imag) + + wav1 = average(a, weights=wts, axis=1) + expected1 = (average(a.real, weights=wts, axis=1) + + average(a.imag, weights=wts, axis=1)*1j) + assert_almost_equal(wav1.real, expected1.real) + assert_almost_equal(wav1.imag, expected1.imag) + + +class TestConcatenator(object): + # Tests for mr_, the equivalent of r_ for masked arrays. + + def test_1d(self): + # Tests mr_ on 1D arrays. + assert_array_equal(mr_[1, 2, 3, 4, 5, 6], array([1, 2, 3, 4, 5, 6])) + b = ones(5) + m = [1, 0, 0, 0, 0] + d = masked_array(b, mask=m) + c = mr_[d, 0, 0, d] + assert_(isinstance(c, MaskedArray)) + assert_array_equal(c, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1]) + assert_array_equal(c.mask, mr_[m, 0, 0, m]) + + def test_2d(self): + # Tests mr_ on 2D arrays. 
+ a_1 = np.random.rand(5, 5) + a_2 = np.random.rand(5, 5) + m_1 = np.round_(np.random.rand(5, 5), 0) + m_2 = np.round_(np.random.rand(5, 5), 0) + b_1 = masked_array(a_1, mask=m_1) + b_2 = masked_array(a_2, mask=m_2) + # append columns + d = mr_['1', b_1, b_2] + assert_(d.shape == (5, 10)) + assert_array_equal(d[:, :5], b_1) + assert_array_equal(d[:, 5:], b_2) + assert_array_equal(d.mask, np.r_['1', m_1, m_2]) + d = mr_[b_1, b_2] + assert_(d.shape == (10, 5)) + assert_array_equal(d[:5,:], b_1) + assert_array_equal(d[5:,:], b_2) + assert_array_equal(d.mask, np.r_[m_1, m_2]) + + def test_masked_constant(self): + actual = mr_[np.ma.masked, 1] + assert_equal(actual.mask, [True, False]) + assert_equal(actual.data[1], 1) + + actual = mr_[[1, 2], np.ma.masked] + assert_equal(actual.mask, [False, False, True]) + assert_equal(actual.data[:2], [1, 2]) + + +class TestNotMasked(object): + # Tests notmasked_edges and notmasked_contiguous. + + def test_edges(self): + # Tests unmasked_edges + data = masked_array(np.arange(25).reshape(5, 5), + mask=[[0, 0, 1, 0, 0], + [0, 0, 0, 1, 1], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 0, 0]],) + test = notmasked_edges(data, None) + assert_equal(test, [0, 24]) + test = notmasked_edges(data, 0) + assert_equal(test[0], [(0, 0, 1, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(3, 3, 3, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data, 1) + assert_equal(test[0], [(0, 1, 2, 3, 4), (0, 0, 2, 0, 3)]) + assert_equal(test[1], [(0, 1, 2, 3, 4), (4, 2, 4, 4, 4)]) + # + test = notmasked_edges(data.data, None) + assert_equal(test, [0, 24]) + test = notmasked_edges(data.data, 0) + assert_equal(test[0], [(0, 0, 0, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(4, 4, 4, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data.data, -1) + assert_equal(test[0], [(0, 1, 2, 3, 4), (0, 0, 0, 0, 0)]) + assert_equal(test[1], [(0, 1, 2, 3, 4), (4, 4, 4, 4, 4)]) + # + data[-2] = masked + test = notmasked_edges(data, 0) + assert_equal(test[0], [(0, 0, 1, 0, 0), (0, 1, 2, 3, 4)]) + assert_equal(test[1], [(1, 1, 2, 4, 4), (0, 1, 2, 3, 4)]) + test = notmasked_edges(data, -1) + assert_equal(test[0], [(0, 1, 2, 4), (0, 0, 2, 3)]) + assert_equal(test[1], [(0, 1, 2, 4), (4, 2, 4, 4)]) + + def test_contiguous(self): + # Tests notmasked_contiguous + a = masked_array(np.arange(24).reshape(3, 8), + mask=[[0, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 0]]) + tmp = notmasked_contiguous(a, None) + assert_equal(tmp, [ + slice(0, 4, None), + slice(16, 22, None), + slice(23, 24, None) + ]) + + tmp = notmasked_contiguous(a, 0) + assert_equal(tmp, [ + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(0, 1, None), slice(2, 3, None)], + [slice(2, 3, None)], + [slice(2, 3, None)], + [], + [slice(2, 3, None)] + ]) + # + tmp = notmasked_contiguous(a, 1) + assert_equal(tmp, [ + [slice(0, 4, None)], + [], + [slice(0, 6, None), slice(7, 8, None)] + ]) + + +class TestCompressFunctions(object): + + def test_compress_nd(self): + # Tests compress_nd + x = np.array(list(range(3*4*5))).reshape(3, 4, 5) + m = np.zeros((3,4,5)).astype(bool) + m[1,1,1] = True + x = array(x, mask=m) + + # axis=None + a = compress_nd(x) + assert_equal(a, [[[ 0, 2, 3, 4], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[40, 42, 43, 44], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + # axis=0 + a = compress_nd(x, 0) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], 
+ [[40, 41, 42, 43, 44], + [45, 46, 47, 48, 49], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + + # axis=1 + a = compress_nd(x, 1) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + [[20, 21, 22, 23, 24], + [30, 31, 32, 33, 34], + [35, 36, 37, 38, 39]], + [[40, 41, 42, 43, 44], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + + a2 = compress_nd(x, (1,)) + a3 = compress_nd(x, -2) + a4 = compress_nd(x, (-2,)) + assert_equal(a, a2) + assert_equal(a, a3) + assert_equal(a, a4) + + # axis=2 + a = compress_nd(x, 2) + assert_equal(a, [[[ 0, 2, 3, 4], + [ 5, 7, 8, 9], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[20, 22, 23, 24], + [25, 27, 28, 29], + [30, 32, 33, 34], + [35, 37, 38, 39]], + [[40, 42, 43, 44], + [45, 47, 48, 49], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (2,)) + a3 = compress_nd(x, -1) + a4 = compress_nd(x, (-1,)) + assert_equal(a, a2) + assert_equal(a, a3) + assert_equal(a, a4) + + # axis=(0, 1) + a = compress_nd(x, (0, 1)) + assert_equal(a, [[[ 0, 1, 2, 3, 4], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + [[40, 41, 42, 43, 44], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + a2 = compress_nd(x, (0, -2)) + assert_equal(a, a2) + + # axis=(1, 2) + a = compress_nd(x, (1, 2)) + assert_equal(a, [[[ 0, 2, 3, 4], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[20, 22, 23, 24], + [30, 32, 33, 34], + [35, 37, 38, 39]], + [[40, 42, 43, 44], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (-2, 2)) + a3 = compress_nd(x, (1, -1)) + a4 = compress_nd(x, (-2, -1)) + assert_equal(a, a2) + assert_equal(a, a3) + assert_equal(a, a4) + + # axis=(0, 2) + a = compress_nd(x, (0, 2)) + assert_equal(a, [[[ 0, 2, 3, 4], + [ 5, 7, 8, 9], + [10, 12, 13, 14], + [15, 17, 18, 19]], + [[40, 42, 43, 44], + [45, 47, 48, 49], + [50, 52, 53, 54], + [55, 57, 58, 59]]]) + + a2 = compress_nd(x, (0, -1)) + assert_equal(a, a2) + + def test_compress_rowcols(self): + # Tests compress_rowcols + x = array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[4, 5], [7, 8]]) + assert_equal(compress_rowcols(x, 0), [[3, 4, 5], [6, 7, 8]]) + assert_equal(compress_rowcols(x, 1), [[1, 2], [4, 5], [7, 8]]) + x = array(x._data, mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[0, 2], [6, 8]]) + assert_equal(compress_rowcols(x, 0), [[0, 1, 2], [6, 7, 8]]) + assert_equal(compress_rowcols(x, 1), [[0, 2], [3, 5], [6, 8]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(compress_rowcols(x), [[8]]) + assert_equal(compress_rowcols(x, 0), [[6, 7, 8]]) + assert_equal(compress_rowcols(x, 1,), [[2], [5], [8]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + assert_equal(compress_rowcols(x).size, 0) + assert_equal(compress_rowcols(x, 0).size, 0) + assert_equal(compress_rowcols(x, 1).size, 0) + + def test_mask_rowcols(self): + # Tests mask_rowcols. 
+ x = array(np.arange(9).reshape(3, 3), + mask=[[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[1, 1, 1], [1, 0, 0], [1, 0, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[1, 1, 1], [0, 0, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1).mask, + [[1, 0, 0], [1, 0, 0], [1, 0, 0]]) + x = array(x._data, mask=[[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[0, 1, 0], [1, 1, 1], [0, 1, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[0, 0, 0], [1, 1, 1], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1).mask, + [[0, 1, 0], [0, 1, 0], [0, 1, 0]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 0]]) + assert_equal(mask_rowcols(x).mask, + [[1, 1, 1], [1, 1, 1], [1, 1, 0]]) + assert_equal(mask_rowcols(x, 0).mask, + [[1, 1, 1], [1, 1, 1], [0, 0, 0]]) + assert_equal(mask_rowcols(x, 1,).mask, + [[1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x = array(x._data, mask=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + assert_(mask_rowcols(x).all() is masked) + assert_(mask_rowcols(x, 0).all() is masked) + assert_(mask_rowcols(x, 1).all() is masked) + assert_(mask_rowcols(x).mask.all()) + assert_(mask_rowcols(x, 0).mask.all()) + assert_(mask_rowcols(x, 1).mask.all()) + + def test_dot(self): + # Tests dot product + n = np.arange(1, 7) + # + m = [1, 0, 0, 0, 0, 0] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 1], [1, 0]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[1, 1, 1], [1, 0, 0], [1, 0, 0]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + m = [0, 0, 0, 0, 0, 1] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[0, 1], [1, 1]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [0, 0, 1], [1, 1, 1]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + assert_equal(c, dot(a, b)) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + m = [0, 0, 0, 0, 0, 0] + a = masked_array(n, mask=m).reshape(2, 3) + b = masked_array(n, mask=m).reshape(3, 2) + c = dot(a, b) + assert_equal(c.mask, nomask) + c = dot(b, a) + assert_equal(c.mask, nomask) + # + a = masked_array(n, mask=[1, 0, 0, 0, 0, 0]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 0, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 1], [0, 0]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[1, 0, 0], [1, 0, 0], [1, 0, 0]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + a = masked_array(n, mask=[0, 0, 0, 0, 0, 1]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 0, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[0, 0], [1, 1]]) + c = dot(a, b) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [0, 0, 1], [0, 0, 1]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + a = masked_array(n, mask=[0, 0, 0, 0, 0, 1]).reshape(2, 3) + b = masked_array(n, mask=[0, 0, 1, 0, 0, 0]).reshape(3, 2) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 0], [1, 1]]) + c = dot(a, b, strict=False) + assert_equal(c, np.dot(a.filled(0), b.filled(0))) + c = 
dot(b, a, strict=True) + assert_equal(c.mask, [[0, 0, 1], [1, 1, 1], [0, 0, 1]]) + c = dot(b, a, strict=False) + assert_equal(c, np.dot(b.filled(0), a.filled(0))) + + def test_dot_returns_maskedarray(self): + # See gh-6611 + a = np.eye(3) + b = array(a) + assert_(type(dot(a, a)) is MaskedArray) + assert_(type(dot(a, b)) is MaskedArray) + assert_(type(dot(b, a)) is MaskedArray) + assert_(type(dot(b, b)) is MaskedArray) + + def test_dot_out(self): + a = array(np.eye(3)) + out = array(np.zeros((3, 3))) + res = dot(a, a, out=out) + assert_(res is out) + assert_equal(a, res) + + +class TestApplyAlongAxis(object): + # Tests 2D functions + def test_3d(self): + a = arange(12.).reshape(2, 2, 3) + + def myfunc(b): + return b[1] + + xa = apply_along_axis(myfunc, 2, a) + assert_equal(xa, [[1, 4], [7, 10]]) + + # Tests kwargs functions + def test_3d_kwargs(self): + a = arange(12).reshape(2, 2, 3) + + def myfunc(b, offset=0): + return b[1+offset] + + xa = apply_along_axis(myfunc, 2, a, offset=1) + assert_equal(xa, [[2, 5], [8, 11]]) + + +class TestApplyOverAxes(object): + # Tests apply_over_axes + def test_basic(self): + a = arange(24).reshape(2, 3, 4) + test = apply_over_axes(np.sum, a, [0, 2]) + ctrl = np.array([[[60], [92], [124]]]) + assert_equal(test, ctrl) + a[(a % 2).astype(bool)] = masked + test = apply_over_axes(np.sum, a, [0, 2]) + ctrl = np.array([[[28], [44], [60]]]) + assert_equal(test, ctrl) + + +class TestMedian(object): + def test_pytype(self): + r = np.ma.median([[np.inf, np.inf], [np.inf, np.inf]], axis=-1) + assert_equal(r, np.inf) + + def test_inf(self): + # test that even which computes handles inf / x = masked + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]]), axis=-1) + assert_equal(r, np.inf) + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]]), axis=None) + assert_equal(r, np.inf) + # all masked + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]], mask=True), + axis=-1) + assert_equal(r.mask, True) + r = np.ma.median(np.ma.masked_array([[np.inf, np.inf], + [np.inf, np.inf]], mask=True), + axis=None) + assert_equal(r.mask, True) + + def test_non_masked(self): + x = np.arange(9) + assert_equal(np.ma.median(x), 4.) + assert_(type(np.ma.median(x)) is not MaskedArray) + x = range(8) + assert_equal(np.ma.median(x), 3.5) + assert_(type(np.ma.median(x)) is not MaskedArray) + x = 5 + assert_equal(np.ma.median(x), 5.) 
+ assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = np.arange(9 * 8).reshape(9, 8) + assert_equal(np.ma.median(x, axis=0), np.median(x, axis=0)) + assert_equal(np.ma.median(x, axis=1), np.median(x, axis=1)) + assert_(np.ma.median(x, axis=1) is not MaskedArray) + # float + x = np.arange(9 * 8.).reshape(9, 8) + assert_equal(np.ma.median(x, axis=0), np.median(x, axis=0)) + assert_equal(np.ma.median(x, axis=1), np.median(x, axis=1)) + assert_(np.ma.median(x, axis=1) is not MaskedArray) + + def test_docstring_examples(self): + "test the examples given in the docstring of ma.median" + x = array(np.arange(8), mask=[0]*4 + [1]*4) + assert_equal(np.ma.median(x), 1.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(10).reshape(2, 5), mask=[0]*6 + [1]*4) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + ma_x = np.ma.median(x, axis=-1, overwrite_input=True) + assert_equal(ma_x, [2., 5.]) + assert_equal(ma_x.shape, (2,), "shape mismatch") + assert_(type(ma_x) is MaskedArray) + + def test_axis_argument_errors(self): + msg = "mask = %s, ndim = %s, axis = %s, overwrite_input = %s" + for ndmin in range(5): + for mask in [False, True]: + x = array(1, ndmin=ndmin, mask=mask) + + # Valid axis values should not raise exception + args = itertools.product(range(-ndmin, ndmin), [False, True]) + for axis, over in args: + try: + np.ma.median(x, axis=axis, overwrite_input=over) + except Exception: + raise AssertionError(msg % (mask, ndmin, axis, over)) + + # Invalid axis values should raise exception + args = itertools.product([-(ndmin + 1), ndmin], [False, True]) + for axis, over in args: + try: + np.ma.median(x, axis=axis, overwrite_input=over) + except np.AxisError: + pass + else: + raise AssertionError(msg % (mask, ndmin, axis, over)) + + def test_masked_0d(self): + # Check values + x = array(1, mask=False) + assert_equal(np.ma.median(x), 1) + x = array(1, mask=True) + assert_equal(np.ma.median(x), np.ma.masked) + + def test_masked_1d(self): + x = array(np.arange(5), mask=True) + assert_equal(np.ma.median(x), np.ma.masked) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is np.ma.core.MaskedConstant) + x = array(np.arange(5), mask=False) + assert_equal(np.ma.median(x), 2.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(5), mask=[0,1,0,0,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + x = array(np.arange(5), mask=[0,1,1,1,1]) + assert_equal(np.ma.median(x), 0.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = array(np.arange(5), mask=[0,1,1,0,0]) + assert_equal(np.ma.median(x), 3.) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # float + x = array(np.arange(5.), mask=[0,1,1,0,0]) + assert_equal(np.ma.median(x), 3.) 
+ assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # integer + x = array(np.arange(6), mask=[0,1,1,1,1,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + # float + x = array(np.arange(6.), mask=[0,1,1,1,1,0]) + assert_equal(np.ma.median(x), 2.5) + assert_equal(np.ma.median(x).shape, (), "shape mismatch") + assert_(type(np.ma.median(x)) is not MaskedArray) + + def test_1d_shape_consistency(self): + assert_equal(np.ma.median(array([1,2,3],mask=[0,0,0])).shape, + np.ma.median(array([1,2,3],mask=[0,1,0])).shape ) + + def test_2d(self): + # Tests median w/ 2D + (n, p) = (101, 30) + x = masked_array(np.linspace(-1., 1., n),) + x[:10] = x[-10:] = masked + z = masked_array(np.empty((n, p), dtype=float)) + z[:, 0] = x[:] + idx = np.arange(len(x)) + for i in range(1, p): + np.random.shuffle(idx) + z[:, i] = x[idx] + assert_equal(median(z[:, 0]), 0) + assert_equal(median(z), 0) + assert_equal(median(z, axis=0), np.zeros(p)) + assert_equal(median(z.T, axis=1), np.zeros(p)) + + def test_2d_waxis(self): + # Tests median w/ 2D arrays and different axis. + x = masked_array(np.arange(30).reshape(10, 3)) + x[:3] = x[-3:] = masked + assert_equal(median(x), 14.5) + assert_(type(np.ma.median(x)) is not MaskedArray) + assert_equal(median(x, axis=0), [13.5, 14.5, 15.5]) + assert_(type(np.ma.median(x, axis=0)) is MaskedArray) + assert_equal(median(x, axis=1), [0, 0, 0, 10, 13, 16, 19, 0, 0, 0]) + assert_(type(np.ma.median(x, axis=1)) is MaskedArray) + assert_equal(median(x, axis=1).mask, [1, 1, 1, 0, 0, 0, 0, 1, 1, 1]) + + def test_3d(self): + # Tests median w/ 3D + x = np.ma.arange(24).reshape(3, 4, 2) + x[x % 3 == 0] = masked + assert_equal(median(x, 0), [[12, 9], [6, 15], [12, 9], [18, 15]]) + x.shape = (4, 3, 2) + assert_equal(median(x, 0), [[99, 10], [11, 99], [13, 14]]) + x = np.ma.arange(24).reshape(4, 3, 2) + x[x % 5 == 0] = masked + assert_equal(median(x, 0), [[12, 10], [8, 9], [16, 17]]) + + def test_neg_axis(self): + x = masked_array(np.arange(30).reshape(10, 3)) + x[:3] = x[-3:] = masked + assert_equal(median(x, axis=-1), median(x, axis=1)) + + def test_out_1d(self): + # integer float even odd + for v in (30, 30., 31, 31.): + x = masked_array(np.arange(v)) + x[:3] = x[-3:] = masked + out = masked_array(np.ones(())) + r = median(x, out=out) + if v == 30: + assert_equal(out, 14.5) + else: + assert_equal(out, 15.) + assert_(r is out) + assert_(type(r) is MaskedArray) + + def test_out(self): + # integer float even odd + for v in (40, 40., 30, 30.): + x = masked_array(np.arange(v).reshape(10, -1)) + x[:3] = x[-3:] = masked + out = masked_array(np.ones(10)) + r = median(x, axis=1, out=out) + if v == 30: + e = masked_array([0.]*3 + [10, 13, 16, 19] + [0.]*3, + mask=[True] * 3 + [False] * 4 + [True] * 3) + else: + e = masked_array([0.]*3 + [13.5, 17.5, 21.5, 25.5] + [0.]*3, + mask=[True]*3 + [False]*4 + [True]*3) + assert_equal(r, e) + assert_(r is out) + assert_(type(r) is MaskedArray) + + def test_single_non_masked_value_on_axis(self): + data = [[1., 0.], + [0., 3.], + [0., 0.]] + masked_arr = np.ma.masked_equal(data, 0) + expected = [1., 3.] 
+ assert_array_equal(np.ma.median(masked_arr, axis=0), + expected) + + def test_nan(self): + with suppress_warnings() as w: + w.record(RuntimeWarning) + for mask in (False, np.zeros(6, dtype=bool)): + dm = np.ma.array([[1, np.nan, 3], [1, 2, 3]]) + dm.mask = mask + + # scalar result + r = np.ma.median(dm, axis=None) + assert_(np.isscalar(r)) + assert_array_equal(r, np.nan) + r = np.ma.median(dm.ravel(), axis=0) + assert_(np.isscalar(r)) + assert_array_equal(r, np.nan) + + r = np.ma.median(dm, axis=0) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [1, np.nan, 3]) + r = np.ma.median(dm, axis=1) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [np.nan, 2]) + r = np.ma.median(dm, axis=-1) + assert_equal(type(r), MaskedArray) + assert_array_equal(r, [np.nan, 2]) + + dm = np.ma.array([[1, np.nan, 3], [1, 2, 3]]) + dm[:, 2] = np.ma.masked + assert_array_equal(np.ma.median(dm, axis=None), np.nan) + assert_array_equal(np.ma.median(dm, axis=0), [1, np.nan, 3]) + assert_array_equal(np.ma.median(dm, axis=1), [np.nan, 1.5]) + assert_equal([x.category is RuntimeWarning for x in w.log], + [True]*13) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.ma.masked_array(np.zeros((4,))) + d = np.ma.masked_array(np.ones((3, 4))) + d[2, 1] = np.nan + d[2, 2] = np.ma.masked + assert_equal(np.ma.median(d, 0, out=o), o) + o = np.ma.masked_array(np.zeros((3,))) + assert_equal(np.ma.median(d, 1, out=o), o) + o = np.ma.masked_array(np.zeros(())) + assert_equal(np.ma.median(d, out=o), o) + + def test_nan_behavior(self): + a = np.ma.masked_array(np.arange(24, dtype=float)) + a[::3] = np.ma.masked + a[2] = np.nan + with suppress_warnings() as w: + w.record(RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_array_equal(np.ma.median(a, axis=0), np.nan) + assert_(w.log[0].category is RuntimeWarning) + assert_(w.log[1].category is RuntimeWarning) + + a = np.ma.masked_array(np.arange(24, dtype=float).reshape(2, 3, 4)) + a.mask = np.arange(a.size) % 2 == 1 + aorig = a.copy() + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + with suppress_warnings() as w: + w.record(RuntimeWarning) + warnings.filterwarnings('always', '', RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_(np.isscalar(np.ma.median(a))) + assert_(w.log[0].category is RuntimeWarning) + + # axis0 + b = np.ma.median(aorig, axis=0) + b[2, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.ma.median(a, 0), b) + assert_equal(len(w), 1) + + # axis1 + b = np.ma.median(aorig, axis=1) + b[1, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.ma.median(a, 1), b) + assert_equal(len(w), 1) + + # axis02 + b = np.ma.median(aorig, axis=(0, 2)) + b[1] = np.nan + b[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.ma.median(a, (0, 2)), b) + assert_equal(len(w), 1) + + def test_ambigous_fill(self): + # 255 is max value, used as filler for sort + a = np.array([[3, 3, 255], [3, 3, 255]], dtype=np.uint8) + a = np.ma.masked_array(a, mask=a == 3) + assert_array_equal(np.ma.median(a, axis=1), 255) + assert_array_equal(np.ma.median(a, axis=1).mask, False) + assert_array_equal(np.ma.median(a, axis=0), a[0]) + 
assert_array_equal(np.ma.median(a), 255) + + def test_special(self): + for inf in [np.inf, -np.inf]: + a = np.array([[inf, np.nan], [np.nan, np.nan]]) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_equal(np.ma.median(a, axis=0), [inf, np.nan]) + assert_equal(np.ma.median(a, axis=1), [inf, np.nan]) + assert_equal(np.ma.median(a), inf) + + a = np.array([[np.nan, np.nan, inf], [np.nan, np.nan, inf]]) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_array_equal(np.ma.median(a, axis=1), inf) + assert_array_equal(np.ma.median(a, axis=1).mask, False) + assert_array_equal(np.ma.median(a, axis=0), a[0]) + assert_array_equal(np.ma.median(a), inf) + + # no mask + a = np.array([[inf, inf], [inf, inf]]) + assert_equal(np.ma.median(a), inf) + assert_equal(np.ma.median(a, axis=0), inf) + assert_equal(np.ma.median(a, axis=1), inf) + + a = np.array([[inf, 7, -inf, -9], + [-10, np.nan, np.nan, 5], + [4, np.nan, np.nan, inf]], + dtype=np.float32) + a = np.ma.masked_array(a, mask=np.isnan(a)) + if inf > 0: + assert_equal(np.ma.median(a, axis=0), [4., 7., -inf, 5.]) + assert_equal(np.ma.median(a), 4.5) + else: + assert_equal(np.ma.median(a, axis=0), [-10., 7., -inf, -9.]) + assert_equal(np.ma.median(a), -2.5) + assert_equal(np.ma.median(a, axis=1), [-1., -2.5, inf]) + + for i in range(0, 10): + for j in range(1, 10): + a = np.array([([np.nan] * i) + ([inf] * j)] * 2) + a = np.ma.masked_array(a, mask=np.isnan(a)) + assert_equal(np.ma.median(a), inf) + assert_equal(np.ma.median(a, axis=1), inf) + assert_equal(np.ma.median(a, axis=0), + ([np.nan] * i) + [inf] * j) + + def test_empty(self): + # empty arrays + a = np.ma.masked_array(np.array([], dtype=float)) + with suppress_warnings() as w: + w.record(RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_(w.log[0].category is RuntimeWarning) + + # multiple dimensions + a = np.ma.masked_array(np.array([], dtype=float, ndmin=3)) + # no axis + with suppress_warnings() as w: + w.record(RuntimeWarning) + warnings.filterwarnings('always', '', RuntimeWarning) + assert_array_equal(np.ma.median(a), np.nan) + assert_(w.log[0].category is RuntimeWarning) + + # axis 0 and 1 + b = np.ma.masked_array(np.array([], dtype=float, ndmin=2)) + assert_equal(np.ma.median(a, axis=0), b) + assert_equal(np.ma.median(a, axis=1), b) + + # axis 2 + b = np.ma.masked_array(np.array(np.nan, dtype=float, ndmin=2)) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.ma.median(a, axis=2), b) + assert_(w[0].category is RuntimeWarning) + + def test_object(self): + o = np.ma.masked_array(np.arange(7.)) + assert_(type(np.ma.median(o.astype(object))), float) + o[2] = np.nan + assert_(type(np.ma.median(o.astype(object))), float) + + +class TestCov(object): + + def setup(self): + self.data = array(np.random.rand(12)) + + def test_1d_without_missing(self): + # Test cov on 1D variable w/o missing values + x = self.data + assert_almost_equal(np.cov(x), cov(x)) + assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(x, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + + def test_2d_without_missing(self): + # Test cov on 1 2D variable w/o missing values + x = self.data.reshape(3, 4) + assert_almost_equal(np.cov(x), cov(x)) + assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(x, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + + def test_1d_with_missing(self): + # Test cov 1 1D variable w/missing 
values + x = self.data + x[-1] = masked + x -= x.mean() + nx = x.compressed() + assert_almost_equal(np.cov(nx), cov(x)) + assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False)) + assert_almost_equal(np.cov(nx, rowvar=False, bias=True), + cov(x, rowvar=False, bias=True)) + # + try: + cov(x, allow_masked=False) + except ValueError: + pass + # + # 2 1D variables w/ missing values + nx = x[1:-1] + assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1])) + assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False), + cov(x, x[::-1], rowvar=False)) + assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True), + cov(x, x[::-1], rowvar=False, bias=True)) + + def test_2d_with_missing(self): + # Test cov on 2D variable w/ missing value + x = self.data + x[-1] = masked + x = x.reshape(3, 4) + valid = np.logical_not(getmaskarray(x)).astype(int) + frac = np.dot(valid, valid.T) + xf = (x - x.mean(1)[:, None]).filled(0) + assert_almost_equal(cov(x), + np.cov(xf) * (x.shape[1] - 1) / (frac - 1.)) + assert_almost_equal(cov(x, bias=True), + np.cov(xf, bias=True) * x.shape[1] / frac) + frac = np.dot(valid.T, valid) + xf = (x - x.mean(0)).filled(0) + assert_almost_equal(cov(x, rowvar=False), + (np.cov(xf, rowvar=False) * + (x.shape[0] - 1) / (frac - 1.))) + assert_almost_equal(cov(x, rowvar=False, bias=True), + (np.cov(xf, rowvar=False, bias=True) * + x.shape[0] / frac)) + + +class TestCorrcoef(object): + + def setup(self): + self.data = array(np.random.rand(12)) + self.data2 = array(np.random.rand(12)) + + def test_ddof(self): + # ddof raises DeprecationWarning + x, y = self.data, self.data2 + expected = np.corrcoef(x) + expected2 = np.corrcoef(x, y) + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, x, ddof=-1) + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # ddof has no or negligible effect on the function + assert_almost_equal(np.corrcoef(x, ddof=0), corrcoef(x, ddof=0)) + assert_almost_equal(corrcoef(x, ddof=-1), expected) + assert_almost_equal(corrcoef(x, y, ddof=-1), expected2) + assert_almost_equal(corrcoef(x, ddof=3), expected) + assert_almost_equal(corrcoef(x, y, ddof=3), expected2) + + def test_bias(self): + x, y = self.data, self.data2 + expected = np.corrcoef(x) + # bias raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, x, y, True, False) + assert_warns(DeprecationWarning, corrcoef, x, y, True, True) + assert_warns(DeprecationWarning, corrcoef, x, bias=False) + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # bias has no or negligible effect on the function + assert_almost_equal(corrcoef(x, bias=1), expected) + + def test_1d_without_missing(self): + # Test cov on 1D variable w/o missing values + x = self.data + assert_almost_equal(np.corrcoef(x), corrcoef(x)) + assert_almost_equal(np.corrcoef(x, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + + def test_2d_without_missing(self): + # Test corrcoef on 1 2D variable w/o missing values + x = self.data.reshape(3, 4) + assert_almost_equal(np.corrcoef(x), corrcoef(x)) + assert_almost_equal(np.corrcoef(x, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + 
assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + + def test_1d_with_missing(self): + # Test corrcoef 1 1D variable w/missing values + x = self.data + x[-1] = masked + x -= x.mean() + nx = x.compressed() + assert_almost_equal(np.corrcoef(nx), corrcoef(x)) + assert_almost_equal(np.corrcoef(nx, rowvar=False), + corrcoef(x, rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + assert_almost_equal(np.corrcoef(nx, rowvar=False, bias=True), + corrcoef(x, rowvar=False, bias=True)) + try: + corrcoef(x, allow_masked=False) + except ValueError: + pass + # 2 1D variables w/ missing values + nx = x[1:-1] + assert_almost_equal(np.corrcoef(nx, nx[::-1]), corrcoef(x, x[::-1])) + assert_almost_equal(np.corrcoef(nx, nx[::-1], rowvar=False), + corrcoef(x, x[::-1], rowvar=False)) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # ddof and bias have no or negligible effect on the function + assert_almost_equal(np.corrcoef(nx, nx[::-1]), + corrcoef(x, x[::-1], bias=1)) + assert_almost_equal(np.corrcoef(nx, nx[::-1]), + corrcoef(x, x[::-1], ddof=2)) + + def test_2d_with_missing(self): + # Test corrcoef on 2D variable w/ missing value + x = self.data + x[-1] = masked + x = x.reshape(3, 4) + + test = corrcoef(x) + control = np.corrcoef(x) + assert_almost_equal(test[:-1, :-1], control[:-1, :-1]) + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + # ddof and bias have no or negligible effect on the function + assert_almost_equal(corrcoef(x, ddof=-2)[:-1, :-1], + control[:-1, :-1]) + assert_almost_equal(corrcoef(x, ddof=3)[:-1, :-1], + control[:-1, :-1]) + assert_almost_equal(corrcoef(x, bias=1)[:-1, :-1], + control[:-1, :-1]) + + +class TestPolynomial(object): + # + def test_polyfit(self): + # Tests polyfit + # On ndarrays + x = np.random.rand(10) + y = np.random.rand(20).reshape(-1, 2) + assert_almost_equal(polyfit(x, y, 3), np.polyfit(x, y, 3)) + # ON 1D maskedarrays + x = x.view(MaskedArray) + x[0] = masked + y = y.view(MaskedArray) + y[0, 0] = y[-1, -1] = masked + # + (C, R, K, S, D) = polyfit(x, y[:, 0], 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:], y[1:, 0].compressed(), 3, + full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + (C, R, K, S, D) = polyfit(x, y[:, -1], 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1, -1], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + (C, R, K, S, D) = polyfit(x, y, 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1,:], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + # + w = np.random.rand(10) + 1 + wo = w.copy() + xs = x[1:-1] + ys = y[1:-1] + ws = w[1:-1] + (C, R, K, S, D) = polyfit(x, y, 3, full=True, w=w) + (c, r, k, s, d) = np.polyfit(xs, ys, 3, full=True, w=ws) + assert_equal(w, wo) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + assert_almost_equal(a, a_) + + def test_polyfit_with_masked_NaNs(self): + x = np.random.rand(10) + y = np.random.rand(20).reshape(-1, 2) + + x[0] = np.nan + y[-1,-1] = np.nan + x = x.view(MaskedArray) + y = y.view(MaskedArray) + x[0] = masked + y[-1,-1] = masked + + (C, R, K, S, D) = polyfit(x, y, 3, full=True) + (c, r, k, s, d) = np.polyfit(x[1:-1], y[1:-1,:], 3, full=True) + for (a, a_) in zip((C, R, K, S, D), (c, r, k, s, d)): + 
assert_almost_equal(a, a_) + + +class TestArraySetOps(object): + + def test_unique_onlist(self): + # Test unique on list + data = [1, 1, 1, 2, 2, 3] + test = unique(data, return_index=True, return_inverse=True) + assert_(isinstance(test[0], MaskedArray)) + assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0])) + assert_equal(test[1], [0, 3, 5]) + assert_equal(test[2], [0, 0, 0, 1, 1, 2]) + + def test_unique_onmaskedarray(self): + # Test unique on masked data w/use_mask=True + data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0]) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + # + data.fill_value = 3 + data = masked_array(data=[1, 1, 1, 2, 2, 3], + mask=[0, 0, 1, 0, 1, 0], fill_value=3) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + + def test_unique_allmasked(self): + # Test all masked + data = masked_array([1, 1, 1], mask=True) + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, ], mask=[True])) + assert_equal(test[1], [0]) + assert_equal(test[2], [0, 0, 0]) + # + # Test masked + data = masked + test = unique(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array(masked)) + assert_equal(test[1], [0]) + assert_equal(test[2], [0]) + + def test_ediff1d(self): + # Tests mediff1d + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + control = array([1, 1, 1, 4], mask=[1, 0, 0, 1]) + test = ediff1d(x) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_tobegin(self): + # Test ediff1d w/ to_begin + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_begin=masked) + control = array([0, 1, 1, 1, 4], mask=[1, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_begin=[1, 2, 3]) + control = array([1, 2, 3, 1, 1, 1, 4], mask=[0, 0, 0, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_toend(self): + # Test ediff1d w/ to_end + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_end=masked) + control = array([1, 1, 1, 4, 0], mask=[1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1, 2, 3]) + control = array([1, 1, 1, 4, 1, 2, 3], mask=[1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_tobegin_toend(self): + # Test ediff1d w/ to_begin and to_end + x = masked_array(np.arange(5), mask=[1, 0, 0, 0, 1]) + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 4, 0], mask=[1, 1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1, 2, 3], to_begin=masked) + control = array([0, 1, 1, 1, 4, 1, 2, 3], + mask=[1, 1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + 
assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_ediff1d_ndarray(self): + # Test ediff1d w/ a ndarray + x = np.arange(5) + test = ediff1d(x) + control = array([1, 1, 1, 1], mask=[0, 0, 0, 0]) + assert_equal(test, control) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 1, 0], mask=[1, 0, 0, 0, 0, 1]) + assert_(isinstance(test, MaskedArray)) + assert_equal(test.filled(0), control.filled(0)) + assert_equal(test.mask, control.mask) + + def test_intersect1d(self): + # Test intersect1d + x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + test = intersect1d(x, y) + control = array([1, 3, -1], mask=[0, 0, 1]) + assert_equal(test, control) + + def test_setxor1d(self): + # Test setxor1d + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7])) + # + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = [1, 2, 3, 4, 5] + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7, -1], mask=[0, 0, 0, 1])) + # + a = array([1, 2, 3]) + b = array([6, 5, 4]) + test = setxor1d(a, b) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + a = array([1, 8, 2, 3], mask=[0, 1, 0, 0]) + b = array([6, 5, 4, 8], mask=[0, 0, 0, 1]) + test = setxor1d(a, b) + assert_(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + assert_array_equal([], setxor1d([], [])) + + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. 
+ a = np.arange(24).reshape([2, 3, 4]) + mask = np.zeros([2, 3, 4]) + mask[1, 2, 0] = 1 + a = array(a, mask=mask) + b = array(data=[0, 10, 20, 30, 1, 3, 11, 22, 33], + mask=[0, 1, 0, 1, 0, 1, 0, 1, 0]) + ec = zeros((2, 3, 4), dtype=bool) + ec[0, 0, 0] = True + ec[0, 0, 1] = True + ec[0, 2, 3] = True + c = isin(a, b) + assert_(isinstance(c, MaskedArray)) + assert_array_equal(c, ec) + #compare results of np.isin to ma.isin + d = np.isin(a, b[~b.mask]) & ~a.mask + assert_array_equal(c, d) + + def test_in1d(self): + # Test in1d + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = in1d(a, b) + assert_equal(test, [True, True, True, False, True]) + # + a = array([5, 5, 2, 1, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 5, -1], mask=[0, 0, 1]) + test = in1d(a, b) + assert_equal(test, [True, True, False, True, True]) + # + assert_array_equal([], in1d([], [])) + + def test_in1d_invert(self): + # Test in1d's invert parameter + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + assert_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) + + a = array([5, 5, 2, 1, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 5, -1], mask=[0, 0, 1]) + assert_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) + + assert_array_equal([], in1d([], [], invert=True)) + + def test_union1d(self): + # Test union1d + a = array([1, 2, 5, 7, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, 1]) + test = union1d(a, b) + control = array([1, 2, 3, 4, 5, 7, -1], mask=[0, 0, 0, 0, 0, 0, 1]) + assert_equal(test, control) + + # Tests gh-10340, arguments to union1d should be + # flattened if they are not already 1D + x = array([[0, 1, 2], [3, 4, 5]], mask=[[0, 0, 0], [0, 0, 1]]) + y = array([0, 1, 2, 3, 4], mask=[0, 0, 0, 0, 1]) + ez = array([0, 1, 2, 3, 4, 5], mask=[0, 0, 0, 0, 0, 1]) + z = union1d(x, y) + assert_equal(z, ez) + # + assert_array_equal([], union1d([], [])) + + def test_setdiff1d(self): + # Test setdiff1d + a = array([6, 5, 4, 7, 7, 1, 2, 1], mask=[0, 0, 0, 0, 0, 0, 0, 1]) + b = array([2, 4, 3, 3, 2, 1, 5]) + test = setdiff1d(a, b) + assert_equal(test, array([6, 7, -1], mask=[0, 0, 1])) + # + a = arange(10) + b = arange(8) + assert_equal(setdiff1d(a, b), array([8, 9])) + a = array([], np.uint32, mask=[]) + assert_equal(setdiff1d(a, []).dtype, np.uint32) + + def test_setdiff1d_char_array(self): + # Test setdiff1d_charray + a = np.array(['a', 'b', 'c']) + b = np.array(['a', 'b', 's']) + assert_array_equal(setdiff1d(a, b), np.array(['c'])) + + +class TestShapeBase(object): + + def test_atleast_2d(self): + # Test atleast_2d + a = masked_array([0, 1, 2], mask=[0, 1, 0]) + b = atleast_2d(a) + assert_equal(b.shape, (1, 3)) + assert_equal(b.mask.shape, b.data.shape) + assert_equal(a.shape, (3,)) + assert_equal(a.mask.shape, a.data.shape) + assert_equal(b.mask.shape, b.data.shape) + + def test_shape_scalar(self): + # the atleast and diagflat function should work with scalars + # GitHub issue #3367 + # Additionally, the atleast functions should accept multiple scalars + # correctly + b = atleast_1d(1.0) + assert_equal(b.shape, (1,)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, b.shape) + + b = atleast_1d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1,)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + b = atleast_2d(1.0) + assert_equal(b.shape, (1, 1)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, 
b.shape) + + b = atleast_2d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1, 1)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + b = atleast_3d(1.0) + assert_equal(b.shape, (1, 1, 1)) + assert_equal(b.mask.shape, b.shape) + assert_equal(b.data.shape, b.shape) + + b = atleast_3d(1.0, 2.0) + for a in b: + assert_equal(a.shape, (1, 1, 1)) + assert_equal(a.mask.shape, a.shape) + assert_equal(a.data.shape, a.shape) + + + b = diagflat(1.0) + assert_equal(b.shape, (1, 1)) + assert_equal(b.mask.shape, b.data.shape) + + +class TestStack(object): + + def test_stack_1d(self): + a = masked_array([0, 1, 2], mask=[0, 1, 0]) + b = masked_array([9, 8, 7], mask=[1, 0, 0]) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_masks(self): + a = masked_array([0, 1, 2], mask=True) + b = masked_array([9, 8, 7], mask=False) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_nd(self): + # 2D + shp = (3, 2) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + # 4D + shp = (3, 2, 4, 5,) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_mrecords.py b/contrib/python/numpy/py2/numpy/ma/tests/test_mrecords.py new file mode 100644 index 0000000000..dbbf1c8a1e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_mrecords.py @@ -0,0 +1,495 @@ +# pylint: disable-msg=W0611, W0612, W0511,R0201 +"""Tests suite for mrecords. 
+ +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu + +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +import numpy.ma as ma +from numpy import recarray +from numpy.ma import masked, nomask +from numpy.testing import temppath +from numpy.core.records import ( + fromrecords as recfromrecords, fromarrays as recfromarrays + ) +from numpy.ma.mrecords import ( + MaskedRecords, mrecarray, fromarrays, fromtextfile, fromrecords, + addfield + ) +from numpy.ma.testutils import ( + assert_, assert_equal, + assert_equal_records, + ) +from numpy.core.numeric import pickle + + +class TestMRecords(object): + + ilist = [1, 2, 3, 4, 5] + flist = [1.1, 2.2, 3.3, 4.4, 5.5] + slist = [b'one', b'two', b'three', b'four', b'five'] + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mask = [0, 1, 0, 0, 1] + base = ma.array(list(zip(ilist, flist, slist)), mask=mask, dtype=ddtype) + + def test_byview(self): + # Test creation by view + base = self.base + mbase = base.view(mrecarray) + assert_equal(mbase.recordmask, base.recordmask) + assert_equal_records(mbase._mask, base._mask) + assert_(isinstance(mbase._data, recarray)) + assert_equal_records(mbase._data, base._data.view(recarray)) + for field in ('a', 'b', 'c'): + assert_equal(base[field], mbase[field]) + assert_equal_records(mbase.view(mrecarray), mbase) + + def test_get(self): + # Tests fields retrieval + base = self.base.copy() + mbase = base.view(mrecarray) + # As fields.......... + for field in ('a', 'b', 'c'): + assert_equal(getattr(mbase, field), mbase[field]) + assert_equal(base[field], mbase[field]) + # as elements ....... + mbase_first = mbase[0] + assert_(isinstance(mbase_first, mrecarray)) + assert_equal(mbase_first.dtype, mbase.dtype) + assert_equal(mbase_first.tolist(), (1, 1.1, b'one')) + # Used to be mask, now it's recordmask + assert_equal(mbase_first.recordmask, nomask) + assert_equal(mbase_first._mask.item(), (False, False, False)) + assert_equal(mbase_first['a'], mbase['a'][0]) + mbase_last = mbase[-1] + assert_(isinstance(mbase_last, mrecarray)) + assert_equal(mbase_last.dtype, mbase.dtype) + assert_equal(mbase_last.tolist(), (None, None, None)) + # Used to be mask, now it's recordmask + assert_equal(mbase_last.recordmask, True) + assert_equal(mbase_last._mask.item(), (True, True, True)) + assert_equal(mbase_last['a'], mbase['a'][-1]) + assert_((mbase_last['a'] is masked)) + # as slice .......... + mbase_sl = mbase[:2] + assert_(isinstance(mbase_sl, mrecarray)) + assert_equal(mbase_sl.dtype, mbase.dtype) + # Used to be mask, now it's recordmask + assert_equal(mbase_sl.recordmask, [0, 1]) + assert_equal_records(mbase_sl.mask, + np.array([(False, False, False), + (True, True, True)], + dtype=mbase._mask.dtype)) + assert_equal_records(mbase_sl, base[:2].view(mrecarray)) + for field in ('a', 'b', 'c'): + assert_equal(getattr(mbase_sl, field), base[:2][field]) + + def test_set_fields(self): + # Tests setting fields. 
+ base = self.base.copy() + mbase = base.view(mrecarray) + mbase = mbase.copy() + mbase.fill_value = (999999, 1e20, 'N/A') + # Change the data, the mask should be conserved + mbase.a._data[:] = 5 + assert_equal(mbase['a']._data, [5, 5, 5, 5, 5]) + assert_equal(mbase['a']._mask, [0, 1, 0, 0, 1]) + # Change the elements, and the mask will follow + mbase.a = 1 + assert_equal(mbase['a']._data, [1]*5) + assert_equal(ma.getmaskarray(mbase['a']), [0]*5) + # Use to be _mask, now it's recordmask + assert_equal(mbase.recordmask, [False]*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 0), + (0, 1, 1), + (0, 0, 0), + (0, 0, 0), + (0, 1, 1)], + dtype=bool)) + # Set a field to mask ........................ + mbase.c = masked + # Use to be mask, and now it's still mask ! + assert_equal(mbase.c.mask, [1]*5) + assert_equal(mbase.c.recordmask, [1]*5) + assert_equal(ma.getmaskarray(mbase['c']), [1]*5) + assert_equal(ma.getdata(mbase['c']), [b'N/A']*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 1), + (0, 1, 1), + (0, 0, 1), + (0, 0, 1), + (0, 1, 1)], + dtype=bool)) + # Set fields by slices ....................... + mbase = base.view(mrecarray).copy() + mbase.a[3:] = 5 + assert_equal(mbase.a, [1, 2, 3, 5, 5]) + assert_equal(mbase.a._mask, [0, 1, 0, 0, 0]) + mbase.b[3:] = masked + assert_equal(mbase.b, base['b']) + assert_equal(mbase.b._mask, [0, 1, 0, 1, 1]) + # Set fields globally.......................... + ndtype = [('alpha', '|S1'), ('num', int)] + data = ma.array([('a', 1), ('b', 2), ('c', 3)], dtype=ndtype) + rdata = data.view(MaskedRecords) + val = ma.array([10, 20, 30], mask=[1, 0, 0]) + + rdata['num'] = val + assert_equal(rdata.num, val) + assert_equal(rdata.num.mask, [1, 0, 0]) + + def test_set_fields_mask(self): + # Tests setting the mask of a field. + base = self.base.copy() + # This one has already a mask.... + mbase = base.view(mrecarray) + mbase['a'][-2] = masked + assert_equal(mbase.a, [1, 2, 3, 4, 5]) + assert_equal(mbase.a._mask, [0, 1, 0, 1, 1]) + # This one has not yet + mbase = fromarrays([np.arange(5), np.random.rand(5)], + dtype=[('a', int), ('b', float)]) + mbase['a'][-2] = masked + assert_equal(mbase.a, [0, 1, 2, 3, 4]) + assert_equal(mbase.a._mask, [0, 0, 0, 1, 0]) + + def test_set_mask(self): + base = self.base.copy() + mbase = base.view(mrecarray) + # Set the mask to True ....................... + mbase.mask = masked + assert_equal(ma.getmaskarray(mbase['b']), [1]*5) + assert_equal(mbase['a']._mask, mbase['b']._mask) + assert_equal(mbase['a']._mask, mbase['c']._mask) + assert_equal(mbase._mask.tolist(), + np.array([(1, 1, 1)]*5, dtype=bool)) + # Delete the mask ............................ + mbase.mask = nomask + assert_equal(ma.getmaskarray(mbase['c']), [0]*5) + assert_equal(mbase._mask.tolist(), + np.array([(0, 0, 0)]*5, dtype=bool)) + + def test_set_mask_fromarray(self): + base = self.base.copy() + mbase = base.view(mrecarray) + # Sets the mask w/ an array + mbase.mask = [1, 0, 0, 0, 1] + assert_equal(mbase.a.mask, [1, 0, 0, 0, 1]) + assert_equal(mbase.b.mask, [1, 0, 0, 0, 1]) + assert_equal(mbase.c.mask, [1, 0, 0, 0, 1]) + # Yay, once more ! 
+ mbase.mask = [0, 0, 0, 0, 1] + assert_equal(mbase.a.mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.b.mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.c.mask, [0, 0, 0, 0, 1]) + + def test_set_mask_fromfields(self): + mbase = self.base.copy().view(mrecarray) + + nmask = np.array( + [(0, 1, 0), (0, 1, 0), (1, 0, 1), (1, 0, 1), (0, 0, 0)], + dtype=[('a', bool), ('b', bool), ('c', bool)]) + mbase.mask = nmask + assert_equal(mbase.a.mask, [0, 0, 1, 1, 0]) + assert_equal(mbase.b.mask, [1, 1, 0, 0, 0]) + assert_equal(mbase.c.mask, [0, 0, 1, 1, 0]) + # Reinitialize and redo + mbase.mask = False + mbase.fieldmask = nmask + assert_equal(mbase.a.mask, [0, 0, 1, 1, 0]) + assert_equal(mbase.b.mask, [1, 1, 0, 0, 0]) + assert_equal(mbase.c.mask, [0, 0, 1, 1, 0]) + + def test_set_elements(self): + base = self.base.copy() + # Set an element to mask ..................... + mbase = base.view(mrecarray).copy() + mbase[-2] = masked + assert_equal( + mbase._mask.tolist(), + np.array([(0, 0, 0), (1, 1, 1), (0, 0, 0), (1, 1, 1), (1, 1, 1)], + dtype=bool)) + # Used to be mask, now it's recordmask! + assert_equal(mbase.recordmask, [0, 1, 0, 1, 1]) + # Set slices ................................. + mbase = base.view(mrecarray).copy() + mbase[:2] = (5, 5, 5) + assert_equal(mbase.a._data, [5, 5, 3, 4, 5]) + assert_equal(mbase.a._mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.b._data, [5., 5., 3.3, 4.4, 5.5]) + assert_equal(mbase.b._mask, [0, 0, 0, 0, 1]) + assert_equal(mbase.c._data, + [b'5', b'5', b'three', b'four', b'five']) + assert_equal(mbase.b._mask, [0, 0, 0, 0, 1]) + + mbase = base.view(mrecarray).copy() + mbase[:2] = masked + assert_equal(mbase.a._data, [1, 2, 3, 4, 5]) + assert_equal(mbase.a._mask, [1, 1, 0, 0, 1]) + assert_equal(mbase.b._data, [1.1, 2.2, 3.3, 4.4, 5.5]) + assert_equal(mbase.b._mask, [1, 1, 0, 0, 1]) + assert_equal(mbase.c._data, + [b'one', b'two', b'three', b'four', b'five']) + assert_equal(mbase.b._mask, [1, 1, 0, 0, 1]) + + def test_setslices_hardmask(self): + # Tests setting slices w/ hardmask. + base = self.base.copy() + mbase = base.view(mrecarray) + mbase.harden_mask() + try: + mbase[-2:] = (5, 5, 5) + assert_equal(mbase.a._data, [1, 2, 3, 5, 5]) + assert_equal(mbase.b._data, [1.1, 2.2, 3.3, 5, 5.5]) + assert_equal(mbase.c._data, + [b'one', b'two', b'three', b'5', b'five']) + assert_equal(mbase.a._mask, [0, 1, 0, 0, 1]) + assert_equal(mbase.b._mask, mbase.a._mask) + assert_equal(mbase.b._mask, mbase.c._mask) + except NotImplementedError: + # OK, not implemented yet... + pass + except AssertionError: + raise + else: + raise Exception("Flexible hard masks should be supported !") + # Not using a tuple should crash + try: + mbase[-2:] = 3 + except (NotImplementedError, TypeError): + pass + else: + raise TypeError("Should have expected a readable buffer object!") + + def test_hardmask(self): + # Test hardmask + base = self.base.copy() + mbase = base.view(mrecarray) + mbase.harden_mask() + assert_(mbase._hardmask) + mbase.mask = nomask + assert_equal_records(mbase._mask, base._mask) + mbase.soften_mask() + assert_(not mbase._hardmask) + mbase.mask = nomask + # So, the mask of a field is no longer set to nomask... 
+ assert_equal_records(mbase._mask, + ma.make_mask_none(base.shape, base.dtype)) + assert_(ma.make_mask(mbase['b']._mask) is nomask) + assert_equal(mbase['a']._mask, mbase['b']._mask) + + def test_pickling(self): + # Test pickling + base = self.base.copy() + mrec = base.view(mrecarray) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + _ = pickle.dumps(mrec, protocol=proto) + mrec_ = pickle.loads(_) + assert_equal(mrec_.dtype, mrec.dtype) + assert_equal_records(mrec_._data, mrec._data) + assert_equal(mrec_._mask, mrec._mask) + assert_equal_records(mrec_._mask, mrec._mask) + + def test_filled(self): + # Test filling the array + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array(['one', 'two', 'three'], mask=[0, 0, 1], dtype='|S8') + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mrec = fromarrays([_a, _b, _c], dtype=ddtype, + fill_value=(99999, 99999., 'N/A')) + mrecfilled = mrec.filled() + assert_equal(mrecfilled['a'], np.array((1, 2, 99999), dtype=int)) + assert_equal(mrecfilled['b'], np.array((1.1, 2.2, 99999.), + dtype=float)) + assert_equal(mrecfilled['c'], np.array(('one', 'two', 'N/A'), + dtype='|S8')) + + def test_tolist(self): + # Test tolist. + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array(['one', 'two', 'three'], mask=[1, 0, 0], dtype='|S8') + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mrec = fromarrays([_a, _b, _c], dtype=ddtype, + fill_value=(99999, 99999., 'N/A')) + + assert_equal(mrec.tolist(), + [(1, 1.1, None), (2, 2.2, b'two'), + (None, None, b'three')]) + + def test_withnames(self): + # Test the creation w/ format and names + x = mrecarray(1, formats=float, names='base') + x[0]['base'] = 10 + assert_equal(x['base'][0], 10) + + def test_exotic_formats(self): + # Test that 'exotic' formats are processed properly + easy = mrecarray(1, dtype=[('i', int), ('s', '|S8'), ('f', float)]) + easy[0] = masked + assert_equal(easy.filled(1).item(), (1, b'1', 1.)) + + solo = mrecarray(1, dtype=[('f0', '<f8', (2, 2))]) + solo[0] = masked + assert_equal(solo.filled(1).item(), + np.array((1,), dtype=solo.dtype).item()) + + mult = mrecarray(2, dtype="i4, (2,3)float, float") + mult[0] = masked + mult[1] = (1, 1, 1) + mult.filled(0) + assert_equal_records(mult.filled(0), + np.array([(0, 0, 0), (1, 1, 1)], + dtype=mult.dtype)) + + +class TestView(object): + + def setup(self): + (a, b) = (np.arange(10), np.random.rand(10)) + ndtype = [('a', float), ('b', float)] + arr = np.array(list(zip(a, b)), dtype=ndtype) + + mrec = fromarrays([a, b], dtype=ndtype, fill_value=(-9., -99.)) + mrec.mask[3] = (False, True) + self.data = (mrec, a, b, arr) + + def test_view_by_itself(self): + (mrec, a, b, arr) = self.data + test = mrec.view() + assert_(isinstance(test, MaskedRecords)) + assert_equal_records(test, mrec) + assert_equal_records(test._mask, mrec._mask) + + def test_view_simple_dtype(self): + (mrec, a, b, arr) = self.data + ntype = (float, 2) + test = mrec.view(ntype) + assert_(isinstance(test, ma.MaskedArray)) + assert_equal(test, np.array(list(zip(a, b)), dtype=float)) + assert_(test[3, 1] is ma.masked) + + def test_view_flexible_type(self): + (mrec, a, b, arr) = self.data + alttype = [('A', float), ('B', float)] + test = mrec.view(alttype) + assert_(isinstance(test, MaskedRecords)) + assert_equal_records(test, arr.view(alttype)) + assert_(test['B'][3] is masked) + assert_equal(test.dtype, np.dtype(alttype)) + 
assert_(test._fill_value is None) + + +############################################################################## +class TestMRecordsImport(object): + + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array([b'one', b'two', b'three'], + mask=[0, 0, 1], dtype='|S8') + ddtype = [('a', int), ('b', float), ('c', '|S8')] + mrec = fromarrays([_a, _b, _c], dtype=ddtype, + fill_value=(b'99999', b'99999.', + b'N/A')) + nrec = recfromarrays((_a._data, _b._data, _c._data), dtype=ddtype) + data = (mrec, nrec, ddtype) + + def test_fromarrays(self): + _a = ma.array([1, 2, 3], mask=[0, 0, 1], dtype=int) + _b = ma.array([1.1, 2.2, 3.3], mask=[0, 0, 1], dtype=float) + _c = ma.array(['one', 'two', 'three'], mask=[0, 0, 1], dtype='|S8') + (mrec, nrec, _) = self.data + for (f, l) in zip(('a', 'b', 'c'), (_a, _b, _c)): + assert_equal(getattr(mrec, f)._mask, l._mask) + # One record only + _x = ma.array([1, 1.1, 'one'], mask=[1, 0, 0],) + assert_equal_records(fromarrays(_x, dtype=mrec.dtype), mrec[0]) + + def test_fromrecords(self): + # Test construction from records. + (mrec, nrec, ddtype) = self.data + #...... + palist = [(1, 'abc', 3.7000002861022949, 0), + (2, 'xy', 6.6999998092651367, 1), + (0, ' ', 0.40000000596046448, 0)] + pa = recfromrecords(palist, names='c1, c2, c3, c4') + mpa = fromrecords(palist, names='c1, c2, c3, c4') + assert_equal_records(pa, mpa) + #..... + _mrec = fromrecords(nrec) + assert_equal(_mrec.dtype, mrec.dtype) + for field in _mrec.dtype.names: + assert_equal(getattr(_mrec, field), getattr(mrec._data, field)) + + _mrec = fromrecords(nrec.tolist(), names='c1,c2,c3') + assert_equal(_mrec.dtype, [('c1', int), ('c2', float), ('c3', '|S5')]) + for (f, n) in zip(('c1', 'c2', 'c3'), ('a', 'b', 'c')): + assert_equal(getattr(_mrec, f), getattr(mrec._data, n)) + + _mrec = fromrecords(mrec) + assert_equal(_mrec.dtype, mrec.dtype) + assert_equal_records(_mrec._data, mrec.filled()) + assert_equal_records(_mrec._mask, mrec._mask) + + def test_fromrecords_wmask(self): + # Tests construction from records w/ mask. + (mrec, nrec, ddtype) = self.data + + _mrec = fromrecords(nrec.tolist(), dtype=ddtype, mask=[0, 1, 0,]) + assert_equal_records(_mrec._data, mrec._data) + assert_equal(_mrec._mask.tolist(), [(0, 0, 0), (1, 1, 1), (0, 0, 0)]) + + _mrec = fromrecords(nrec.tolist(), dtype=ddtype, mask=True) + assert_equal_records(_mrec._data, mrec._data) + assert_equal(_mrec._mask.tolist(), [(1, 1, 1), (1, 1, 1), (1, 1, 1)]) + + _mrec = fromrecords(nrec.tolist(), dtype=ddtype, mask=mrec._mask) + assert_equal_records(_mrec._data, mrec._data) + assert_equal(_mrec._mask.tolist(), mrec._mask.tolist()) + + _mrec = fromrecords(nrec.tolist(), dtype=ddtype, + mask=mrec._mask.tolist()) + assert_equal_records(_mrec._data, mrec._data) + assert_equal(_mrec._mask.tolist(), mrec._mask.tolist()) + + def test_fromtextfile(self): + # Tests reading from a text file. 
+ fcontent = ( +"""# +'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)' +'strings',1,1.0,'mixed column',,1 +'with embedded "double quotes"',2,2.0,1.0,,1 +'strings',3,3.0E5,3,,1 +'strings',4,-1e-10,,,1 +""") + with temppath() as path: + with open(path, 'w') as f: + f.write(fcontent) + mrectxt = fromtextfile(path, delimitor=',', varnames='ABCDEFG') + assert_(isinstance(mrectxt, MaskedRecords)) + assert_equal(mrectxt.F, [1, 1, 1, 1]) + assert_equal(mrectxt.E._mask, [1, 1, 1, 1]) + assert_equal(mrectxt.C, [1, 2, 3.e+5, -1e-10]) + + def test_addfield(self): + # Tests addfield + (mrec, nrec, ddtype) = self.data + (d, m) = ([100, 200, 300], [1, 0, 0]) + mrec = addfield(mrec, ma.array(d, mask=m)) + assert_equal(mrec.f3, d) + assert_equal(mrec.f3._mask, m) + + +def test_record_array_with_object_field(): + # Trac #1839 + y = ma.masked_array( + [(1, '2'), (3, '4')], + mask=[(0, 0), (0, 1)], + dtype=[('a', int), ('b', object)]) + # getting an item used to fail + y[1] diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_old_ma.py b/contrib/python/numpy/py2/numpy/ma/tests/test_old_ma.py new file mode 100644 index 0000000000..2978be22ca --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_old_ma.py @@ -0,0 +1,860 @@ +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.core.umath as umath +import numpy.core.fromnumeric as fromnumeric +from numpy.testing import ( + assert_, assert_raises, assert_equal, + ) +from numpy.ma import ( + MaskType, MaskedArray, absolute, add, all, allclose, allequal, alltrue, + arange, arccos, arcsin, arctan, arctan2, array, average, choose, + concatenate, conjugate, cos, cosh, count, divide, equal, exp, filled, + getmask, greater, greater_equal, inner, isMaskedArray, less, + less_equal, log, log10, make_mask, masked, masked_array, masked_equal, + masked_greater, masked_greater_equal, masked_inside, masked_less, + masked_less_equal, masked_not_equal, masked_outside, + masked_print_option, masked_values, masked_where, maximum, minimum, + multiply, nomask, nonzero, not_equal, ones, outer, product, put, ravel, + repeat, resize, shape, sin, sinh, sometrue, sort, sqrt, subtract, sum, + take, tan, tanh, transpose, where, zeros, + ) +from numpy.core.numeric import pickle + +pi = np.pi + + +def eq(v, w, msg=''): + result = allclose(v, w) + if not result: + print("Not eq:%s\n%s\n----%s" % (msg, str(v), str(w))) + return result + + +class TestMa(object): + + def setup(self): + x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) + y = np.array([5., 0., 3., 2., -1., -4., 0., -10., 10., 1., 0., 3.]) + a10 = 10. + m1 = [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + m2 = [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1] + xm = array(x, mask=m1) + ym = array(y, mask=m2) + z = np.array([-.5, 0., .5, .8]) + zm = array(z, mask=[0, 1, 0, 0]) + xf = np.where(m1, 1e+20, x) + s = x.shape + xm.set_fill_value(1e+20) + self.d = (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) + + def test_testBasic1d(self): + # Test of basic array creation and properties in 1 dimension. 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_equal(shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.dtype, x.dtype) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), len(m1) - reduce(lambda x, y:x + y, m1)) + assert_(eq(xm, xf)) + assert_(eq(filled(xm, 1.e20), xf)) + assert_(eq(x, xm)) + + def test_testBasic2d(self): + # Test of basic array creation and properties in 2 dimensions. + for s in [(4, 3), (6, 2)]: + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + x.shape = s + y.shape = s + xm.shape = s + ym.shape = s + xf.shape = s + + assert_(not isMaskedArray(x)) + assert_(isMaskedArray(xm)) + assert_equal(shape(xm), s) + assert_equal(xm.shape, s) + assert_equal(xm.size, reduce(lambda x, y:x * y, s)) + assert_equal(count(xm), + len(m1) - reduce(lambda x, y:x + y, m1)) + assert_(eq(xm, xf)) + assert_(eq(filled(xm, 1.e20), xf)) + assert_(eq(x, xm)) + self.setup() + + def test_testArithmetic(self): + # Test of basic arithmetic. + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + a2d = array([[1, 2], [0, 4]]) + a2dm = masked_array(a2d, [[0, 0], [1, 0]]) + assert_(eq(a2d * a2d, a2d * a2dm)) + assert_(eq(a2d + a2d, a2d + a2dm)) + assert_(eq(a2d - a2d, a2d - a2dm)) + for s in [(12,), (4, 3), (2, 6)]: + x = x.reshape(s) + y = y.reshape(s) + xm = xm.reshape(s) + ym = ym.reshape(s) + xf = xf.reshape(s) + assert_(eq(-x, -xm)) + assert_(eq(x + y, xm + ym)) + assert_(eq(x - y, xm - ym)) + assert_(eq(x * y, xm * ym)) + with np.errstate(divide='ignore', invalid='ignore'): + assert_(eq(x / y, xm / ym)) + assert_(eq(a10 + y, a10 + ym)) + assert_(eq(a10 - y, a10 - ym)) + assert_(eq(a10 * y, a10 * ym)) + with np.errstate(divide='ignore', invalid='ignore'): + assert_(eq(a10 / y, a10 / ym)) + assert_(eq(x + a10, xm + a10)) + assert_(eq(x - a10, xm - a10)) + assert_(eq(x * a10, xm * a10)) + assert_(eq(x / a10, xm / a10)) + assert_(eq(x ** 2, xm ** 2)) + assert_(eq(abs(x) ** 2.5, abs(xm) ** 2.5)) + assert_(eq(x ** y, xm ** ym)) + assert_(eq(np.add(x, y), add(xm, ym))) + assert_(eq(np.subtract(x, y), subtract(xm, ym))) + assert_(eq(np.multiply(x, y), multiply(xm, ym))) + with np.errstate(divide='ignore', invalid='ignore'): + assert_(eq(np.divide(x, y), divide(xm, ym))) + + def test_testMixedArithmetic(self): + na = np.array([1]) + ma = array([1]) + assert_(isinstance(na + ma, MaskedArray)) + assert_(isinstance(ma + na, MaskedArray)) + + def test_testUfuncs1(self): + # Test various functions such as sin, cos. 
+ (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + assert_(eq(np.cos(x), cos(xm))) + assert_(eq(np.cosh(x), cosh(xm))) + assert_(eq(np.sin(x), sin(xm))) + assert_(eq(np.sinh(x), sinh(xm))) + assert_(eq(np.tan(x), tan(xm))) + assert_(eq(np.tanh(x), tanh(xm))) + with np.errstate(divide='ignore', invalid='ignore'): + assert_(eq(np.sqrt(abs(x)), sqrt(xm))) + assert_(eq(np.log(abs(x)), log(xm))) + assert_(eq(np.log10(abs(x)), log10(xm))) + assert_(eq(np.exp(x), exp(xm))) + assert_(eq(np.arcsin(z), arcsin(zm))) + assert_(eq(np.arccos(z), arccos(zm))) + assert_(eq(np.arctan(z), arctan(zm))) + assert_(eq(np.arctan2(x, y), arctan2(xm, ym))) + assert_(eq(np.absolute(x), absolute(xm))) + assert_(eq(np.equal(x, y), equal(xm, ym))) + assert_(eq(np.not_equal(x, y), not_equal(xm, ym))) + assert_(eq(np.less(x, y), less(xm, ym))) + assert_(eq(np.greater(x, y), greater(xm, ym))) + assert_(eq(np.less_equal(x, y), less_equal(xm, ym))) + assert_(eq(np.greater_equal(x, y), greater_equal(xm, ym))) + assert_(eq(np.conjugate(x), conjugate(xm))) + assert_(eq(np.concatenate((x, y)), concatenate((xm, ym)))) + assert_(eq(np.concatenate((x, y)), concatenate((x, y)))) + assert_(eq(np.concatenate((x, y)), concatenate((xm, y)))) + assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x)))) + + def test_xtestCount(self): + # Test count + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + assert_(count(ott).dtype.type is np.intp) + assert_equal(3, count(ott)) + assert_equal(1, count(1)) + assert_(eq(0, array(1, mask=[1]))) + ott = ott.reshape((2, 2)) + assert_(count(ott).dtype.type is np.intp) + assert_(isinstance(count(ott, 0), np.ndarray)) + assert_(count(ott).dtype.type is np.intp) + assert_(eq(3, count(ott))) + assert_(getmask(count(ott, 0)) is nomask) + assert_(eq([1, 2], count(ott, 0))) + + def test_testMinMax(self): + # Test minimum and maximum. + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + xr = np.ravel(x) # max doesn't work if shaped + xmr = ravel(xm) + + # true because of careful selection of data + assert_(eq(max(xr), maximum.reduce(xmr))) + assert_(eq(min(xr), minimum.reduce(xmr))) + + def test_testAddSumProd(self): + # Test add, sum, product. + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + assert_(eq(np.add.reduce(x), add.reduce(x))) + assert_(eq(np.add.accumulate(x), add.accumulate(x))) + assert_(eq(4, sum(array(4), axis=0))) + assert_(eq(4, sum(array(4), axis=0))) + assert_(eq(np.sum(x, axis=0), sum(x, axis=0))) + assert_(eq(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0))) + assert_(eq(np.sum(x, 0), sum(x, 0))) + assert_(eq(np.product(x, axis=0), product(x, axis=0))) + assert_(eq(np.product(x, 0), product(x, 0))) + assert_(eq(np.product(filled(xm, 1), axis=0), + product(xm, axis=0))) + if len(s) > 1: + assert_(eq(np.concatenate((x, y), 1), + concatenate((xm, ym), 1))) + assert_(eq(np.add.reduce(x, 1), add.reduce(x, 1))) + assert_(eq(np.sum(x, 1), sum(x, 1))) + assert_(eq(np.product(x, 1), product(x, 1))) + + def test_testCI(self): + # Test of conversions and indexing + x1 = np.array([1, 2, 4, 3]) + x2 = array(x1, mask=[1, 0, 0, 0]) + x3 = array(x1, mask=[0, 1, 0, 1]) + x4 = array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises? 
+ assert_(eq(np.sort(x1), sort(x2, fill_value=0))) + # tests of indexing + assert_(type(x2[1]) is type(x1[1])) + assert_(x1[1] == x2[1]) + assert_(x2[0] is masked) + assert_(eq(x1[2], x2[2])) + assert_(eq(x1[2:5], x2[2:5])) + assert_(eq(x1[:], x2[:])) + assert_(eq(x1[1:], x3[1:])) + x1[2] = 9 + x2[2] = 9 + assert_(eq(x1, x2)) + x1[1:3] = 99 + x2[1:3] = 99 + assert_(eq(x1, x2)) + x2[1] = masked + assert_(eq(x1, x2)) + x2[1:3] = masked + assert_(eq(x1, x2)) + x2[:] = x1 + x2[1] = masked + assert_(allequal(getmask(x2), array([0, 1, 0, 0]))) + x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x3), array([0, 1, 1, 0]))) + x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0]) + assert_(allequal(getmask(x4), array([0, 1, 1, 0]))) + assert_(allequal(x4, array([1, 2, 3, 4]))) + x1 = np.arange(5) * 1.0 + x2 = masked_values(x1, 3.0) + assert_(eq(x1, x2)) + assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask)) + assert_(eq(3.0, x2.fill_value)) + x1 = array([1, 'hello', 2, 3], object) + x2 = np.array([1, 'hello', 2, 3], object) + s1 = x1[1] + s2 = x2[1] + assert_equal(type(s2), str) + assert_equal(type(s1), str) + assert_equal(s1, s2) + assert_(x1[1:1].shape == (0,)) + + def test_testCopySize(self): + # Tests of some subtle points of copying and sizing. + n = [0, 0, 1, 0, 0] + m = make_mask(n) + m2 = make_mask(m) + assert_(m is m2) + m3 = make_mask(m, copy=1) + assert_(m is not m3) + + x1 = np.arange(5) + y1 = array(x1, mask=m) + assert_(y1._data is not x1) + assert_(allequal(x1, y1._data)) + assert_(y1.mask is m) + + y1a = array(y1, copy=0) + # For copy=False, one might expect that the array would just + # passed on, i.e., that it would be "is" instead of "==". + # See gh-4043 for discussion. + assert_(y1a._mask.__array_interface__ == + y1._mask.__array_interface__) + + y2 = array(x1, mask=m3, copy=0) + assert_(y2.mask is m3) + assert_(y2[2] is masked) + y2[2] = 9 + assert_(y2[2] is not masked) + assert_(y2.mask is m3) + assert_(allequal(y2.mask, 0)) + + y2a = array(x1, mask=m, copy=1) + assert_(y2a.mask is not m) + assert_(y2a[2] is masked) + y2a[2] = 9 + assert_(y2a[2] is not masked) + assert_(y2a.mask is not m) + assert_(allequal(y2a.mask, 0)) + + y3 = array(x1 * 1.0, mask=m) + assert_(filled(y3).dtype is (x1 * 1.0).dtype) + + x4 = arange(4) + x4[2] = masked + y4 = resize(x4, (8,)) + assert_(eq(concatenate([x4, x4]), y4)) + assert_(eq(getmask(y4), [0, 0, 1, 0, 0, 0, 1, 0])) + y5 = repeat(x4, (2, 2, 2, 2), axis=0) + assert_(eq(y5, [0, 0, 1, 1, 2, 2, 3, 3])) + y6 = repeat(x4, 2, axis=0) + assert_(eq(y5, y6)) + + def test_testPut(self): + # Test of put + d = arange(5) + n = [0, 0, 0, 1, 1] + m = make_mask(n) + m2 = m.copy() + x = array(d, mask=m) + assert_(x[3] is masked) + assert_(x[4] is masked) + x[[1, 4]] = [10, 40] + assert_(x.mask is m) + assert_(x[3] is masked) + assert_(x[4] is not masked) + assert_(eq(x, [0, 10, 2, -1, 40])) + + x = array(d, mask=m2, copy=True) + x.put([0, 1, 2], [-1, 100, 200]) + assert_(x.mask is not m2) + assert_(x[3] is masked) + assert_(x[4] is masked) + assert_(eq(x, [-1, 100, 200, 0, 0])) + + def test_testPut2(self): + # Test of put + d = arange(5) + x = array(d, mask=[0, 0, 0, 0, 0]) + z = array([10, 40], mask=[1, 0]) + assert_(x[2] is not masked) + assert_(x[3] is not masked) + x[2:4] = z + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_(eq(x, [0, 1, 10, 40, 4])) + + d = arange(5) + x = array(d, mask=[0, 0, 0, 0, 0]) + y = x[2:4] + z = array([10, 40], mask=[1, 0]) + assert_(x[2] is not masked) + assert_(x[3] is not masked) 
+ y[:] = z + assert_(y[0] is masked) + assert_(y[1] is not masked) + assert_(eq(y, [10, 40])) + assert_(x[2] is masked) + assert_(x[3] is not masked) + assert_(eq(x, [0, 1, 10, 40, 4])) + + def test_testMaPut(self): + (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d + m = [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1] + i = np.nonzero(m)[0] + put(ym, i, zm) + assert_(all(take(ym, i, axis=0) == zm)) + + def test_testOddFeatures(self): + # Test of other odd features + x = arange(20) + x = x.reshape(4, 5) + x.flat[5] = 12 + assert_(x[1, 0] == 12) + z = x + 10j * x + assert_(eq(z.real, x)) + assert_(eq(z.imag, 10 * x)) + assert_(eq((z * conjugate(z)).real, 101 * x * x)) + z.imag[...] = 0.0 + + x = arange(10) + x[3] = masked + assert_(str(x[3]) == str(masked)) + c = x >= 8 + assert_(count(where(c, masked, masked)) == 0) + assert_(shape(where(c, masked, masked)) == c.shape) + z = where(c, x, masked) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is masked) + assert_(z[7] is masked) + assert_(z[8] is not masked) + assert_(z[9] is not masked) + assert_(eq(x, z)) + z = where(c, masked, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + z = masked_where(c, x) + assert_(z.dtype is x.dtype) + assert_(z[3] is masked) + assert_(z[4] is not masked) + assert_(z[7] is not masked) + assert_(z[8] is masked) + assert_(z[9] is masked) + assert_(eq(x, z)) + x = array([1., 2., 3., 4., 5.]) + c = array([1, 1, 1, 0, 0]) + x[2] = masked + z = where(c, x, -x) + assert_(eq(z, [1., 2., 0., -4., -5])) + c[0] = masked + z = where(c, x, -x) + assert_(eq(z, [1., 2., 0., -4., -5])) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + assert_(eq(masked_where(greater(x, 2), x), masked_greater(x, 2))) + assert_(eq(masked_where(greater_equal(x, 2), x), + masked_greater_equal(x, 2))) + assert_(eq(masked_where(less(x, 2), x), masked_less(x, 2))) + assert_(eq(masked_where(less_equal(x, 2), x), masked_less_equal(x, 2))) + assert_(eq(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2))) + assert_(eq(masked_where(equal(x, 2), x), masked_equal(x, 2))) + assert_(eq(masked_where(not_equal(x, 2), x), masked_not_equal(x, 2))) + assert_(eq(masked_inside(list(range(5)), 1, 3), [0, 199, 199, 199, 4])) + assert_(eq(masked_outside(list(range(5)), 1, 3), [199, 1, 2, 3, 199])) + assert_(eq(masked_inside(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 1, 3).mask, + [1, 1, 1, 1, 0])) + assert_(eq(masked_outside(array(list(range(5)), + mask=[0, 1, 0, 0, 0]), 1, 3).mask, + [1, 1, 0, 0, 1])) + assert_(eq(masked_equal(array(list(range(5)), + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 0])) + assert_(eq(masked_not_equal(array([2, 2, 1, 2, 1], + mask=[1, 0, 0, 0, 0]), 2).mask, + [1, 0, 1, 0, 1])) + assert_(eq(masked_where([1, 1, 0, 0, 0], [1, 2, 3, 4, 5]), + [99, 99, 3, 4, 5])) + atest = ones((10, 10, 10), dtype=np.float32) + btest = zeros(atest.shape, MaskType) + ctest = masked_where(btest, atest) + assert_(eq(atest, ctest)) + z = choose(c, (-x, x)) + assert_(eq(z, [1., 2., 0., -4., -5])) + assert_(z[0] is masked) + assert_(z[1] is not masked) + assert_(z[2] is masked) + x = arange(6) + x[5] = masked + y = arange(6) * 10 + y[2] = masked + c = array([1, 1, 1, 0, 0, 0], mask=[1, 0, 0, 0, 0, 0]) + cm = c.filled(1) + z = where(c, x, y) + zm = where(cm, x, y) + assert_(eq(z, zm)) + assert_(getmask(zm) is nomask) + assert_(eq(zm, [0, 1, 2, 30, 40, 50])) + z = where(c, masked, 1) + 
assert_(eq(z, [99, 99, 99, 1, 1, 1])) + z = where(c, 1, masked) + assert_(eq(z, [99, 1, 1, 99, 99, 99])) + + def test_testMinMax2(self): + # Test of minimum, maximum. + assert_(eq(minimum([1, 2, 3], [4, 0, 9]), [1, 0, 3])) + assert_(eq(maximum([1, 2, 3], [4, 0, 9]), [4, 2, 9])) + x = arange(5) + y = arange(5) - 2 + x[3] = masked + y[0] = masked + assert_(eq(minimum(x, y), where(less(x, y), x, y))) + assert_(eq(maximum(x, y), where(greater(x, y), x, y))) + assert_(minimum.reduce(x) == 0) + assert_(maximum.reduce(x) == 4) + + def test_testTakeTransposeInnerOuter(self): + # Test of take, transpose, inner, outer products + x = arange(24) + y = np.arange(24) + x[5:6] = masked + x = x.reshape(2, 3, 4) + y = y.reshape(2, 3, 4) + assert_(eq(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1)))) + assert_(eq(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1))) + assert_(eq(np.inner(filled(x, 0), filled(y, 0)), + inner(x, y))) + assert_(eq(np.outer(filled(x, 0), filled(y, 0)), + outer(x, y))) + y = array(['abc', 1, 'def', 2, 3], object) + y[2] = masked + t = take(y, [0, 3, 4]) + assert_(t[0] == 'abc') + assert_(t[1] == 2) + assert_(t[2] == 3) + + def test_testInplace(self): + # Test of inplace operations and rich comparisons + y = arange(10) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x += 1 + assert_(eq(x, y + 1)) + xm += 1 + assert_(eq(x, y + 1)) + + x = arange(10) + xm = arange(10) + xm[2] = masked + x -= 1 + assert_(eq(x, y - 1)) + xm -= 1 + assert_(eq(xm, y - 1)) + + x = arange(10) * 1.0 + xm = arange(10) * 1.0 + xm[2] = masked + x *= 2.0 + assert_(eq(x, y * 2)) + xm *= 2.0 + assert_(eq(xm, y * 2)) + + x = arange(10) * 2 + xm = arange(10) + xm[2] = masked + x //= 2 + assert_(eq(x, y)) + xm //= 2 + assert_(eq(x, y)) + + x = arange(10) * 1.0 + xm = arange(10) * 1.0 + xm[2] = masked + x /= 2.0 + assert_(eq(x, y / 2.0)) + xm /= arange(10) + assert_(eq(xm, ones((10,)))) + + x = arange(10).astype(np.float32) + xm = arange(10) + xm[2] = masked + x += 1. + assert_(eq(x, y + 1.)) + + def test_testPickle(self): + # Test of pickling + x = arange(12) + x[4:10:2] = masked + x = x.reshape(4, 3) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + s = pickle.dumps(x, protocol=proto) + y = pickle.loads(s) + assert_(eq(x, y)) + + def test_testMasked(self): + # Test of masked element + xx = arange(6) + xx[1] = masked + assert_(str(masked) == '--') + assert_(xx[1] is masked) + assert_equal(filled(xx[1], 0), 0) + + def test_testAverage1(self): + # Test of average. + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + assert_(eq(2.0, average(ott, axis=0))) + assert_(eq(2.0, average(ott, weights=[1., 1., 2., 1.]))) + result, wts = average(ott, weights=[1., 1., 2., 1.], returned=1) + assert_(eq(2.0, result)) + assert_(wts == 4.0) + ott[:] = masked + assert_(average(ott, axis=0) is masked) + ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0]) + ott = ott.reshape(2, 2) + ott[:, 1] = masked + assert_(eq(average(ott, axis=0), [2.0, 0.0])) + assert_(average(ott, axis=1)[0] is masked) + assert_(eq([2., 0.], average(ott, axis=0))) + result, wts = average(ott, axis=0, returned=1) + assert_(eq(wts, [1., 0.])) + + def test_testAverage2(self): + # More tests of average. + w1 = [0, 1, 1, 1, 1, 0] + w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]] + x = arange(6) + assert_(allclose(average(x, axis=0), 2.5)) + assert_(allclose(average(x, axis=0, weights=w1), 2.5)) + y = array([arange(6), 2.0 * arange(6)]) + assert_(allclose(average(y, None), + np.add.reduce(np.arange(6)) * 3. 
/ 12.)) + assert_(allclose(average(y, axis=0), np.arange(6) * 3. / 2.)) + assert_(allclose(average(y, axis=1), + [average(x, axis=0), average(x, axis=0)*2.0])) + assert_(allclose(average(y, None, weights=w2), 20. / 6.)) + assert_(allclose(average(y, axis=0, weights=w2), + [0., 1., 2., 3., 4., 10.])) + assert_(allclose(average(y, axis=1), + [average(x, axis=0), average(x, axis=0)*2.0])) + m1 = zeros(6) + m2 = [0, 0, 1, 1, 0, 0] + m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]] + m4 = ones(6) + m5 = [0, 1, 1, 1, 1, 1] + assert_(allclose(average(masked_array(x, m1), axis=0), 2.5)) + assert_(allclose(average(masked_array(x, m2), axis=0), 2.5)) + assert_(average(masked_array(x, m4), axis=0) is masked) + assert_equal(average(masked_array(x, m5), axis=0), 0.0) + assert_equal(count(average(masked_array(x, m4), axis=0)), 0) + z = masked_array(y, m3) + assert_(allclose(average(z, None), 20. / 6.)) + assert_(allclose(average(z, axis=0), + [0., 1., 99., 99., 4.0, 7.5])) + assert_(allclose(average(z, axis=1), [2.5, 5.0])) + assert_(allclose(average(z, axis=0, weights=w2), + [0., 1., 99., 99., 4.0, 10.0])) + + a = arange(6) + b = arange(6) * 3 + r1, w1 = average([[a, b], [b, a]], axis=1, returned=1) + assert_equal(shape(r1), shape(w1)) + assert_equal(r1.shape, w1.shape) + r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=1) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), returned=1) + assert_equal(shape(w2), shape(r2)) + r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=1) + assert_(shape(w2) == shape(r2)) + a2d = array([[1, 2], [0, 4]], float) + a2dm = masked_array(a2d, [[0, 0], [1, 0]]) + a2da = average(a2d, axis=0) + assert_(eq(a2da, [0.5, 3.0])) + a2dma = average(a2dm, axis=0) + assert_(eq(a2dma, [1.0, 3.0])) + a2dma = average(a2dm, axis=None) + assert_(eq(a2dma, 7. 
/ 3.)) + a2dma = average(a2dm, axis=1) + assert_(eq(a2dma, [1.5, 4.0])) + + def test_testToPython(self): + assert_equal(1, int(array(1))) + assert_equal(1.0, float(array(1))) + assert_equal(1, int(array([[[1]]]))) + assert_equal(1.0, float(array([[1]]))) + assert_raises(TypeError, float, array([1, 1])) + assert_raises(ValueError, bool, array([0, 1])) + assert_raises(ValueError, bool, array([0, 0], mask=[0, 1])) + + def test_testScalarArithmetic(self): + xm = array(0, mask=1) + #TODO FIXME: Find out what the following raises a warning in r8247 + with np.errstate(divide='ignore'): + assert_((1 / array(0)).mask) + assert_((1 + xm).mask) + assert_((-xm).mask) + assert_((-xm).mask) + assert_(maximum(xm, xm).mask) + assert_(minimum(xm, xm).mask) + assert_(xm.filled().dtype is xm._data.dtype) + x = array(0, mask=0) + assert_(x.filled() == x._data) + assert_equal(str(xm), str(masked_print_option)) + + def test_testArrayMethods(self): + a = array([1, 3, 2]) + assert_(eq(a.any(), a._data.any())) + assert_(eq(a.all(), a._data.all())) + assert_(eq(a.argmax(), a._data.argmax())) + assert_(eq(a.argmin(), a._data.argmin())) + assert_(eq(a.choose(0, 1, 2, 3, 4), + a._data.choose(0, 1, 2, 3, 4))) + assert_(eq(a.compress([1, 0, 1]), a._data.compress([1, 0, 1]))) + assert_(eq(a.conj(), a._data.conj())) + assert_(eq(a.conjugate(), a._data.conjugate())) + m = array([[1, 2], [3, 4]]) + assert_(eq(m.diagonal(), m._data.diagonal())) + assert_(eq(a.sum(), a._data.sum())) + assert_(eq(a.take([1, 2]), a._data.take([1, 2]))) + assert_(eq(m.transpose(), m._data.transpose())) + + def test_testArrayAttributes(self): + a = array([1, 3, 2]) + assert_equal(a.ndim, 1) + + def test_testAPI(self): + assert_(not [m for m in dir(np.ndarray) + if m not in dir(MaskedArray) and + not m.startswith('_')]) + + def test_testSingleElementSubscript(self): + a = array([1, 3, 2]) + b = array([1, 3, 2], mask=[1, 0, 1]) + assert_equal(a[0].shape, ()) + assert_equal(b[0].shape, ()) + assert_equal(b[1].shape, ()) + + +class TestUfuncs(object): + def setup(self): + self.d = (array([1.0, 0, -1, pi / 2] * 2, mask=[0, 1] + [0] * 6), + array([1.0, 0, -1, pi / 2] * 2, mask=[1, 0] + [0] * 6),) + + def test_testUfuncRegression(self): + f_invalid_ignore = [ + 'sqrt', 'arctanh', 'arcsin', 'arccos', + 'arccosh', 'arctanh', 'log', 'log10', 'divide', + 'true_divide', 'floor_divide', 'remainder', 'fmod'] + for f in ['sqrt', 'log', 'log10', 'exp', 'conjugate', + 'sin', 'cos', 'tan', + 'arcsin', 'arccos', 'arctan', + 'sinh', 'cosh', 'tanh', + 'arcsinh', + 'arccosh', + 'arctanh', + 'absolute', 'fabs', 'negative', + 'floor', 'ceil', + 'logical_not', + 'add', 'subtract', 'multiply', + 'divide', 'true_divide', 'floor_divide', + 'remainder', 'fmod', 'hypot', 'arctan2', + 'equal', 'not_equal', 'less_equal', 'greater_equal', + 'less', 'greater', + 'logical_and', 'logical_or', 'logical_xor']: + try: + uf = getattr(umath, f) + except AttributeError: + uf = getattr(fromnumeric, f) + mf = getattr(np.ma, f) + args = self.d[:uf.nin] + with np.errstate(): + if f in f_invalid_ignore: + np.seterr(invalid='ignore') + if f in ['arctanh', 'log', 'log10']: + np.seterr(divide='ignore') + ur = uf(*args) + mr = mf(*args) + assert_(eq(ur.filled(0), mr.filled(0), f)) + assert_(eqmask(ur.mask, mr.mask)) + + def test_reduce(self): + a = self.d[0] + assert_(not alltrue(a, axis=0)) + assert_(sometrue(a, axis=0)) + assert_equal(sum(a[:3], axis=0), 0) + assert_equal(product(a, axis=0), 0) + + def test_minmax(self): + a = arange(1, 13).reshape(3, 4) + amask = masked_where(a < 5, a) + 
assert_equal(amask.max(), a.max()) + assert_equal(amask.min(), 5) + assert_((amask.max(0) == a.max(0)).all()) + assert_((amask.min(0) == [5, 6, 7, 8]).all()) + assert_(amask.max(1)[0].mask) + assert_(amask.min(1)[0].mask) + + def test_nonzero(self): + for t in "?bhilqpBHILQPfdgFDGO": + x = array([1, 0, 2, 0], mask=[0, 0, 1, 1]) + assert_(eq(nonzero(x), [0])) + + +class TestArrayMethods(object): + + def setup(self): + x = np.array([8.375, 7.545, 8.828, 8.5, 1.757, 5.928, + 8.43, 7.78, 9.865, 5.878, 8.979, 4.732, + 3.012, 6.022, 5.095, 3.116, 5.238, 3.957, + 6.04, 9.63, 7.712, 3.382, 4.489, 6.479, + 7.189, 9.645, 5.395, 4.961, 9.894, 2.893, + 7.357, 9.828, 6.272, 3.758, 6.693, 0.993]) + X = x.reshape(6, 6) + XX = x.reshape(3, 2, 2, 3) + + m = np.array([0, 1, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 1, + 0, 0, 0, 1, 0, 1, + 0, 0, 0, 1, 1, 1, + 1, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 1, 0]) + mx = array(data=x, mask=m) + mX = array(data=X, mask=m.reshape(X.shape)) + mXX = array(data=XX, mask=m.reshape(XX.shape)) + + self.d = (x, X, XX, m, mx, mX, mXX) + + def test_trace(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXdiag = mX.diagonal() + assert_equal(mX.trace(), mX.diagonal().compressed().sum()) + assert_(eq(mX.trace(), + X.trace() - sum(mXdiag.mask * X.diagonal(), + axis=0))) + + def test_clip(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + clipped = mx.clip(2, 8) + assert_(eq(clipped.mask, mx.mask)) + assert_(eq(clipped._data, x.clip(2, 8))) + assert_(eq(clipped._data, mx._data.clip(2, 8))) + + def test_ptp(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + (n, m) = X.shape + assert_equal(mx.ptp(), mx.compressed().ptp()) + rows = np.zeros(n, np.float_) + cols = np.zeros(m, np.float_) + for k in range(m): + cols[k] = mX[:, k].compressed().ptp() + for k in range(n): + rows[k] = mX[k].compressed().ptp() + assert_(eq(mX.ptp(0), cols)) + assert_(eq(mX.ptp(1), rows)) + + def test_swapaxes(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXswapped = mX.swapaxes(0, 1) + assert_(eq(mXswapped[-1], mX[:, -1])) + mXXswapped = mXX.swapaxes(0, 2) + assert_equal(mXXswapped.shape, (2, 2, 3, 3)) + + def test_cumprod(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXcp = mX.cumprod(0) + assert_(eq(mXcp._data, mX.filled(1).cumprod(0))) + mXcp = mX.cumprod(1) + assert_(eq(mXcp._data, mX.filled(1).cumprod(1))) + + def test_cumsum(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + mXcp = mX.cumsum(0) + assert_(eq(mXcp._data, mX.filled(0).cumsum(0))) + mXcp = mX.cumsum(1) + assert_(eq(mXcp._data, mX.filled(0).cumsum(1))) + + def test_varstd(self): + (x, X, XX, m, mx, mX, mXX,) = self.d + assert_(eq(mX.var(axis=None), mX.compressed().var())) + assert_(eq(mX.std(axis=None), mX.compressed().std())) + assert_(eq(mXX.var(axis=3).shape, XX.var(axis=3).shape)) + assert_(eq(mX.var().shape, X.var().shape)) + (mXvar0, mXvar1) = (mX.var(axis=0), mX.var(axis=1)) + for k in range(6): + assert_(eq(mXvar1[k], mX[k].compressed().var())) + assert_(eq(mXvar0[k], mX[:, k].compressed().var())) + assert_(eq(np.sqrt(mXvar0[k]), + mX[:, k].compressed().std())) + + +def eqmask(m1, m2): + if m1 is nomask: + return m2 is nomask + if m2 is nomask: + return m1 is nomask + return (m1 == m2).all() diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_regression.py b/contrib/python/numpy/py2/numpy/ma/tests/test_regression.py new file mode 100644 index 0000000000..54f1bda7db --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_regression.py @@ -0,0 +1,89 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from 
numpy.testing import ( + assert_, assert_array_equal, assert_allclose, suppress_warnings + ) + + +class TestRegression(object): + def test_masked_array_create(self): + # Ticket #17 + x = np.ma.masked_array([0, 1, 2, 3, 0, 4, 5, 6], + mask=[0, 0, 0, 1, 1, 1, 0, 0]) + assert_array_equal(np.ma.nonzero(x), [[1, 2, 6, 7]]) + + def test_masked_array(self): + # Ticket #61 + np.ma.array(1, mask=[1]) + + def test_mem_masked_where(self): + # Ticket #62 + from numpy.ma import masked_where, MaskType + a = np.zeros((1, 1)) + b = np.zeros(a.shape, MaskType) + c = masked_where(b, a) + a-c + + def test_masked_array_multiply(self): + # Ticket #254 + a = np.ma.zeros((4, 1)) + a[2, 0] = np.ma.masked + b = np.zeros((4, 2)) + a*b + b*a + + def test_masked_array_repeat(self): + # Ticket #271 + np.ma.array([1], mask=False).repeat(10) + + def test_masked_array_repr_unicode(self): + # Ticket #1256 + repr(np.ma.array(u"Unicode")) + + def test_atleast_2d(self): + # Ticket #1559 + a = np.ma.masked_array([0.0, 1.2, 3.5], mask=[False, True, False]) + b = np.atleast_2d(a) + assert_(a.mask.ndim == 1) + assert_(b.mask.ndim == 2) + + def test_set_fill_value_unicode_py3(self): + # Ticket #2733 + a = np.ma.masked_array(['a', 'b', 'c'], mask=[1, 0, 0]) + a.fill_value = 'X' + assert_(a.fill_value == 'X') + + def test_var_sets_maskedarray_scalar(self): + # Issue gh-2757 + a = np.ma.array(np.arange(5), mask=True) + mout = np.ma.array(-1, dtype=float) + a.var(out=mout) + assert_(mout._data == 0) + + def test_ddof_corrcoef(self): + # See gh-3336 + x = np.ma.masked_equal([1, 2, 3, 4, 5], 4) + y = np.array([2, 2.5, 3.1, 3, 5]) + # this test can be removed after deprecation. + with suppress_warnings() as sup: + sup.filter(DeprecationWarning, "bias and ddof have no effect") + r0 = np.ma.corrcoef(x, y, ddof=0) + r1 = np.ma.corrcoef(x, y, ddof=1) + # ddof should not have an effect (it gets cancelled out) + assert_allclose(r0.data, r1.data) + + def test_mask_not_backmangled(self): + # See gh-10314. Test case taken from gh-3140. + a = np.ma.MaskedArray([1., 2.], mask=[False, False]) + assert_(a.mask.shape == (2,)) + b = np.tile(a, (2, 1)) + # Check that the above no longer changes a.shape to (1, 2) + assert_(a.mask.shape == (2,)) + assert_(b.shape == (2, 2)) + assert_(b.mask.shape == (2, 2)) + + def test_empty_list_on_structured(self): + # See gh-12464. Indexing with empty list should give empty result. + ma = np.ma.MaskedArray([(1, 1.), (2, 2.), (3, 3.)], dtype='i4,f4') + assert_array_equal(ma[[]], ma[:0]) diff --git a/contrib/python/numpy/py2/numpy/ma/tests/test_subclassing.py b/contrib/python/numpy/py2/numpy/ma/tests/test_subclassing.py new file mode 100644 index 0000000000..f8ab52bb9e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/ma/tests/test_subclassing.py @@ -0,0 +1,351 @@ +# pylint: disable-msg=W0611, W0612, W0511,R0201 +"""Tests suite for MaskedArray & subclassing. 
+ +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: test_subclassing.py 3473 2007-10-29 15:18:13Z jarrod.millman $ + +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_, assert_raises +from numpy.ma.testutils import assert_equal +from numpy.ma.core import ( + array, arange, masked, MaskedArray, masked_array, log, add, hypot, + divide, asarray, asanyarray, nomask + ) +# from numpy.ma.core import ( + +def assert_startswith(a, b): + # produces a better error message than assert_(a.startswith(b)) + assert_equal(a[:len(b)], b) + +class SubArray(np.ndarray): + # Defines a generic np.ndarray subclass, that stores some metadata + # in the dictionary `info`. + def __new__(cls,arr,info={}): + x = np.asanyarray(arr).view(cls) + x.info = info.copy() + return x + + def __array_finalize__(self, obj): + if callable(getattr(super(SubArray, self), + '__array_finalize__', None)): + super(SubArray, self).__array_finalize__(obj) + self.info = getattr(obj, 'info', {}).copy() + return + + def __add__(self, other): + result = super(SubArray, self).__add__(other) + result.info['added'] = result.info.get('added', 0) + 1 + return result + + def __iadd__(self, other): + result = super(SubArray, self).__iadd__(other) + result.info['iadded'] = result.info.get('iadded', 0) + 1 + return result + + +subarray = SubArray + + +class SubMaskedArray(MaskedArray): + """Pure subclass of MaskedArray, keeping some info on subclass.""" + def __new__(cls, info=None, **kwargs): + obj = super(SubMaskedArray, cls).__new__(cls, **kwargs) + obj._optinfo['info'] = info + return obj + + +class MSubArray(SubArray, MaskedArray): + + def __new__(cls, data, info={}, mask=nomask): + subarr = SubArray(data, info) + _data = MaskedArray.__new__(cls, data=subarr, mask=mask) + _data.info = subarr.info + return _data + + def _get_series(self): + _view = self.view(MaskedArray) + _view._sharedmask = False + return _view + _series = property(fget=_get_series) + +msubarray = MSubArray + + +# Also a subclass that overrides __str__, __repr__ and __setitem__, disallowing +# setting to non-class values (and thus np.ma.core.masked_print_option) +# and overrides __array_wrap__, updating the info dict, to check that this +# doesn't get destroyed by MaskedArray._update_from. But this one also needs +# its own iterator... 
+class CSAIterator(object): + """ + Flat iterator object that uses its own setter/getter + (works around ndarray.flat not propagating subclass setters/getters + see https://github.com/numpy/numpy/issues/4564) + roughly following MaskedIterator + """ + def __init__(self, a): + self._original = a + self._dataiter = a.view(np.ndarray).flat + + def __iter__(self): + return self + + def __getitem__(self, indx): + out = self._dataiter.__getitem__(indx) + if not isinstance(out, np.ndarray): + out = out.__array__() + out = out.view(type(self._original)) + return out + + def __setitem__(self, index, value): + self._dataiter[index] = self._original._validate_input(value) + + def __next__(self): + return next(self._dataiter).__array__().view(type(self._original)) + + next = __next__ + + +class ComplicatedSubArray(SubArray): + + def __str__(self): + return 'myprefix {0} mypostfix'.format(self.view(SubArray)) + + def __repr__(self): + # Return a repr that does not start with 'name(' + return '<{0} {1}>'.format(self.__class__.__name__, self) + + def _validate_input(self, value): + if not isinstance(value, ComplicatedSubArray): + raise ValueError("Can only set to MySubArray values") + return value + + def __setitem__(self, item, value): + # validation ensures direct assignment with ndarray or + # masked_print_option will fail + super(ComplicatedSubArray, self).__setitem__( + item, self._validate_input(value)) + + def __getitem__(self, item): + # ensure getter returns our own class also for scalars + value = super(ComplicatedSubArray, self).__getitem__(item) + if not isinstance(value, np.ndarray): # scalar + value = value.__array__().view(ComplicatedSubArray) + return value + + @property + def flat(self): + return CSAIterator(self) + + @flat.setter + def flat(self, value): + y = self.ravel() + y[:] = value + + def __array_wrap__(self, obj, context=None): + obj = super(ComplicatedSubArray, self).__array_wrap__(obj, context) + if context is not None and context[0] is np.multiply: + obj.info['multiplied'] = obj.info.get('multiplied', 0) + 1 + + return obj + + +class TestSubclassing(object): + # Test suite for masked subclasses of ndarray. + + def setup(self): + x = np.arange(5, dtype='float') + mx = msubarray(x, mask=[0, 1, 0, 0, 0]) + self.data = (x, mx) + + def test_data_subclassing(self): + # Tests whether the subclass is kept. 
+ x = np.arange(5) + m = [0, 0, 1, 0, 0] + xsub = SubArray(x) + xmsub = masked_array(xsub, mask=m) + assert_(isinstance(xmsub, MaskedArray)) + assert_equal(xmsub._data, xsub) + assert_(isinstance(xmsub._data, SubArray)) + + def test_maskedarray_subclassing(self): + # Tests subclassing MaskedArray + (x, mx) = self.data + assert_(isinstance(mx._data, subarray)) + + def test_masked_unary_operations(self): + # Tests masked_unary_operation + (x, mx) = self.data + with np.errstate(divide='ignore'): + assert_(isinstance(log(mx), msubarray)) + assert_equal(log(x), np.log(x)) + + def test_masked_binary_operations(self): + # Tests masked_binary_operation + (x, mx) = self.data + # Result should be a msubarray + assert_(isinstance(add(mx, mx), msubarray)) + assert_(isinstance(add(mx, x), msubarray)) + # Result should work + assert_equal(add(mx, x), mx+x) + assert_(isinstance(add(mx, mx)._data, subarray)) + assert_(isinstance(add.outer(mx, mx), msubarray)) + assert_(isinstance(hypot(mx, mx), msubarray)) + assert_(isinstance(hypot(mx, x), msubarray)) + + def test_masked_binary_operations2(self): + # Tests domained_masked_binary_operation + (x, mx) = self.data + xmx = masked_array(mx.data.__array__(), mask=mx.mask) + assert_(isinstance(divide(mx, mx), msubarray)) + assert_(isinstance(divide(mx, x), msubarray)) + assert_equal(divide(mx, mx), divide(xmx, xmx)) + + def test_attributepropagation(self): + x = array(arange(5), mask=[0]+[1]*4) + my = masked_array(subarray(x)) + ym = msubarray(x) + # + z = (my+1) + assert_(isinstance(z, MaskedArray)) + assert_(not isinstance(z, MSubArray)) + assert_(isinstance(z._data, SubArray)) + assert_equal(z._data.info, {}) + # + z = (ym+1) + assert_(isinstance(z, MaskedArray)) + assert_(isinstance(z, MSubArray)) + assert_(isinstance(z._data, SubArray)) + assert_(z._data.info['added'] > 0) + # Test that inplace methods from data get used (gh-4617) + ym += 1 + assert_(isinstance(ym, MaskedArray)) + assert_(isinstance(ym, MSubArray)) + assert_(isinstance(ym._data, SubArray)) + assert_(ym._data.info['iadded'] > 0) + # + ym._set_mask([1, 0, 0, 0, 1]) + assert_equal(ym._mask, [1, 0, 0, 0, 1]) + ym._series._set_mask([0, 0, 0, 0, 1]) + assert_equal(ym._mask, [0, 0, 0, 0, 1]) + # + xsub = subarray(x, info={'name':'x'}) + mxsub = masked_array(xsub) + assert_(hasattr(mxsub, 'info')) + assert_equal(mxsub.info, xsub.info) + + def test_subclasspreservation(self): + # Checks that masked_array(...,subok=True) preserves the class. 
+ x = np.arange(5) + m = [0, 0, 1, 0, 0] + xinfo = [(i, j) for (i, j) in zip(x, m)] + xsub = MSubArray(x, mask=m, info={'xsub':xinfo}) + # + mxsub = masked_array(xsub, subok=False) + assert_(not isinstance(mxsub, MSubArray)) + assert_(isinstance(mxsub, MaskedArray)) + assert_equal(mxsub._mask, m) + # + mxsub = asarray(xsub) + assert_(not isinstance(mxsub, MSubArray)) + assert_(isinstance(mxsub, MaskedArray)) + assert_equal(mxsub._mask, m) + # + mxsub = masked_array(xsub, subok=True) + assert_(isinstance(mxsub, MSubArray)) + assert_equal(mxsub.info, xsub.info) + assert_equal(mxsub._mask, xsub._mask) + # + mxsub = asanyarray(xsub) + assert_(isinstance(mxsub, MSubArray)) + assert_equal(mxsub.info, xsub.info) + assert_equal(mxsub._mask, m) + + def test_subclass_items(self): + """test that getter and setter go via baseclass""" + x = np.arange(5) + xcsub = ComplicatedSubArray(x) + mxcsub = masked_array(xcsub, mask=[True, False, True, False, False]) + # getter should return a ComplicatedSubArray, even for single item + # first check we wrote ComplicatedSubArray correctly + assert_(isinstance(xcsub[1], ComplicatedSubArray)) + assert_(isinstance(xcsub[1,...], ComplicatedSubArray)) + assert_(isinstance(xcsub[1:4], ComplicatedSubArray)) + + # now that it propagates inside the MaskedArray + assert_(isinstance(mxcsub[1], ComplicatedSubArray)) + assert_(isinstance(mxcsub[1,...].data, ComplicatedSubArray)) + assert_(mxcsub[0] is masked) + assert_(isinstance(mxcsub[0,...].data, ComplicatedSubArray)) + assert_(isinstance(mxcsub[1:4].data, ComplicatedSubArray)) + + # also for flattened version (which goes via MaskedIterator) + assert_(isinstance(mxcsub.flat[1].data, ComplicatedSubArray)) + assert_(mxcsub.flat[0] is masked) + assert_(isinstance(mxcsub.flat[1:4].base, ComplicatedSubArray)) + + # setter should only work with ComplicatedSubArray input + # first check we wrote ComplicatedSubArray correctly + assert_raises(ValueError, xcsub.__setitem__, 1, x[4]) + # now that it propagates inside the MaskedArray + assert_raises(ValueError, mxcsub.__setitem__, 1, x[4]) + assert_raises(ValueError, mxcsub.__setitem__, slice(1, 4), x[1:4]) + mxcsub[1] = xcsub[4] + mxcsub[1:4] = xcsub[1:4] + # also for flattened version (which goes via MaskedIterator) + assert_raises(ValueError, mxcsub.flat.__setitem__, 1, x[4]) + assert_raises(ValueError, mxcsub.flat.__setitem__, slice(1, 4), x[1:4]) + mxcsub.flat[1] = xcsub[4] + mxcsub.flat[1:4] = xcsub[1:4] + + def test_subclass_nomask_items(self): + x = np.arange(5) + xcsub = ComplicatedSubArray(x) + mxcsub_nomask = masked_array(xcsub) + + assert_(isinstance(mxcsub_nomask[1,...].data, ComplicatedSubArray)) + assert_(isinstance(mxcsub_nomask[0,...].data, ComplicatedSubArray)) + + assert_(isinstance(mxcsub_nomask[1], ComplicatedSubArray)) + assert_(isinstance(mxcsub_nomask[0], ComplicatedSubArray)) + + def test_subclass_repr(self): + """test that repr uses the name of the subclass + and 'array' for np.ndarray""" + x = np.arange(5) + mx = masked_array(x, mask=[True, False, True, False, False]) + assert_startswith(repr(mx), 'masked_array') + xsub = SubArray(x) + mxsub = masked_array(xsub, mask=[True, False, True, False, False]) + assert_startswith(repr(mxsub), + 'masked_{0}(data=[--, 1, --, 3, 4]'.format(SubArray.__name__)) + + def test_subclass_str(self): + """test str with subclass that has overridden str, setitem""" + # first without override + x = np.arange(5) + xsub = SubArray(x) + mxsub = masked_array(xsub, mask=[True, False, True, False, False]) + assert_equal(str(mxsub), '[-- 1 
-- 3 4]') + + xcsub = ComplicatedSubArray(x) + assert_raises(ValueError, xcsub.__setitem__, 0, + np.ma.core.masked_print_option) + mxcsub = masked_array(xcsub, mask=[True, False, True, False, False]) + assert_equal(str(mxcsub), 'myprefix [-- 1 -- 3 4] mypostfix') + + def test_pure_subclass_info_preservation(self): + # Test that ufuncs and methods conserve extra information consistently; + # see gh-7122. + arr1 = SubMaskedArray('test', data=[1,2,3,4,5,6]) + arr2 = SubMaskedArray(data=[0,1,2,3,4,5]) + diff1 = np.subtract(arr1, arr2) + assert_('info' in diff1._optinfo) + assert_(diff1._optinfo['info'] == 'test') + diff2 = arr1 - arr2 + assert_('info' in diff2._optinfo) + assert_(diff2._optinfo['info'] == 'test') diff --git a/contrib/python/numpy/py2/numpy/matrixlib/setup.py b/contrib/python/numpy/py2/numpy/matrixlib/setup.py new file mode 100644 index 0000000000..d0981d6584 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/setup.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +from __future__ import division, print_function + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('matrixlib', parent_package, top_path) + config.add_data_dir('tests') + return config + +if __name__ == "__main__": + from numpy.distutils.core import setup + config = configuration(top_path='').todict() + setup(**config) diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/__init__.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_defmatrix.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_defmatrix.py new file mode 100644 index 0000000000..aa6e08d64d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_defmatrix.py @@ -0,0 +1,460 @@ +from __future__ import division, absolute_import, print_function + +try: + # Accessing collections abstract classes from collections + # has been deprecated since Python 3.3 + import collections.abc as collections_abc +except ImportError: + import collections as collections_abc + +import numpy as np +from numpy import matrix, asmatrix, bmat +from numpy.testing import ( + assert_, assert_equal, assert_almost_equal, assert_array_equal, + assert_array_almost_equal, assert_raises + ) +from numpy.linalg import matrix_power +from numpy.matrixlib import mat + +class TestCtor(object): + def test_basic(self): + A = np.array([[1, 2], [3, 4]]) + mA = matrix(A) + assert_(np.all(mA.A == A)) + + B = bmat("A,A;A,A") + C = bmat([[A, A], [A, A]]) + D = np.array([[1, 2, 1, 2], + [3, 4, 3, 4], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + assert_(np.all(B.A == D)) + assert_(np.all(C.A == D)) + + E = np.array([[5, 6], [7, 8]]) + AEresult = matrix([[1, 2, 5, 6], [3, 4, 7, 8]]) + assert_(np.all(bmat([A, E]) == AEresult)) + + vec = np.arange(5) + mvec = matrix(vec) + assert_(mvec.shape == (1, 5)) + + def test_exceptions(self): + # Check for ValueError when called with invalid string data. 
+ assert_raises(ValueError, matrix, "invalid") + + def test_bmat_nondefault_str(self): + A = np.array([[1, 2], [3, 4]]) + B = np.array([[5, 6], [7, 8]]) + Aresult = np.array([[1, 2, 1, 2], + [3, 4, 3, 4], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + mixresult = np.array([[1, 2, 5, 6], + [3, 4, 7, 8], + [5, 6, 1, 2], + [7, 8, 3, 4]]) + assert_(np.all(bmat("A,A;A,A") == Aresult)) + assert_(np.all(bmat("A,A;A,A", ldict={'A':B}) == Aresult)) + assert_raises(TypeError, bmat, "A,A;A,A", gdict={'A':B}) + assert_( + np.all(bmat("A,A;A,A", ldict={'A':A}, gdict={'A':B}) == Aresult)) + b2 = bmat("A,B;C,D", ldict={'A':A,'B':B}, gdict={'C':B,'D':A}) + assert_(np.all(b2 == mixresult)) + + +class TestProperties(object): + def test_sum(self): + """Test whether matrix.sum(axis=1) preserves orientation. + Fails in NumPy <= 0.9.6.2127. + """ + M = matrix([[1, 2, 0, 0], + [3, 4, 0, 0], + [1, 2, 1, 2], + [3, 4, 3, 4]]) + sum0 = matrix([8, 12, 4, 6]) + sum1 = matrix([3, 7, 6, 14]).T + sumall = 30 + assert_array_equal(sum0, M.sum(axis=0)) + assert_array_equal(sum1, M.sum(axis=1)) + assert_equal(sumall, M.sum()) + + assert_array_equal(sum0, np.sum(M, axis=0)) + assert_array_equal(sum1, np.sum(M, axis=1)) + assert_equal(sumall, np.sum(M)) + + def test_prod(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.prod(), 720) + assert_equal(x.prod(0), matrix([[4, 10, 18]])) + assert_equal(x.prod(1), matrix([[6], [120]])) + + assert_equal(np.prod(x), 720) + assert_equal(np.prod(x, axis=0), matrix([[4, 10, 18]])) + assert_equal(np.prod(x, axis=1), matrix([[6], [120]])) + + y = matrix([0, 1, 3]) + assert_(y.prod() == 0) + + def test_max(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.max(), 6) + assert_equal(x.max(0), matrix([[4, 5, 6]])) + assert_equal(x.max(1), matrix([[3], [6]])) + + assert_equal(np.max(x), 6) + assert_equal(np.max(x, axis=0), matrix([[4, 5, 6]])) + assert_equal(np.max(x, axis=1), matrix([[3], [6]])) + + def test_min(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.min(), 1) + assert_equal(x.min(0), matrix([[1, 2, 3]])) + assert_equal(x.min(1), matrix([[1], [4]])) + + assert_equal(np.min(x), 1) + assert_equal(np.min(x, axis=0), matrix([[1, 2, 3]])) + assert_equal(np.min(x, axis=1), matrix([[1], [4]])) + + def test_ptp(self): + x = np.arange(4).reshape((2, 2)) + assert_(x.ptp() == 3) + assert_(np.all(x.ptp(0) == np.array([2, 2]))) + assert_(np.all(x.ptp(1) == np.array([1, 1]))) + + def test_var(self): + x = np.arange(9).reshape((3, 3)) + mx = x.view(np.matrix) + assert_equal(x.var(ddof=0), mx.var(ddof=0)) + assert_equal(x.var(ddof=1), mx.var(ddof=1)) + + def test_basic(self): + import numpy.linalg as linalg + + A = np.array([[1., 2.], + [3., 4.]]) + mA = matrix(A) + assert_(np.allclose(linalg.inv(A), mA.I)) + assert_(np.all(np.array(np.transpose(A) == mA.T))) + assert_(np.all(np.array(np.transpose(A) == mA.H))) + assert_(np.all(A == mA.A)) + + B = A + 2j*A + mB = matrix(B) + assert_(np.allclose(linalg.inv(B), mB.I)) + assert_(np.all(np.array(np.transpose(B) == mB.T))) + assert_(np.all(np.array(np.transpose(B).conj() == mB.H))) + + def test_pinv(self): + x = matrix(np.arange(6).reshape(2, 3)) + xpinv = matrix([[-0.77777778, 0.27777778], + [-0.11111111, 0.11111111], + [ 0.55555556, -0.05555556]]) + assert_almost_equal(x.I, xpinv) + + def test_comparisons(self): + A = np.arange(100).reshape(10, 10) + mA = matrix(A) + mB = matrix(A) + 0.1 + assert_(np.all(mB == A+0.1)) + assert_(np.all(mB == matrix(A+0.1))) + assert_(not np.any(mB == matrix(A-0.1))) + assert_(np.all(mA < mB)) + 
assert_(np.all(mA <= mB)) + assert_(np.all(mA <= mA)) + assert_(not np.any(mA < mA)) + + assert_(not np.any(mB < mA)) + assert_(np.all(mB >= mA)) + assert_(np.all(mB >= mB)) + assert_(not np.any(mB > mB)) + + assert_(np.all(mA == mA)) + assert_(not np.any(mA == mB)) + assert_(np.all(mB != mA)) + + assert_(not np.all(abs(mA) > 0)) + assert_(np.all(abs(mB > 0))) + + def test_asmatrix(self): + A = np.arange(100).reshape(10, 10) + mA = asmatrix(A) + A[0, 0] = -10 + assert_(A[0, 0] == mA[0, 0]) + + def test_noaxis(self): + A = matrix([[1, 0], [0, 1]]) + assert_(A.sum() == matrix(2)) + assert_(A.mean() == matrix(0.5)) + + def test_repr(self): + A = matrix([[1, 0], [0, 1]]) + assert_(repr(A) == "matrix([[1, 0],\n [0, 1]])") + + def test_make_bool_matrix_from_str(self): + A = matrix('True; True; False') + B = matrix([[True], [True], [False]]) + assert_array_equal(A, B) + +class TestCasting(object): + def test_basic(self): + A = np.arange(100).reshape(10, 10) + mA = matrix(A) + + mB = mA.copy() + O = np.ones((10, 10), np.float64) * 0.1 + mB = mB + O + assert_(mB.dtype.type == np.float64) + assert_(np.all(mA != mB)) + assert_(np.all(mB == mA+0.1)) + + mC = mA.copy() + O = np.ones((10, 10), np.complex128) + mC = mC * O + assert_(mC.dtype.type == np.complex128) + assert_(np.all(mA != mB)) + + +class TestAlgebra(object): + def test_basic(self): + import numpy.linalg as linalg + + A = np.array([[1., 2.], [3., 4.]]) + mA = matrix(A) + + B = np.identity(2) + for i in range(6): + assert_(np.allclose((mA ** i).A, B)) + B = np.dot(B, A) + + Ainv = linalg.inv(A) + B = np.identity(2) + for i in range(6): + assert_(np.allclose((mA ** -i).A, B)) + B = np.dot(B, Ainv) + + assert_(np.allclose((mA * mA).A, np.dot(A, A))) + assert_(np.allclose((mA + mA).A, (A + A))) + assert_(np.allclose((3*mA).A, (3*A))) + + mA2 = matrix(A) + mA2 *= 3 + assert_(np.allclose(mA2.A, 3*A)) + + def test_pow(self): + """Test raising a matrix to an integer power works as expected.""" + m = matrix("1. 2.; 3. 
4.") + m2 = m.copy() + m2 **= 2 + mi = m.copy() + mi **= -1 + m4 = m2.copy() + m4 **= 2 + assert_array_almost_equal(m2, m**2) + assert_array_almost_equal(m4, np.dot(m2, m2)) + assert_array_almost_equal(np.dot(mi, m), np.eye(2)) + + def test_scalar_type_pow(self): + m = matrix([[1, 2], [3, 4]]) + for scalar_t in [np.int8, np.uint8]: + two = scalar_t(2) + assert_array_almost_equal(m ** 2, m ** two) + + def test_notimplemented(self): + '''Check that 'not implemented' operations produce a failure.''' + A = matrix([[1., 2.], + [3., 4.]]) + + # __rpow__ + with assert_raises(TypeError): + 1.0**A + + # __mul__ with something not a list, ndarray, tuple, or scalar + with assert_raises(TypeError): + A*object() + + +class TestMatrixReturn(object): + def test_instance_methods(self): + a = matrix([1.0], dtype='f8') + methodargs = { + 'astype': ('intc',), + 'clip': (0.0, 1.0), + 'compress': ([1],), + 'repeat': (1,), + 'reshape': (1,), + 'swapaxes': (0, 0), + 'dot': np.array([1.0]), + } + excluded_methods = [ + 'argmin', 'choose', 'dump', 'dumps', 'fill', 'getfield', + 'getA', 'getA1', 'item', 'nonzero', 'put', 'putmask', 'resize', + 'searchsorted', 'setflags', 'setfield', 'sort', + 'partition', 'argpartition', + 'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any', + 'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp', + 'prod', 'std', 'ctypes', 'itemset', + ] + for attrib in dir(a): + if attrib.startswith('_') or attrib in excluded_methods: + continue + f = getattr(a, attrib) + if isinstance(f, collections_abc.Callable): + # reset contents of a + a.astype('f8') + a.fill(1.0) + if attrib in methodargs: + args = methodargs[attrib] + else: + args = () + b = f(*args) + assert_(type(b) is matrix, "%s" % attrib) + assert_(type(a.real) is matrix) + assert_(type(a.imag) is matrix) + c, d = matrix([0.0]).nonzero() + assert_(type(c) is np.ndarray) + assert_(type(d) is np.ndarray) + + +class TestIndexing(object): + def test_basic(self): + x = asmatrix(np.zeros((3, 2), float)) + y = np.zeros((3, 1), float) + y[:, 0] = [0.8, 0.2, 0.3] + x[:, 1] = y > 0.5 + assert_equal(x, [[0, 1], [0, 0], [0, 0]]) + + +class TestNewScalarIndexing(object): + a = matrix([[1, 2], [3, 4]]) + + def test_dimesions(self): + a = self.a + x = a[0] + assert_equal(x.ndim, 2) + + def test_array_from_matrix_list(self): + a = self.a + x = np.array([a, a]) + assert_equal(x.shape, [2, 2, 2]) + + def test_array_to_list(self): + a = self.a + assert_equal(a.tolist(), [[1, 2], [3, 4]]) + + def test_fancy_indexing(self): + a = self.a + x = a[1, [0, 1, 0]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[3, 4, 3]])) + x = a[[1, 0]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[3, 4], [1, 2]])) + x = a[[[1], [0]], [[1, 0], [0, 1]]] + assert_(isinstance(x, matrix)) + assert_equal(x, matrix([[4, 3], [1, 2]])) + + def test_matrix_element(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x[0][0], matrix([[1, 2, 3]])) + assert_equal(x[0][0].shape, (1, 3)) + assert_equal(x[0].shape, (1, 3)) + assert_equal(x[:, 0].shape, (2, 1)) + + x = matrix(0) + assert_equal(x[0, 0], 0) + assert_equal(x[0], 0) + assert_equal(x[:, 0].shape, x.shape) + + def test_scalar_indexing(self): + x = asmatrix(np.zeros((3, 2), float)) + assert_equal(x[0, 0], x[0][0]) + + def test_row_column_indexing(self): + x = asmatrix(np.eye(2)) + assert_array_equal(x[0,:], [[1, 0]]) + assert_array_equal(x[1,:], [[0, 1]]) + assert_array_equal(x[:, 0], [[1], [0]]) + assert_array_equal(x[:, 1], [[0], [1]]) + + def test_boolean_indexing(self): + A = 
np.arange(6) + A.shape = (3, 2) + x = asmatrix(A) + assert_array_equal(x[:, np.array([True, False])], x[:, 0]) + assert_array_equal(x[np.array([True, False, False]),:], x[0,:]) + + def test_list_indexing(self): + A = np.arange(6) + A.shape = (3, 2) + x = asmatrix(A) + assert_array_equal(x[:, [1, 0]], x[:, ::-1]) + assert_array_equal(x[[2, 1, 0],:], x[::-1,:]) + + +class TestPower(object): + def test_returntype(self): + a = np.array([[0, 1], [0, 0]]) + assert_(type(matrix_power(a, 2)) is np.ndarray) + a = mat(a) + assert_(type(matrix_power(a, 2)) is matrix) + + def test_list(self): + assert_array_equal(matrix_power([[0, 1], [0, 0]], 2), [[0, 0], [0, 0]]) + + +class TestShape(object): + + a = np.array([[1], [2]]) + m = matrix([[1], [2]]) + + def test_shape(self): + assert_equal(self.a.shape, (2, 1)) + assert_equal(self.m.shape, (2, 1)) + + def test_numpy_ravel(self): + assert_equal(np.ravel(self.a).shape, (2,)) + assert_equal(np.ravel(self.m).shape, (2,)) + + def test_member_ravel(self): + assert_equal(self.a.ravel().shape, (2,)) + assert_equal(self.m.ravel().shape, (1, 2)) + + def test_member_flatten(self): + assert_equal(self.a.flatten().shape, (2,)) + assert_equal(self.m.flatten().shape, (1, 2)) + + def test_numpy_ravel_order(self): + x = np.array([[1, 2, 3], [4, 5, 6]]) + assert_equal(np.ravel(x), [1, 2, 3, 4, 5, 6]) + assert_equal(np.ravel(x, order='F'), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T, order='A'), [1, 2, 3, 4, 5, 6]) + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(np.ravel(x), [1, 2, 3, 4, 5, 6]) + assert_equal(np.ravel(x, order='F'), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T), [1, 4, 2, 5, 3, 6]) + assert_equal(np.ravel(x.T, order='A'), [1, 2, 3, 4, 5, 6]) + + def test_matrix_ravel_order(self): + x = matrix([[1, 2, 3], [4, 5, 6]]) + assert_equal(x.ravel(), [[1, 2, 3, 4, 5, 6]]) + assert_equal(x.ravel(order='F'), [[1, 4, 2, 5, 3, 6]]) + assert_equal(x.T.ravel(), [[1, 4, 2, 5, 3, 6]]) + assert_equal(x.T.ravel(order='A'), [[1, 2, 3, 4, 5, 6]]) + + def test_array_memory_sharing(self): + assert_(np.may_share_memory(self.a, self.a.ravel())) + assert_(not np.may_share_memory(self.a, self.a.flatten())) + + def test_matrix_memory_sharing(self): + assert_(np.may_share_memory(self.m, self.m.ravel())) + assert_(not np.may_share_memory(self.m, self.m.flatten())) + + def test_expand_dims_matrix(self): + # matrices are always 2d - so expand_dims only makes sense when the + # type is changed away from matrix. + a = np.arange(10).reshape((2, 5)).view(np.matrix) + expanded = np.expand_dims(a, axis=1) + assert_equal(expanded.ndim, 3) + assert_(not isinstance(expanded, np.matrix)) diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_interaction.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_interaction.py new file mode 100644 index 0000000000..088ae3c6a6 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_interaction.py @@ -0,0 +1,363 @@ +"""Tests of interaction of matrix with other parts of numpy. + +Note that tests with MaskedArray and linalg are done in separate files. +""" +from __future__ import division, absolute_import, print_function + +import pytest + +import textwrap +import warnings + +import numpy as np +from numpy.testing import (assert_, assert_equal, assert_raises, + assert_raises_regex, assert_array_equal, + assert_almost_equal, assert_array_almost_equal) + + +def test_fancy_indexing(): + # The matrix class messes with the shape. 
While this is always + # weird (getitem is not used, it does not have setitem nor knows + # about fancy indexing), this tests gh-3110 + # 2018-04-29: moved here from core.tests.test_index. + m = np.matrix([[1, 2], [3, 4]]) + + assert_(isinstance(m[[0, 1, 0], :], np.matrix)) + + # gh-3110. Note the transpose currently because matrices do *not* + # support dimension fixing for fancy indexing correctly. + x = np.asmatrix(np.arange(50).reshape(5, 10)) + assert_equal(x[:2, np.array(-1)], x[:2, -1].T) + + +def test_polynomial_mapdomain(): + # test that polynomial preserved matrix subtype. + # 2018-04-29: moved here from polynomial.tests.polyutils. + dom1 = [0, 4] + dom2 = [1, 3] + x = np.matrix([dom1, dom1]) + res = np.polynomial.polyutils.mapdomain(x, dom1, dom2) + assert_(isinstance(res, np.matrix)) + + +def test_sort_matrix_none(): + # 2018-04-29: moved here from core.tests.test_multiarray + a = np.matrix([[2, 1, 0]]) + actual = np.sort(a, axis=None) + expected = np.matrix([[0, 1, 2]]) + assert_equal(actual, expected) + assert_(type(expected) is np.matrix) + + +def test_partition_matrix_none(): + # gh-4301 + # 2018-04-29: moved here from core.tests.test_multiarray + a = np.matrix([[2, 1, 0]]) + actual = np.partition(a, 1, axis=None) + expected = np.matrix([[0, 1, 2]]) + assert_equal(actual, expected) + assert_(type(expected) is np.matrix) + + +def test_dot_scalar_and_matrix_of_objects(): + # Ticket #2469 + # 2018-04-29: moved here from core.tests.test_multiarray + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.dot(arr, 3), desired) + assert_equal(np.dot(3, arr), desired) + + +def test_inner_scalar_and_matrix(): + # 2018-04-29: moved here from core.tests.test_multiarray + for dt in np.typecodes['AllInteger'] + np.typecodes['AllFloat'] + '?': + sca = np.array(3, dtype=dt)[()] + arr = np.matrix([[1, 2], [3, 4]], dtype=dt) + desired = np.matrix([[3, 6], [9, 12]], dtype=dt) + assert_equal(np.inner(arr, sca), desired) + assert_equal(np.inner(sca, arr), desired) + + +def test_inner_scalar_and_matrix_of_objects(): + # Ticket #4482 + # 2018-04-29: moved here from core.tests.test_multiarray + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.inner(arr, 3), desired) + assert_equal(np.inner(3, arr), desired) + + +def test_iter_allocate_output_subtype(): + # Make sure that the subtype with priority wins + # 2018-04-29: moved here from core.tests.test_nditer, given the + # matrix specific shape test. 
+ + # matrix vs ndarray + a = np.matrix([[1, 2], [3, 4]]) + b = np.arange(4).reshape(2, 2).T + i = np.nditer([a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + assert_(type(i.operands[2]) is np.matrix) + assert_(type(i.operands[2]) is not np.ndarray) + assert_equal(i.operands[2].shape, (2, 2)) + + # matrix always wants things to be 2D + b = np.arange(4).reshape(1, 2, 2) + assert_raises(RuntimeError, np.nditer, [a, b, None], [], + [['readonly'], ['readonly'], ['writeonly', 'allocate']]) + # but if subtypes are disabled, the result can still work + i = np.nditer([a, b, None], [], + [['readonly'], ['readonly'], + ['writeonly', 'allocate', 'no_subtype']]) + assert_(type(i.operands[2]) is np.ndarray) + assert_(type(i.operands[2]) is not np.matrix) + assert_equal(i.operands[2].shape, (1, 2, 2)) + + +def like_function(): + # 2018-04-29: moved here from core.tests.test_numeric + a = np.matrix([[1, 2], [3, 4]]) + for like_function in np.zeros_like, np.ones_like, np.empty_like: + b = like_function(a) + assert_(type(b) is np.matrix) + + c = like_function(a, subok=False) + assert_(type(c) is not np.matrix) + + +def test_array_astype(): + # 2018-04-29: copied here from core.tests.test_api + # subok=True passes through a matrix + a = np.matrix([[0, 1, 2], [3, 4, 5]], dtype='f4') + b = a.astype('f4', subok=True, copy=False) + assert_(a is b) + + # subok=True is default, and creates a subtype on a cast + b = a.astype('i4', copy=False) + assert_equal(a, b) + assert_equal(type(b), np.matrix) + + # subok=False never returns a matrix + b = a.astype('f4', subok=False, copy=False) + assert_equal(a, b) + assert_(not (a is b)) + assert_(type(b) is not np.matrix) + + +def test_stack(): + # 2018-04-29: copied here from core.tests.test_shape_base + # check np.matrix cannot be stacked + m = np.matrix([[1, 2], [3, 4]]) + assert_raises_regex(ValueError, 'shape too large to be a matrix', + np.stack, [m, m]) + + +def test_object_scalar_multiply(): + # Tickets #2469 and #4482 + # 2018-04-29: moved here from core.tests.test_ufunc + arr = np.matrix([1, 2], dtype=object) + desired = np.matrix([[3, 6]], dtype=object) + assert_equal(np.multiply(arr, 3), desired) + assert_equal(np.multiply(3, arr), desired) + + +def test_nanfunctions_matrices(): + # Check that it works and that type and + # shape are preserved + # 2018-04-29: moved here from core.tests.test_nanfunctions + mat = np.matrix(np.eye(3)) + for f in [np.nanmin, np.nanmax]: + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 1)) + res = f(mat) + assert_(np.isscalar(res)) + # check that rows of nan are dealt with for subclasses (#4628) + mat[1] = np.nan + for f in [np.nanmin, np.nanmax]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(not np.any(np.isnan(res))) + assert_(len(w) == 0) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(np.isnan(res[1, 0]) and not np.isnan(res[0, 0]) + and not np.isnan(res[2, 0])) + assert_(len(w) == 1, 'no warning raised') + assert_(issubclass(w[0].category, RuntimeWarning)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + res = f(mat) + assert_(np.isscalar(res)) + assert_(res != np.nan) + assert_(len(w) == 0) + + +def 
test_nanfunctions_matrices_general(): + # Check that it works and that type and + # shape are preserved + # 2018-04-29: moved here from core.tests.test_nanfunctions + mat = np.matrix(np.eye(3)) + for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod, + np.nanmean, np.nanvar, np.nanstd): + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 1)) + res = f(mat) + assert_(np.isscalar(res)) + + for f in np.nancumsum, np.nancumprod: + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3*3)) + + +def test_average_matrix(): + # 2018-04-29: moved here from core.tests.test_function_base. + y = np.matrix(np.random.rand(5, 5)) + assert_array_equal(y.mean(0), np.average(y, 0)) + + a = np.matrix([[1, 2], [3, 4]]) + w = np.matrix([[1, 2], [3, 4]]) + + r = np.average(a, axis=0, weights=w) + assert_equal(type(r), np.matrix) + assert_equal(r, [[2.5, 10.0/3]]) + + +def test_trapz_matrix(): + # Test to make sure matrices give the same answer as ndarrays + # 2018-04-29: moved here from core.tests.test_function_base. + x = np.linspace(0, 5) + y = x * x + r = np.trapz(y, x) + mx = np.matrix(x) + my = np.matrix(y) + mr = np.trapz(my, mx) + assert_almost_equal(mr, r) + + +def test_ediff1d_matrix(): + # 2018-04-29: moved here from core.tests.test_arraysetops. + assert(isinstance(np.ediff1d(np.matrix(1)), np.matrix)) + assert(isinstance(np.ediff1d(np.matrix(1), to_begin=1), np.matrix)) + + +def test_apply_along_axis_matrix(): + # this test is particularly malicious because matrix + # refuses to become 1d + # 2018-04-29: moved here from core.tests.test_shape_base. + def double(row): + return row * 2 + + m = np.matrix([[0, 1], [2, 3]]) + expected = np.matrix([[0, 2], [4, 6]]) + + result = np.apply_along_axis(double, 0, m) + assert_(isinstance(result, np.matrix)) + assert_array_equal(result, expected) + + result = np.apply_along_axis(double, 1, m) + assert_(isinstance(result, np.matrix)) + assert_array_equal(result, expected) + + +def test_kron_matrix(): + # 2018-04-29: moved here from core.tests.test_shape_base. + a = np.ones([2, 2]) + m = np.asmatrix(a) + assert_equal(type(np.kron(a, a)), np.ndarray) + assert_equal(type(np.kron(m, m)), np.matrix) + assert_equal(type(np.kron(a, m)), np.matrix) + assert_equal(type(np.kron(m, a)), np.matrix) + + +class TestConcatenatorMatrix(object): + # 2018-04-29: moved here from core.tests.test_index_tricks. + def test_matrix(self): + a = [1, 2] + b = [3, 4] + + ab_r = np.r_['r', a, b] + ab_c = np.r_['c', a, b] + + assert_equal(type(ab_r), np.matrix) + assert_equal(type(ab_c), np.matrix) + + assert_equal(np.array(ab_r), [[1, 2, 3, 4]]) + assert_equal(np.array(ab_c), [[1], [2], [3], [4]]) + + assert_raises(ValueError, lambda: np.r_['rc', a, b]) + + def test_matrix_scalar(self): + r = np.r_['r', [1, 2], 3] + assert_equal(type(r), np.matrix) + assert_equal(np.array(r), [[1, 2, 3]]) + + def test_matrix_builder(self): + a = np.array([1]) + b = np.array([2]) + c = np.array([3]) + d = np.array([4]) + actual = np.r_['a, b; c, d'] + expected = np.bmat([[a, b], [c, d]]) + + assert_equal(actual, expected) + assert_equal(type(actual), type(expected)) + + +def test_array_equal_error_message_matrix(): + # 2018-04-29: moved here from testing.tests.test_utils. 
+ try: + assert_equal(np.array([1, 2]), np.matrix([1, 2])) + except AssertionError as e: + msg = str(e) + msg2 = msg.replace("shapes (2L,), (1L, 2L)", "shapes (2,), (1, 2)") + msg_reference = textwrap.dedent("""\ + + Arrays are not equal + + (shapes (2,), (1, 2) mismatch) + x: array([1, 2]) + y: matrix([[1, 2]])""") + try: + assert_equal(msg, msg_reference) + except AssertionError: + assert_equal(msg2, msg_reference) + else: + raise AssertionError("Did not raise") + + +def test_array_almost_equal_matrix(): + # Matrix slicing keeps things 2-D, while array does not necessarily. + # See gh-8452. + # 2018-04-29: moved here from testing.tests.test_utils. + m1 = np.matrix([[1., 2.]]) + m2 = np.matrix([[1., np.nan]]) + m3 = np.matrix([[1., -np.inf]]) + m4 = np.matrix([[np.nan, np.inf]]) + m5 = np.matrix([[1., 2.], [np.nan, np.inf]]) + for assert_func in assert_array_almost_equal, assert_almost_equal: + for m in m1, m2, m3, m4, m5: + assert_func(m, m) + a = np.array(m) + assert_func(a, m) + assert_func(m, a) diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_masked_matrix.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_masked_matrix.py new file mode 100644 index 0000000000..52fd185773 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_masked_matrix.py @@ -0,0 +1,231 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.ma.testutils import (assert_, assert_equal, assert_raises, + assert_array_equal) +from numpy.ma.core import (masked_array, masked_values, masked, allequal, + MaskType, getmask, MaskedArray, nomask, + log, add, hypot, divide) +from numpy.ma.extras import mr_ +from numpy.core.numeric import pickle + + +class MMatrix(MaskedArray, np.matrix,): + + def __new__(cls, data, mask=nomask): + mat = np.matrix(data) + _data = MaskedArray.__new__(cls, data=mat, mask=mask) + return _data + + def __array_finalize__(self, obj): + np.matrix.__array_finalize__(self, obj) + MaskedArray.__array_finalize__(self, obj) + return + + def _get_series(self): + _view = self.view(MaskedArray) + _view._sharedmask = False + return _view + _series = property(fget=_get_series) + + +class TestMaskedMatrix(object): + def test_matrix_indexing(self): + # Tests conversions and indexing + x1 = np.matrix([[1, 2, 3], [4, 3, 2]]) + x2 = masked_array(x1, mask=[[1, 0, 0], [0, 1, 0]]) + x3 = masked_array(x1, mask=[[0, 1, 0], [1, 0, 0]]) + x4 = masked_array(x1) + # test conversion to strings + str(x2) # raises? + repr(x2) # raises? 
+ # tests of indexing + assert_(type(x2[1, 0]) is type(x1[1, 0])) + assert_(x1[1, 0] == x2[1, 0]) + assert_(x2[1, 1] is masked) + assert_equal(x1[0, 2], x2[0, 2]) + assert_equal(x1[0, 1:], x2[0, 1:]) + assert_equal(x1[:, 2], x2[:, 2]) + assert_equal(x1[:], x2[:]) + assert_equal(x1[1:], x3[1:]) + x1[0, 2] = 9 + x2[0, 2] = 9 + assert_equal(x1, x2) + x1[0, 1:] = 99 + x2[0, 1:] = 99 + assert_equal(x1, x2) + x2[0, 1] = masked + assert_equal(x1, x2) + x2[0, 1:] = masked + assert_equal(x1, x2) + x2[0, :] = x1[0, :] + x2[0, 1] = masked + assert_(allequal(getmask(x2), np.array([[0, 1, 0], [0, 1, 0]]))) + x3[1, :] = masked_array([1, 2, 3], [1, 1, 0]) + assert_(allequal(getmask(x3)[1], masked_array([1, 1, 0]))) + assert_(allequal(getmask(x3[1]), masked_array([1, 1, 0]))) + x4[1, :] = masked_array([1, 2, 3], [1, 1, 0]) + assert_(allequal(getmask(x4[1]), masked_array([1, 1, 0]))) + assert_(allequal(x4[1], masked_array([1, 2, 3]))) + x1 = np.matrix(np.arange(5) * 1.0) + x2 = masked_values(x1, 3.0) + assert_equal(x1, x2) + assert_(allequal(masked_array([0, 0, 0, 1, 0], dtype=MaskType), + x2.mask)) + assert_equal(3.0, x2.fill_value) + + def test_pickling_subbaseclass(self): + # Test pickling w/ a subclass of ndarray + a = masked_array(np.matrix(list(range(10))), mask=[1, 0, 1, 0, 0] * 2) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + a_pickled = pickle.loads(pickle.dumps(a, protocol=proto)) + assert_equal(a_pickled._mask, a._mask) + assert_equal(a_pickled, a) + assert_(isinstance(a_pickled._data, np.matrix)) + + def test_count_mean_with_matrix(self): + m = masked_array(np.matrix([[1, 2], [3, 4]]), mask=np.zeros((2, 2))) + + assert_equal(m.count(axis=0).shape, (1, 2)) + assert_equal(m.count(axis=1).shape, (2, 1)) + + # Make sure broadcasting inside mean and var work + assert_equal(m.mean(axis=0), [[2., 3.]]) + assert_equal(m.mean(axis=1), [[1.5], [3.5]]) + + def test_flat(self): + # Test that flat can return items even for matrices [#4585, #4615] + # test simple access + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + assert_equal(test.flat[1], 2) + assert_equal(test.flat[2], masked) + assert_(np.all(test.flat[0:2] == test[0, 0:2])) + # Test flat on masked_matrices + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + test.flat = masked_array([3, 2, 1], mask=[1, 0, 0]) + control = masked_array(np.matrix([[3, 2, 1]]), mask=[1, 0, 0]) + assert_equal(test, control) + # Test setting + test = masked_array(np.matrix([[1, 2, 3]]), mask=[0, 0, 1]) + testflat = test.flat + testflat[:] = testflat[[2, 1, 0]] + assert_equal(test, control) + testflat[0] = 9 + # test that matrices keep the correct shape (#4615) + a = masked_array(np.matrix(np.eye(2)), mask=0) + b = a.flat + b01 = b[:2] + assert_equal(b01.data, np.array([[1., 0.]])) + assert_equal(b01.mask, np.array([[False, False]])) + + def test_allany_onmatrices(self): + x = np.array([[0.13, 0.26, 0.90], + [0.28, 0.33, 0.63], + [0.31, 0.87, 0.70]]) + X = np.matrix(x) + m = np.array([[True, False, False], + [False, False, False], + [True, True, False]], dtype=np.bool_) + mX = masked_array(X, mask=m) + mXbig = (mX > 0.5) + mXsmall = (mX < 0.5) + + assert_(not mXbig.all()) + assert_(mXbig.any()) + assert_equal(mXbig.all(0), np.matrix([False, False, True])) + assert_equal(mXbig.all(1), np.matrix([False, False, True]).T) + assert_equal(mXbig.any(0), np.matrix([False, False, True])) + assert_equal(mXbig.any(1), np.matrix([True, True, True]).T) + + assert_(not mXsmall.all()) + assert_(mXsmall.any()) + assert_equal(mXsmall.all(0), 
np.matrix([True, True, False])) + assert_equal(mXsmall.all(1), np.matrix([False, False, False]).T) + assert_equal(mXsmall.any(0), np.matrix([True, True, False])) + assert_equal(mXsmall.any(1), np.matrix([True, True, False]).T) + + def test_compressed(self): + a = masked_array(np.matrix([1, 2, 3, 4]), mask=[0, 0, 0, 0]) + b = a.compressed() + assert_equal(b, a) + assert_(isinstance(b, np.matrix)) + a[0, 0] = masked + b = a.compressed() + assert_equal(b, [[2, 3, 4]]) + + def test_ravel(self): + a = masked_array(np.matrix([1, 2, 3, 4, 5]), mask=[[0, 1, 0, 0, 0]]) + aravel = a.ravel() + assert_equal(aravel.shape, (1, 5)) + assert_equal(aravel._mask.shape, a.shape) + + def test_view(self): + # Test view w/ flexible dtype + iterator = list(zip(np.arange(10), np.random.rand(10))) + data = np.array(iterator) + a = masked_array(iterator, dtype=[('a', float), ('b', float)]) + a.mask[0] = (1, 0) + test = a.view((float, 2), np.matrix) + assert_equal(test, data) + assert_(isinstance(test, np.matrix)) + assert_(not isinstance(test, MaskedArray)) + + +class TestSubclassing(object): + # Test suite for masked subclasses of ndarray. + + def setup(self): + x = np.arange(5, dtype='float') + mx = MMatrix(x, mask=[0, 1, 0, 0, 0]) + self.data = (x, mx) + + def test_maskedarray_subclassing(self): + # Tests subclassing MaskedArray + (x, mx) = self.data + assert_(isinstance(mx._data, np.matrix)) + + def test_masked_unary_operations(self): + # Tests masked_unary_operation + (x, mx) = self.data + with np.errstate(divide='ignore'): + assert_(isinstance(log(mx), MMatrix)) + assert_equal(log(x), np.log(x)) + + def test_masked_binary_operations(self): + # Tests masked_binary_operation + (x, mx) = self.data + # Result should be a MMatrix + assert_(isinstance(add(mx, mx), MMatrix)) + assert_(isinstance(add(mx, x), MMatrix)) + # Result should work + assert_equal(add(mx, x), mx+x) + assert_(isinstance(add(mx, mx)._data, np.matrix)) + assert_(isinstance(add.outer(mx, mx), MMatrix)) + assert_(isinstance(hypot(mx, mx), MMatrix)) + assert_(isinstance(hypot(mx, x), MMatrix)) + + def test_masked_binary_operations2(self): + # Tests domained_masked_binary_operation + (x, mx) = self.data + xmx = masked_array(mx.data.__array__(), mask=mx.mask) + assert_(isinstance(divide(mx, mx), MMatrix)) + assert_(isinstance(divide(mx, x), MMatrix)) + assert_equal(divide(mx, mx), divide(xmx, xmx)) + +class TestConcatenator(object): + # Tests for mr_, the equivalent of r_ for masked arrays. + + def test_matrix_builder(self): + assert_raises(np.ma.MAError, lambda: mr_['1, 2; 3, 4']) + + def test_matrix(self): + # Test consistency with unmasked version. If we ever deprecate + # matrix, this test should either still pass, or both actual and + # expected should fail to be build. 
+ actual = mr_['r', 1, 2, 3] + expected = np.ma.array(np.r_['r', 1, 2, 3]) + assert_array_equal(actual, expected) + + # outer type is masked array, inner type is matrix + assert_equal(type(actual), type(expected)) + assert_equal(type(actual.data), type(expected.data)) diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_matrix_linalg.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_matrix_linalg.py new file mode 100644 index 0000000000..6fc733c2e9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_matrix_linalg.py @@ -0,0 +1,95 @@ +""" Test functions for linalg module using the matrix class.""" +from __future__ import division, absolute_import, print_function + +import numpy as np + +from numpy.linalg.tests.test_linalg import ( + LinalgCase, apply_tag, TestQR as _TestQR, LinalgTestCase, + _TestNorm2D, _TestNormDoubleBase, _TestNormSingleBase, _TestNormInt64Base, + SolveCases, InvCases, EigvalsCases, EigCases, SVDCases, CondCases, + PinvCases, DetCases, LstsqCases) + + +CASES = [] + +# square test cases +CASES += apply_tag('square', [ + LinalgCase("0x0_matrix", + np.empty((0, 0), dtype=np.double).view(np.matrix), + np.empty((0, 1), dtype=np.double).view(np.matrix), + tags={'size-0'}), + LinalgCase("matrix_b_only", + np.array([[1., 2.], [3., 4.]]), + np.matrix([2., 1.]).T), + LinalgCase("matrix_a_and_b", + np.matrix([[1., 2.], [3., 4.]]), + np.matrix([2., 1.]).T), +]) + +# hermitian test-cases +CASES += apply_tag('hermitian', [ + LinalgCase("hmatrix_a_and_b", + np.matrix([[1., 2.], [2., 1.]]), + None), +]) +# No need to make generalized or strided cases for matrices. + + +class MatrixTestCase(LinalgTestCase): + TEST_CASES = CASES + + +class TestSolveMatrix(SolveCases, MatrixTestCase): + pass + + +class TestInvMatrix(InvCases, MatrixTestCase): + pass + + +class TestEigvalsMatrix(EigvalsCases, MatrixTestCase): + pass + + +class TestEigMatrix(EigCases, MatrixTestCase): + pass + + +class TestSVDMatrix(SVDCases, MatrixTestCase): + pass + + +class TestCondMatrix(CondCases, MatrixTestCase): + pass + + +class TestPinvMatrix(PinvCases, MatrixTestCase): + pass + + +class TestDetMatrix(DetCases, MatrixTestCase): + pass + + +class TestLstsqMatrix(LstsqCases, MatrixTestCase): + pass + + +class _TestNorm2DMatrix(_TestNorm2D): + array = np.matrix + + +class TestNormDoubleMatrix(_TestNorm2DMatrix, _TestNormDoubleBase): + pass + + +class TestNormSingleMatrix(_TestNorm2DMatrix, _TestNormSingleBase): + pass + + +class TestNormInt64Matrix(_TestNorm2DMatrix, _TestNormInt64Base): + pass + + +class TestQRMatrix(_TestQR): + array = np.matrix diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_multiarray.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_multiarray.py new file mode 100644 index 0000000000..6d84bd4777 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_multiarray.py @@ -0,0 +1,18 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_, assert_equal, assert_array_equal + +class TestView(object): + def test_type(self): + x = np.array([1, 2, 3]) + assert_(isinstance(x.view(np.matrix), np.matrix)) + + def test_keywords(self): + x = np.array([(1, 2)], dtype=[('a', np.int8), ('b', np.int8)]) + # We must be specific about the endianness here: + y = x.view(dtype='<i2', type=np.matrix) + assert_array_equal(y, [[513]]) + + assert_(isinstance(y, np.matrix)) + assert_equal(y.dtype, np.dtype('<i2')) diff --git 
a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_numeric.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_numeric.py new file mode 100644 index 0000000000..95e1c80017 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_numeric.py @@ -0,0 +1,19 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_equal + +class TestDot(object): + def test_matscalar(self): + b1 = np.matrix(np.ones((3, 3), dtype=complex)) + assert_equal(b1*1.0, b1) + + +def test_diagonal(): + b1 = np.matrix([[1,2],[3,4]]) + diag_b1 = np.matrix([[1, 4]]) + array_b1 = np.array([1, 4]) + + assert_equal(b1.diagonal(), diag_b1) + assert_equal(np.diagonal(b1), array_b1) + assert_equal(np.diag(b1), array_b1) diff --git a/contrib/python/numpy/py2/numpy/matrixlib/tests/test_regression.py b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_regression.py new file mode 100644 index 0000000000..70e1472793 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/matrixlib/tests/test_regression.py @@ -0,0 +1,33 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_, assert_equal, assert_raises + + +class TestRegression(object): + def test_kron_matrix(self): + # Ticket #71 + x = np.matrix('[1 0; 1 0]') + assert_equal(type(np.kron(x, x)), type(x)) + + def test_matrix_properties(self): + # Ticket #125 + a = np.matrix([1.0], dtype=float) + assert_(type(a.real) is np.matrix) + assert_(type(a.imag) is np.matrix) + c, d = np.matrix([0.0]).nonzero() + assert_(type(c) is np.ndarray) + assert_(type(d) is np.ndarray) + + def test_matrix_multiply_by_1d_vector(self): + # Ticket #473 + def mul(): + np.mat(np.eye(2))*np.ones(2) + + assert_raises(ValueError, mul) + + def test_matrix_std_argmax(self): + # Ticket #83 + x = np.asmatrix(np.random.uniform(0, 1, (3, 3))) + assert_equal(x.std().shape, ()) + assert_equal(x.argmax().shape, ()) diff --git a/contrib/python/numpy/py2/numpy/polynomial/setup.py b/contrib/python/numpy/py2/numpy/polynomial/setup.py new file mode 100644 index 0000000000..cb59ee1e56 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/setup.py @@ -0,0 +1,11 @@ +from __future__ import division, print_function + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('polynomial', parent_package, top_path) + config.add_data_dir('tests') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/__init__.py b/contrib/python/numpy/py2/numpy/polynomial/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_chebyshev.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_chebyshev.py new file mode 100644 index 0000000000..7fb7492c62 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_chebyshev.py @@ -0,0 +1,621 @@ +"""Tests for chebyshev module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.chebyshev as cheb +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + + +def trim(x): + return cheb.chebtrim(x, tol=1e-6) + +T0 = [1] +T1 = [0, 1] +T2 = [-1, 0, 2] +T3 = [0, -3, 0, 4] +T4 = [1, 0, -8, 0, 8] +T5 = [0, 5, 0, -20, 0, 16] +T6 = [-1, 0, 18, 0, -48, 0, 32] +T7 = [0, -7, 0, 56, 0, -112, 0, 64] +T8 = [1, 0, -32, 0, 160, 0, -256, 0, 128] +T9 = [0, 9, 0, -120, 0, 432, 0, -576, 0, 256] + +Tlist = [T0, T1, T2, T3, T4, T5, T6, T7, T8, T9] + + +class TestPrivate(object): + + def test__cseries_to_zseries(self): + for i in range(5): + inp = np.array([2] + [1]*i, np.double) + tgt = np.array([.5]*i + [2] + [.5]*i, np.double) + res = cheb._cseries_to_zseries(inp) + assert_equal(res, tgt) + + def test__zseries_to_cseries(self): + for i in range(5): + inp = np.array([.5]*i + [2] + [.5]*i, np.double) + tgt = np.array([2] + [1]*i, np.double) + res = cheb._zseries_to_cseries(inp) + assert_equal(res, tgt) + + +class TestConstants(object): + + def test_chebdomain(self): + assert_equal(cheb.chebdomain, [-1, 1]) + + def test_chebzero(self): + assert_equal(cheb.chebzero, [0]) + + def test_chebone(self): + assert_equal(cheb.chebone, [1]) + + def test_chebx(self): + assert_equal(cheb.chebx, [0, 1]) + + +class TestArithmetic(object): + + def test_chebadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = cheb.chebadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebsub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = cheb.chebsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebmulx(self): + assert_equal(cheb.chebmulx([0]), [0]) + assert_equal(cheb.chebmulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [.5, 0, .5] + assert_equal(cheb.chebmulx(ser), tgt) + + def test_chebmul(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(i + j + 1) + tgt[i + j] += .5 + tgt[abs(i - j)] += .5 + res = cheb.chebmul([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebdiv(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = cheb.chebadd(ci, cj) + quo, rem = cheb.chebdiv(tgt, ci) + res = cheb.chebadd(cheb.chebmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_chebpow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(cheb.chebmul, [c]*j, np.array([1])) + res = cheb.chebpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2.5, 2., 1.5]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_chebval(self): + #check empty input + assert_equal(cheb.chebval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Tlist] + for i in 
range(10): + msg = "At i=%d" % i + tgt = y[i] + res = cheb.chebval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(cheb.chebval(x, [1]).shape, dims) + assert_equal(cheb.chebval(x, [1, 0]).shape, dims) + assert_equal(cheb.chebval(x, [1, 0, 0]).shape, dims) + + def test_chebval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, cheb.chebval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = cheb.chebval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_chebval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, cheb.chebval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = cheb.chebval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_chebgrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = cheb.chebgrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebgrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_chebgrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = cheb.chebgrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = cheb.chebgrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_chebint(self): + # check exceptions + assert_raises(ValueError, cheb.chebint, [0], .5) + assert_raises(ValueError, cheb.chebint, [0], -1) + assert_raises(ValueError, cheb.chebint, [0], 1, [0, 0]) + assert_raises(ValueError, cheb.chebint, [0], lbnd=[0]) + assert_raises(ValueError, cheb.chebint, [0], scl=[0]) + assert_raises(ValueError, cheb.chebint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = cheb.chebint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i]) + res = cheb.cheb2poly(chebint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(cheb.chebval(-1, chebint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + chebpol = cheb.poly2cheb(pol) + chebint = cheb.chebint(chebpol, m=1, k=[i], scl=2) + res = cheb.cheb2poly(chebint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1) + res = cheb.chebint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j 
in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k]) + res = cheb.chebint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k], lbnd=-1) + res = cheb.chebint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = cheb.chebint(tgt, m=1, k=[k], scl=2) + res = cheb.chebint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([cheb.chebint(c) for c in c2d.T]).T + res = cheb.chebint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebint(c) for c in c2d]) + res = cheb.chebint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebint(c, k=3) for c in c2d]) + res = cheb.chebint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_chebder(self): + # check exceptions + assert_raises(ValueError, cheb.chebder, [0], .5) + assert_raises(ValueError, cheb.chebder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = cheb.chebder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = cheb.chebder(cheb.chebint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = cheb.chebder(cheb.chebint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([cheb.chebder(c) for c in c2d.T]).T + res = cheb.chebder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([cheb.chebder(c) for c in c2d]) + res = cheb.chebder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_chebvander(self): + # check for 1d x + x = np.arange(3) + v = cheb.chebvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], cheb.chebval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = cheb.chebvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], cheb.chebval(x, coef)) + + def test_chebvander2d(self): + # also tests chebval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = cheb.chebvander2d(x1, x2, [1, 2]) + tgt = cheb.chebval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = cheb.chebvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_chebvander3d(self): + # also tests chebval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = cheb.chebvander3d(x1, x2, x3, [1, 2, 3]) + tgt = cheb.chebval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) 
+ assert_almost_equal(res, tgt) + + # check shape + van = cheb.chebvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting(object): + + def test_chebfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, cheb.chebfit, [1], [1], -1) + assert_raises(TypeError, cheb.chebfit, [[1]], [1], 0) + assert_raises(TypeError, cheb.chebfit, [], [1], 0) + assert_raises(TypeError, cheb.chebfit, [1], [[[1]]], 0) + assert_raises(TypeError, cheb.chebfit, [1, 2], [1], 0) + assert_raises(TypeError, cheb.chebfit, [1], [1, 2], 0) + assert_raises(TypeError, cheb.chebfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, cheb.chebfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, cheb.chebfit, [1], [1], [-1,]) + assert_raises(ValueError, cheb.chebfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, cheb.chebfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = cheb.chebfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(cheb.chebval(x, coef3), y) + coef3 = cheb.chebfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(cheb.chebval(x, coef3), y) + # + coef4 = cheb.chebfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + coef4 = cheb.chebfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + # check things still work if deg is not in strict increasing + coef4 = cheb.chebfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(cheb.chebval(x, coef4), y) + # + coef2d = cheb.chebfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = cheb.chebfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = cheb.chebfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = cheb.chebfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = cheb.chebfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = cheb.chebfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(cheb.chebfit(x, x, 1), [0, 1]) + assert_almost_equal(cheb.chebfit(x, x, [0, 1]), [0, 1]) + # test fitting only even polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = cheb.chebfit(x, y, 4) + assert_almost_equal(cheb.chebval(x, coef1), y) + coef2 = cheb.chebfit(x, y, [0, 2, 4]) + assert_almost_equal(cheb.chebval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestInterpolate(object): + + def f(self, x): + return x * (x - 1) * (x - 2) + + def test_raises(self): + assert_raises(ValueError, cheb.chebinterpolate, self.f, -1) + assert_raises(TypeError, cheb.chebinterpolate, self.f, 10.) 
+ + def test_dimensions(self): + for deg in range(1, 5): + assert_(cheb.chebinterpolate(self.f, deg).shape == (deg + 1,)) + + def test_approximation(self): + + def powx(x, p): + return x**p + + x = np.linspace(-1, 1, 10) + for deg in range(0, 10): + for p in range(0, deg + 1): + c = cheb.chebinterpolate(powx, deg, (p,)) + assert_almost_equal(cheb.chebval(x, c), powx(x, p), decimal=12) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, cheb.chebcompanion, []) + assert_raises(ValueError, cheb.chebcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(cheb.chebcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(cheb.chebcompanion([1, 2])[0, 0] == -.5) + + +class TestGauss(object): + + def test_100(self): + x, w = cheb.chebgauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. + v = cheb.chebvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.pi + assert_almost_equal(w.sum(), tgt) + + +class TestMisc(object): + + def test_chebfromroots(self): + res = cheb.chebfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + tgt = [0]*i + [1] + res = cheb.chebfromroots(roots)*2**(i-1) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebroots(self): + assert_almost_equal(cheb.chebroots([1]), []) + assert_almost_equal(cheb.chebroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = cheb.chebroots(cheb.chebfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_chebtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, cheb.chebtrim, coef, -1) + + # Test results + assert_equal(cheb.chebtrim(coef), coef[:-1]) + assert_equal(cheb.chebtrim(coef, 1), coef[:-3]) + assert_equal(cheb.chebtrim(coef, 2), [0]) + + def test_chebline(self): + assert_equal(cheb.chebline(3, 4), [3, 4]) + + def test_cheb2poly(self): + for i in range(10): + assert_almost_equal(cheb.cheb2poly([0]*i + [1]), Tlist[i]) + + def test_poly2cheb(self): + for i in range(10): + assert_almost_equal(cheb.poly2cheb(Tlist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-1, 1, 11)[1:-1] + tgt = 1./(np.sqrt(1 + x) * np.sqrt(1 - x)) + res = cheb.chebweight(x) + assert_almost_equal(res, tgt) + + def test_chebpts1(self): + #test exceptions + assert_raises(ValueError, cheb.chebpts1, 1.5) + assert_raises(ValueError, cheb.chebpts1, 0) + + #test points + tgt = [0] + assert_almost_equal(cheb.chebpts1(1), tgt) + tgt = [-0.70710678118654746, 0.70710678118654746] + assert_almost_equal(cheb.chebpts1(2), tgt) + tgt = [-0.86602540378443871, 0, 0.86602540378443871] + assert_almost_equal(cheb.chebpts1(3), tgt) + tgt = [-0.9238795325, -0.3826834323, 0.3826834323, 0.9238795325] + assert_almost_equal(cheb.chebpts1(4), tgt) + + def test_chebpts2(self): + #test exceptions + assert_raises(ValueError, cheb.chebpts2, 1.5) + assert_raises(ValueError, cheb.chebpts2, 1) + + #test points + tgt = [-1, 1] + assert_almost_equal(cheb.chebpts2(2), tgt) + tgt = [-1, 0, 1] + assert_almost_equal(cheb.chebpts2(3), tgt) + tgt = [-1, -0.5, .5, 1] + assert_almost_equal(cheb.chebpts2(4), tgt) + tgt = [-1.0, -0.707106781187, 0, 
0.707106781187, 1.0] + assert_almost_equal(cheb.chebpts2(5), tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_classes.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_classes.py new file mode 100644 index 0000000000..15e24f92b0 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_classes.py @@ -0,0 +1,642 @@ +"""Test inter-conversion of different polynomial classes. + +This tests the convert and cast methods of all the polynomial classes. + +""" +from __future__ import division, absolute_import, print_function + +import operator as op +from numbers import Number + +import pytest +import numpy as np +from numpy.polynomial import ( + Polynomial, Legendre, Chebyshev, Laguerre, Hermite, HermiteE) +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) +from numpy.compat import long + + +# +# fixtures +# + +classes = ( + Polynomial, Legendre, Chebyshev, Laguerre, + Hermite, HermiteE + ) +classids = tuple(cls.__name__ for cls in classes) + +@pytest.fixture(params=classes, ids=classids) +def Poly(request): + return request.param + +# +# helper functions +# +random = np.random.random + + +def assert_poly_almost_equal(p1, p2, msg=""): + try: + assert_(np.all(p1.domain == p2.domain)) + assert_(np.all(p1.window == p2.window)) + assert_almost_equal(p1.coef, p2.coef) + except AssertionError: + msg = "Result: %s\nTarget: %s", (p1, p2) + raise AssertionError(msg) + + +# +# Test conversion methods that depend on combinations of two classes. +# + +Poly1 = Poly +Poly2 = Poly + + +def test_conversion(Poly1, Poly2): + x = np.linspace(0, 1, 10) + coef = random((3,)) + + d1 = Poly1.domain + random((2,))*.25 + w1 = Poly1.window + random((2,))*.25 + p1 = Poly1(coef, domain=d1, window=w1) + + d2 = Poly2.domain + random((2,))*.25 + w2 = Poly2.window + random((2,))*.25 + p2 = p1.convert(kind=Poly2, domain=d2, window=w2) + + assert_almost_equal(p2.domain, d2) + assert_almost_equal(p2.window, w2) + assert_almost_equal(p2(x), p1(x)) + + +def test_cast(Poly1, Poly2): + x = np.linspace(0, 1, 10) + coef = random((3,)) + + d1 = Poly1.domain + random((2,))*.25 + w1 = Poly1.window + random((2,))*.25 + p1 = Poly1(coef, domain=d1, window=w1) + + d2 = Poly2.domain + random((2,))*.25 + w2 = Poly2.window + random((2,))*.25 + p2 = Poly2.cast(p1, domain=d2, window=w2) + + assert_almost_equal(p2.domain, d2) + assert_almost_equal(p2.window, w2) + assert_almost_equal(p2(x), p1(x)) + + +# +# test methods that depend on one class +# + + +def test_identity(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + x = np.linspace(d[0], d[1], 11) + p = Poly.identity(domain=d, window=w) + assert_equal(p.domain, d) + assert_equal(p.window, w) + assert_almost_equal(p(x), x) + + +def test_basis(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly.basis(5, domain=d, window=w) + assert_equal(p.domain, d) + assert_equal(p.window, w) + assert_equal(p.coef, [0]*5 + [1]) + + +def test_fromroots(Poly): + # check that requested roots are zeros of a polynomial + # of correct degree, domain, and window. 
+ d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + r = random((5,)) + p1 = Poly.fromroots(r, domain=d, window=w) + assert_equal(p1.degree(), len(r)) + assert_equal(p1.domain, d) + assert_equal(p1.window, w) + assert_almost_equal(p1(r), 0) + + # check that polynomial is monic + pdom = Polynomial.domain + pwin = Polynomial.window + p2 = Polynomial.cast(p1, domain=pdom, window=pwin) + assert_almost_equal(p2.coef[-1], 1) + + +def test_fit(Poly): + + def f(x): + return x*(x - 1)*(x - 2) + x = np.linspace(0, 3) + y = f(x) + + # check default value of domain and window + p = Poly.fit(x, y, 3) + assert_almost_equal(p.domain, [0, 3]) + assert_almost_equal(p(x), y) + assert_equal(p.degree(), 3) + + # check with given domains and window + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly.fit(x, y, 3, domain=d, window=w) + assert_almost_equal(p(x), y) + assert_almost_equal(p.domain, d) + assert_almost_equal(p.window, w) + p = Poly.fit(x, y, [0, 1, 2, 3], domain=d, window=w) + assert_almost_equal(p(x), y) + assert_almost_equal(p.domain, d) + assert_almost_equal(p.window, w) + + # check with class domain default + p = Poly.fit(x, y, 3, []) + assert_equal(p.domain, Poly.domain) + assert_equal(p.window, Poly.window) + p = Poly.fit(x, y, [0, 1, 2, 3], []) + assert_equal(p.domain, Poly.domain) + assert_equal(p.window, Poly.window) + + # check that fit accepts weights. + w = np.zeros_like(x) + z = y + random(y.shape)*.25 + w[::2] = 1 + p1 = Poly.fit(x[::2], z[::2], 3) + p2 = Poly.fit(x, z, 3, w=w) + p3 = Poly.fit(x, z, [0, 1, 2, 3], w=w) + assert_almost_equal(p1(x), p2(x)) + assert_almost_equal(p2(x), p3(x)) + + +def test_equal(Poly): + p1 = Poly([1, 2, 3], domain=[0, 1], window=[2, 3]) + p2 = Poly([1, 1, 1], domain=[0, 1], window=[2, 3]) + p3 = Poly([1, 2, 3], domain=[1, 2], window=[2, 3]) + p4 = Poly([1, 2, 3], domain=[0, 1], window=[1, 2]) + assert_(p1 == p1) + assert_(not p1 == p2) + assert_(not p1 == p3) + assert_(not p1 == p4) + + +def test_not_equal(Poly): + p1 = Poly([1, 2, 3], domain=[0, 1], window=[2, 3]) + p2 = Poly([1, 1, 1], domain=[0, 1], window=[2, 3]) + p3 = Poly([1, 2, 3], domain=[1, 2], window=[2, 3]) + p4 = Poly([1, 2, 3], domain=[0, 1], window=[1, 2]) + assert_(not p1 != p1) + assert_(p1 != p2) + assert_(p1 != p3) + assert_(p1 != p4) + + +def test_add(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 + p2 + assert_poly_almost_equal(p2 + p1, p3) + assert_poly_almost_equal(p1 + c2, p3) + assert_poly_almost_equal(c2 + p1, p3) + assert_poly_almost_equal(p1 + tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) + p1, p3) + assert_poly_almost_equal(p1 + np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) + p1, p3) + assert_raises(TypeError, op.add, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.add, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.add, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.add, p1, Polynomial([0])) + + +def test_sub(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 - p2 + assert_poly_almost_equal(p2 - p1, -p3) + assert_poly_almost_equal(p1 - c2, p3) + assert_poly_almost_equal(c2 - p1, -p3) + assert_poly_almost_equal(p1 - tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) - p1, -p3) + 
assert_poly_almost_equal(p1 - np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) - p1, -p3) + assert_raises(TypeError, op.sub, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.sub, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.sub, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.sub, p1, Polynomial([0])) + + +def test_mul(Poly): + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = p1 * p2 + assert_poly_almost_equal(p2 * p1, p3) + assert_poly_almost_equal(p1 * c2, p3) + assert_poly_almost_equal(c2 * p1, p3) + assert_poly_almost_equal(p1 * tuple(c2), p3) + assert_poly_almost_equal(tuple(c2) * p1, p3) + assert_poly_almost_equal(p1 * np.array(c2), p3) + assert_poly_almost_equal(np.array(c2) * p1, p3) + assert_poly_almost_equal(p1 * 2, p1 * Poly([2])) + assert_poly_almost_equal(2 * p1, p1 * Poly([2])) + assert_raises(TypeError, op.mul, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.mul, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.mul, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.mul, p1, Polynomial([0])) + + +def test_floordiv(Poly): + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + assert_poly_almost_equal(p4 // p2, p1) + assert_poly_almost_equal(p4 // c2, p1) + assert_poly_almost_equal(c4 // p2, p1) + assert_poly_almost_equal(p4 // tuple(c2), p1) + assert_poly_almost_equal(tuple(c4) // p2, p1) + assert_poly_almost_equal(p4 // np.array(c2), p1) + assert_poly_almost_equal(np.array(c4) // p2, p1) + assert_poly_almost_equal(2 // p2, Poly([0])) + assert_poly_almost_equal(p2 // 2, 0.5*p2) + assert_raises( + TypeError, op.floordiv, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises( + TypeError, op.floordiv, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.floordiv, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.floordiv, p1, Polynomial([0])) + + +def test_truediv(Poly): + # true division is valid only if the denominator is a Number and + # not a python bool. 
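+ # bool has to be ruled out explicitly because it subclasses int and therefore registers as a Number.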
+ p1 = Poly([1,2,3]) + p2 = p1 * 5 + + for stype in np.ScalarType: + if not issubclass(stype, Number) or issubclass(stype, bool): + continue + s = stype(5) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for stype in (int, long, float): + s = stype(5) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for stype in [complex]: + s = stype(5, 0) + assert_poly_almost_equal(op.truediv(p2, s), p1) + assert_raises(TypeError, op.truediv, s, p2) + for s in [tuple(), list(), dict(), bool(), np.array([1])]: + assert_raises(TypeError, op.truediv, p2, s) + assert_raises(TypeError, op.truediv, s, p2) + for ptype in classes: + assert_raises(TypeError, op.truediv, p2, ptype(1)) + + +def test_mod(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + assert_poly_almost_equal(p4 % p2, p3) + assert_poly_almost_equal(p4 % c2, p3) + assert_poly_almost_equal(c4 % p2, p3) + assert_poly_almost_equal(p4 % tuple(c2), p3) + assert_poly_almost_equal(tuple(c4) % p2, p3) + assert_poly_almost_equal(p4 % np.array(c2), p3) + assert_poly_almost_equal(np.array(c4) % p2, p3) + assert_poly_almost_equal(2 % p2, Poly([2])) + assert_poly_almost_equal(p2 % 2, Poly([0])) + assert_raises(TypeError, op.mod, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, op.mod, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, op.mod, p1, Chebyshev([0])) + else: + assert_raises(TypeError, op.mod, p1, Polynomial([0])) + + +def test_divmod(Poly): + # This checks commutation, not numerical correctness + c1 = list(random((4,)) + .5) + c2 = list(random((3,)) + .5) + c3 = list(random((2,)) + .5) + p1 = Poly(c1) + p2 = Poly(c2) + p3 = Poly(c3) + p4 = p1 * p2 + p3 + c4 = list(p4.coef) + quo, rem = divmod(p4, p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, c2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(c4, p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, tuple(c2)) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(tuple(c4), p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p4, np.array(c2)) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(np.array(c4), p2) + assert_poly_almost_equal(quo, p1) + assert_poly_almost_equal(rem, p3) + quo, rem = divmod(p2, 2) + assert_poly_almost_equal(quo, 0.5*p2) + assert_poly_almost_equal(rem, Poly([0])) + quo, rem = divmod(2, p2) + assert_poly_almost_equal(quo, Poly([0])) + assert_poly_almost_equal(rem, Poly([2])) + assert_raises(TypeError, divmod, p1, Poly([0], domain=Poly.domain + 1)) + assert_raises(TypeError, divmod, p1, Poly([0], window=Poly.window + 1)) + if Poly is Polynomial: + assert_raises(TypeError, divmod, p1, Chebyshev([0])) + else: + assert_raises(TypeError, divmod, p1, Polynomial([0])) + + +def test_roots(Poly): + d = Poly.domain * 1.25 + .25 + w = Poly.window + tgt = np.linspace(d[0], d[1], 5) + res = np.sort(Poly.fromroots(tgt, domain=d, window=w).roots()) + assert_almost_equal(res, tgt) + # default domain and window + res = np.sort(Poly.fromroots(tgt).roots()) + assert_almost_equal(res, 
tgt) + + +def test_degree(Poly): + p = Poly.basis(5) + assert_equal(p.degree(), 5) + + +def test_copy(Poly): + p1 = Poly.basis(5) + p2 = p1.copy() + assert_(p1 == p2) + assert_(p1 is not p2) + assert_(p1.coef is not p2.coef) + assert_(p1.domain is not p2.domain) + assert_(p1.window is not p2.window) + + +def test_integ(Poly): + P = Polynomial + # Check defaults + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ()) + p2 = P.cast(p0.integ(2)) + assert_poly_almost_equal(p1, P([0, 2, 3, 4])) + assert_poly_almost_equal(p2, P([0, 0, 1, 1, 1])) + # Check with k + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ(k=1)) + p2 = P.cast(p0.integ(2, k=[1, 1])) + assert_poly_almost_equal(p1, P([1, 2, 3, 4])) + assert_poly_almost_equal(p2, P([1, 1, 1, 1, 1])) + # Check with lbnd + p0 = Poly.cast(P([1*2, 2*3, 3*4])) + p1 = P.cast(p0.integ(lbnd=1)) + p2 = P.cast(p0.integ(2, lbnd=1)) + assert_poly_almost_equal(p1, P([-9, 2, 3, 4])) + assert_poly_almost_equal(p2, P([6, -9, 1, 1, 1])) + # Check scaling + d = 2*Poly.domain + p0 = Poly.cast(P([1*2, 2*3, 3*4]), domain=d) + p1 = P.cast(p0.integ()) + p2 = P.cast(p0.integ(2)) + assert_poly_almost_equal(p1, P([0, 2, 3, 4])) + assert_poly_almost_equal(p2, P([0, 0, 1, 1, 1])) + + +def test_deriv(Poly): + # Check that the derivative is the inverse of integration. It is + # assumes that the integration has been checked elsewhere. + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p1 = Poly([1, 2, 3], domain=d, window=w) + p2 = p1.integ(2, k=[1, 2]) + p3 = p1.integ(1, k=[1]) + assert_almost_equal(p2.deriv(1).coef, p3.coef) + assert_almost_equal(p2.deriv(2).coef, p1.coef) + # default domain and window + p1 = Poly([1, 2, 3]) + p2 = p1.integ(2, k=[1, 2]) + p3 = p1.integ(1, k=[1]) + assert_almost_equal(p2.deriv(1).coef, p3.coef) + assert_almost_equal(p2.deriv(2).coef, p1.coef) + + +def test_linspace(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + p = Poly([1, 2, 3], domain=d, window=w) + # check default domain + xtgt = np.linspace(d[0], d[1], 20) + ytgt = p(xtgt) + xres, yres = p.linspace(20) + assert_almost_equal(xres, xtgt) + assert_almost_equal(yres, ytgt) + # check specified domain + xtgt = np.linspace(0, 2, 20) + ytgt = p(xtgt) + xres, yres = p.linspace(20, domain=[0, 2]) + assert_almost_equal(xres, xtgt) + assert_almost_equal(yres, ytgt) + + +def test_pow(Poly): + d = Poly.domain + random((2,))*.25 + w = Poly.window + random((2,))*.25 + tgt = Poly([1], domain=d, window=w) + tst = Poly([1, 2, 3], domain=d, window=w) + for i in range(5): + assert_poly_almost_equal(tst**i, tgt) + tgt = tgt * tst + # default domain and window + tgt = Poly([1]) + tst = Poly([1, 2, 3]) + for i in range(5): + assert_poly_almost_equal(tst**i, tgt) + tgt = tgt * tst + # check error for invalid powers + assert_raises(ValueError, op.pow, tgt, 1.5) + assert_raises(ValueError, op.pow, tgt, -1) + + +def test_call(Poly): + P = Polynomial + d = Poly.domain + x = np.linspace(d[0], d[1], 11) + + # Check defaults + p = Poly.cast(P([1, 2, 3])) + tgt = 1 + x*(2 + 3*x) + res = p(x) + assert_almost_equal(res, tgt) + + +def test_cutdeg(Poly): + p = Poly([1, 2, 3]) + assert_raises(ValueError, p.cutdeg, .5) + assert_raises(ValueError, p.cutdeg, -1) + assert_equal(len(p.cutdeg(3)), 3) + assert_equal(len(p.cutdeg(2)), 3) + assert_equal(len(p.cutdeg(1)), 2) + assert_equal(len(p.cutdeg(0)), 1) + + +def test_truncate(Poly): + p = Poly([1, 2, 3]) + assert_raises(ValueError, p.truncate, .5) + assert_raises(ValueError, p.truncate, 0) + 
assert_equal(len(p.truncate(4)), 3) + assert_equal(len(p.truncate(3)), 3) + assert_equal(len(p.truncate(2)), 2) + assert_equal(len(p.truncate(1)), 1) + + +def test_trim(Poly): + c = [1, 1e-6, 1e-12, 0] + p = Poly(c) + assert_equal(p.trim().coef, c[:3]) + assert_equal(p.trim(1e-10).coef, c[:2]) + assert_equal(p.trim(1e-5).coef, c[:1]) + + +def test_mapparms(Poly): + # check with defaults. Should be identity. + d = Poly.domain + w = Poly.window + p = Poly([1], domain=d, window=w) + assert_almost_equal([0, 1], p.mapparms()) + # + w = 2*d + 1 + p = Poly([1], domain=d, window=w) + assert_almost_equal([1, 2], p.mapparms()) + + +def test_ufunc_override(Poly): + p = Poly([1, 2, 3]) + x = np.ones(3) + assert_raises(TypeError, np.add, p, x) + assert_raises(TypeError, np.add, x, p) + + + +class TestLatexRepr(object): + """Test the latex repr used by ipython """ + + def as_latex(self, obj): + # right now we ignore the formatting of scalars in our tests, since + # it makes them too verbose. Ideally, the formatting of scalars will + # be fixed such that tests below continue to pass + obj._repr_latex_scalar = lambda x: str(x) + try: + return obj._repr_latex_() + finally: + del obj._repr_latex_scalar + + def test_simple_polynomial(self): + # default input + p = Polynomial([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,x + 3.0\,x^{2}$') + + # translated input + p = Polynomial([1, 2, 3], domain=[-2, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(1.0 + x\right) + 3.0\,\left(1.0 + x\right)^{2}$') + + # scaled input + p = Polynomial([1, 2, 3], domain=[-0.5, 0.5]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(2.0x\right) + 3.0\,\left(2.0x\right)^{2}$') + + # affine input + p = Polynomial([1, 2, 3], domain=[-1, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0 + 2.0\,\left(1.0 + 2.0x\right) + 3.0\,\left(1.0 + 2.0x\right)^{2}$') + + def test_basis_func(self): + p = Chebyshev([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{T}_{0}(x) + 2.0\,{T}_{1}(x) + 3.0\,{T}_{2}(x)$') + # affine input - check no surplus parens are added + p = Chebyshev([1, 2, 3], domain=[-1, 0]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{T}_{0}(1.0 + 2.0x) + 2.0\,{T}_{1}(1.0 + 2.0x) + 3.0\,{T}_{2}(1.0 + 2.0x)$') + + def test_multichar_basis_func(self): + p = HermiteE([1, 2, 3]) + assert_equal(self.as_latex(p), + r'$x \mapsto 1.0\,{He}_{0}(x) + 2.0\,{He}_{1}(x) + 3.0\,{He}_{2}(x)$') + + +# +# Test class method that only exists for some classes +# + + +class TestInterpolate(object): + + def f(self, x): + return x * (x - 1) * (x - 2) + + def test_raises(self): + assert_raises(ValueError, Chebyshev.interpolate, self.f, -1) + assert_raises(TypeError, Chebyshev.interpolate, self.f, 10.) + + def test_dimensions(self): + for deg in range(1, 5): + assert_(Chebyshev.interpolate(self.f, deg).degree() == deg) + + def test_approximation(self): + + def powx(x, p): + return x**p + + x = np.linspace(0, 2, 10) + for deg in range(0, 10): + for t in range(0, deg + 1): + p = Chebyshev.interpolate(powx, deg, domain=[0, 2], args=(t,)) + assert_almost_equal(p(x), powx(x, t), decimal=12) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite.py new file mode 100644 index 0000000000..1287ef3fe1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite.py @@ -0,0 +1,557 @@ +"""Tests for hermite module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.hermite as herm +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +H0 = np.array([1]) +H1 = np.array([0, 2]) +H2 = np.array([-2, 0, 4]) +H3 = np.array([0, -12, 0, 8]) +H4 = np.array([12, 0, -48, 0, 16]) +H5 = np.array([0, 120, 0, -160, 0, 32]) +H6 = np.array([-120, 0, 720, 0, -480, 0, 64]) +H7 = np.array([0, -1680, 0, 3360, 0, -1344, 0, 128]) +H8 = np.array([1680, 0, -13440, 0, 13440, 0, -3584, 0, 256]) +H9 = np.array([0, 30240, 0, -80640, 0, 48384, 0, -9216, 0, 512]) + +Hlist = [H0, H1, H2, H3, H4, H5, H6, H7, H8, H9] + + +def trim(x): + return herm.hermtrim(x, tol=1e-6) + + +class TestConstants(object): + + def test_hermdomain(self): + assert_equal(herm.hermdomain, [-1, 1]) + + def test_hermzero(self): + assert_equal(herm.hermzero, [0]) + + def test_hermone(self): + assert_equal(herm.hermone, [1]) + + def test_hermx(self): + assert_equal(herm.hermx, [0, .5]) + + +class TestArithmetic(object): + x = np.linspace(-3, 3, 100) + + def test_hermadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = herm.hermadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermsub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = herm.hermsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermmulx(self): + assert_equal(herm.hermmulx([0]), [0]) + assert_equal(herm.hermmulx([1]), [0, .5]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i, 0, .5] + assert_equal(herm.hermmulx(ser), tgt) + + def test_hermmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = herm.hermval(self.x, pol1) + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + pol2 = [0]*j + [1] + val2 = herm.hermval(self.x, pol2) + pol3 = herm.hermmul(pol1, pol2) + val3 = herm.hermval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_hermdiv(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = herm.hermadd(ci, cj) + quo, rem = herm.hermdiv(tgt, ci) + res = herm.hermadd(herm.hermmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermpow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(herm.hermmul, [c]*j, np.array([1])) + res = herm.hermpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2.5, 1., .75]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_hermval(self): + #check empty input + assert_equal(herm.hermval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Hlist] + for i in range(10): + msg = "At i=%d" % i + tgt = y[i] + res = herm.hermval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for 
i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(herm.hermval(x, [1]).shape, dims) + assert_equal(herm.hermval(x, [1, 0]).shape, dims) + assert_equal(herm.hermval(x, [1, 0, 0]).shape, dims) + + def test_hermval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herm.hermval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = herm.hermval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_hermval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herm.hermval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = herm.hermval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_hermgrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = herm.hermgrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermgrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_hermgrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = herm.hermgrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herm.hermgrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_hermint(self): + # check exceptions + assert_raises(ValueError, herm.hermint, [0], .5) + assert_raises(ValueError, herm.hermint, [0], -1) + assert_raises(ValueError, herm.hermint, [0], 1, [0, 0]) + assert_raises(ValueError, herm.hermint, [0], lbnd=[0]) + assert_raises(ValueError, herm.hermint, [0], scl=[0]) + assert_raises(ValueError, herm.hermint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = herm.hermint([0], m=i, k=k) + assert_almost_equal(res, [0, .5]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i]) + res = herm.herm2poly(hermint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(herm.hermval(-1, hermint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + hermpol = herm.poly2herm(pol) + hermint = herm.hermint(hermpol, m=1, k=[i], scl=2) + res = herm.herm2poly(hermint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1) + res = herm.hermint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k]) + res = herm.hermint(pol, m=j, k=list(range(j))) + 
assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k], lbnd=-1) + res = herm.hermint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herm.hermint(tgt, m=1, k=[k], scl=2) + res = herm.hermint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herm.hermint(c) for c in c2d.T]).T + res = herm.hermint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermint(c) for c in c2d]) + res = herm.hermint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermint(c, k=3) for c in c2d]) + res = herm.hermint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_hermder(self): + # check exceptions + assert_raises(ValueError, herm.hermder, [0], .5) + assert_raises(ValueError, herm.hermder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = herm.hermder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herm.hermder(herm.hermint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herm.hermder(herm.hermint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herm.hermder(c) for c in c2d.T]).T + res = herm.hermder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herm.hermder(c) for c in c2d]) + res = herm.hermder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_hermvander(self): + # check for 1d x + x = np.arange(3) + v = herm.hermvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herm.hermval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = herm.hermvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herm.hermval(x, coef)) + + def test_hermvander2d(self): + # also tests hermval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = herm.hermvander2d(x1, x2, [1, 2]) + tgt = herm.hermval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herm.hermvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_hermvander3d(self): + # also tests hermval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = herm.hermvander3d(x1, x2, x3, [1, 2, 3]) + tgt = herm.hermval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herm.hermvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class 
TestFitting(object): + + def test_hermfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, herm.hermfit, [1], [1], -1) + assert_raises(TypeError, herm.hermfit, [[1]], [1], 0) + assert_raises(TypeError, herm.hermfit, [], [1], 0) + assert_raises(TypeError, herm.hermfit, [1], [[[1]]], 0) + assert_raises(TypeError, herm.hermfit, [1, 2], [1], 0) + assert_raises(TypeError, herm.hermfit, [1], [1, 2], 0) + assert_raises(TypeError, herm.hermfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, herm.hermfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, herm.hermfit, [1], [1], [-1,]) + assert_raises(ValueError, herm.hermfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, herm.hermfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = herm.hermfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(herm.hermval(x, coef3), y) + coef3 = herm.hermfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(herm.hermval(x, coef3), y) + # + coef4 = herm.hermfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + coef4 = herm.hermfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + # check things still work if deg is not in strict increasing + coef4 = herm.hermfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(herm.hermval(x, coef4), y) + # + coef2d = herm.hermfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = herm.hermfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = herm.hermfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = herm.hermfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = herm.hermfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = herm.hermfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(herm.hermfit(x, x, 1), [0, .5]) + assert_almost_equal(herm.hermfit(x, x, [0, 1]), [0, .5]) + # test fitting only even Legendre polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = herm.hermfit(x, y, 4) + assert_almost_equal(herm.hermval(x, coef1), y) + coef2 = herm.hermfit(x, y, [0, 2, 4]) + assert_almost_equal(herm.hermval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, herm.hermcompanion, []) + assert_raises(ValueError, herm.hermcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(herm.hermcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(herm.hermcompanion([1, 2])[0, 0] == -.25) + + +class TestGauss(object): + + def test_100(self): + x, w = herm.hermgauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. 
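+ # vv below is the Gram matrix V.T @ diag(w) @ V rescaled to a unit diagonal; exact Gauss-Hermite nodes and weights make it the identity.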
+ v = herm.hermvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.sqrt(np.pi) + assert_almost_equal(w.sum(), tgt) + + +class TestMisc(object): + + def test_hermfromroots(self): + res = herm.hermfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = herm.hermfromroots(roots) + res = herm.hermval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(herm.herm2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_hermroots(self): + assert_almost_equal(herm.hermroots([1]), []) + assert_almost_equal(herm.hermroots([1, 1]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = herm.hermroots(herm.hermfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, herm.hermtrim, coef, -1) + + # Test results + assert_equal(herm.hermtrim(coef), coef[:-1]) + assert_equal(herm.hermtrim(coef, 1), coef[:-3]) + assert_equal(herm.hermtrim(coef, 2), [0]) + + def test_hermline(self): + assert_equal(herm.hermline(3, 4), [3, 2]) + + def test_herm2poly(self): + for i in range(10): + assert_almost_equal(herm.herm2poly([0]*i + [1]), Hlist[i]) + + def test_poly2herm(self): + for i in range(10): + assert_almost_equal(herm.poly2herm(Hlist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-5, 5, 11) + tgt = np.exp(-x**2) + res = herm.hermweight(x) + assert_almost_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite_e.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite_e.py new file mode 100644 index 0000000000..ccb44ad73c --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_hermite_e.py @@ -0,0 +1,558 @@ +"""Tests for hermite_e module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.hermite_e as herme +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +He0 = np.array([1]) +He1 = np.array([0, 1]) +He2 = np.array([-1, 0, 1]) +He3 = np.array([0, -3, 0, 1]) +He4 = np.array([3, 0, -6, 0, 1]) +He5 = np.array([0, 15, 0, -10, 0, 1]) +He6 = np.array([-15, 0, 45, 0, -15, 0, 1]) +He7 = np.array([0, -105, 0, 105, 0, -21, 0, 1]) +He8 = np.array([105, 0, -420, 0, 210, 0, -28, 0, 1]) +He9 = np.array([0, 945, 0, -1260, 0, 378, 0, -36, 0, 1]) + +Helist = [He0, He1, He2, He3, He4, He5, He6, He7, He8, He9] + + +def trim(x): + return herme.hermetrim(x, tol=1e-6) + + +class TestConstants(object): + + def test_hermedomain(self): + assert_equal(herme.hermedomain, [-1, 1]) + + def test_hermezero(self): + assert_equal(herme.hermezero, [0]) + + def test_hermeone(self): + assert_equal(herme.hermeone, [1]) + + def test_hermex(self): + assert_equal(herme.hermex, [0, 1]) + + +class TestArithmetic(object): + x = np.linspace(-3, 3, 100) + + def test_hermeadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = herme.hermeadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermesub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = herme.hermesub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermemulx(self): + assert_equal(herme.hermemulx([0]), [0]) + assert_equal(herme.hermemulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i, 0, 1] + assert_equal(herme.hermemulx(ser), tgt) + + def test_hermemul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = herme.hermeval(self.x, pol1) + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + pol2 = [0]*j + [1] + val2 = herme.hermeval(self.x, pol2) + pol3 = herme.hermemul(pol1, pol2) + val3 = herme.hermeval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_hermediv(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = herme.hermeadd(ci, cj) + quo, rem = herme.hermediv(tgt, ci) + res = herme.hermeadd(herme.hermemul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_hermepow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(herme.hermemul, [c]*j, np.array([1])) + res = herme.hermepow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([4., 2., 3.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_hermeval(self): + #check empty input + assert_equal(herme.hermeval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Helist] + for i in range(10): + msg = "At i=%d" % i + tgt = y[i] + res = herme.hermeval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) 
+ + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(herme.hermeval(x, [1]).shape, dims) + assert_equal(herme.hermeval(x, [1, 0]).shape, dims) + assert_equal(herme.hermeval(x, [1, 0, 0]).shape, dims) + + def test_hermeval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herme.hermeval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = herme.hermeval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermeval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_hermeval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, herme.hermeval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = herme.hermeval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermeval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_hermegrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = herme.hermegrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermegrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_hermegrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = herme.hermegrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = herme.hermegrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_hermeint(self): + # check exceptions + assert_raises(ValueError, herme.hermeint, [0], .5) + assert_raises(ValueError, herme.hermeint, [0], -1) + assert_raises(ValueError, herme.hermeint, [0], 1, [0, 0]) + assert_raises(ValueError, herme.hermeint, [0], lbnd=[0]) + assert_raises(ValueError, herme.hermeint, [0], scl=[0]) + assert_raises(ValueError, herme.hermeint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = herme.hermeint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i]) + res = herme.herme2poly(hermeint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i], lbnd=-1) + assert_almost_equal(herme.hermeval(-1, hermeint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + hermepol = herme.poly2herme(pol) + hermeint = herme.hermeint(hermepol, m=1, k=[i], scl=2) + res = herme.herme2poly(hermeint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1) + res = herme.hermeint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = 
pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k]) + res = herme.hermeint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k], lbnd=-1) + res = herme.hermeint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = herme.hermeint(tgt, m=1, k=[k], scl=2) + res = herme.hermeint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermeint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herme.hermeint(c) for c in c2d.T]).T + res = herme.hermeint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeint(c) for c in c2d]) + res = herme.hermeint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeint(c, k=3) for c in c2d]) + res = herme.hermeint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_hermeder(self): + # check exceptions + assert_raises(ValueError, herme.hermeder, [0], .5) + assert_raises(ValueError, herme.hermeder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = herme.hermeder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herme.hermeder(herme.hermeint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = herme.hermeder( + herme.hermeint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermeder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([herme.hermeder(c) for c in c2d.T]).T + res = herme.hermeder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([herme.hermeder(c) for c in c2d]) + res = herme.hermeder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_hermevander(self): + # check for 1d x + x = np.arange(3) + v = herme.hermevander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herme.hermeval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = herme.hermevander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], herme.hermeval(x, coef)) + + def test_hermevander2d(self): + # also tests hermeval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = herme.hermevander2d(x1, x2, [1, 2]) + tgt = herme.hermeval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herme.hermevander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_hermevander3d(self): + # also tests hermeval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = herme.hermevander3d(x1, x2, x3, [1, 2, 3]) + tgt = herme.hermeval3d(x1, x2, x3, 
c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = herme.hermevander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting(object): + + def test_hermefit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, herme.hermefit, [1], [1], -1) + assert_raises(TypeError, herme.hermefit, [[1]], [1], 0) + assert_raises(TypeError, herme.hermefit, [], [1], 0) + assert_raises(TypeError, herme.hermefit, [1], [[[1]]], 0) + assert_raises(TypeError, herme.hermefit, [1, 2], [1], 0) + assert_raises(TypeError, herme.hermefit, [1], [1, 2], 0) + assert_raises(TypeError, herme.hermefit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, herme.hermefit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, herme.hermefit, [1], [1], [-1,]) + assert_raises(ValueError, herme.hermefit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, herme.hermefit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = herme.hermefit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(herme.hermeval(x, coef3), y) + coef3 = herme.hermefit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(herme.hermeval(x, coef3), y) + # + coef4 = herme.hermefit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + coef4 = herme.hermefit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + # check things still work if deg is not in strict increasing + coef4 = herme.hermefit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(herme.hermeval(x, coef4), y) + # + coef2d = herme.hermefit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = herme.hermefit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = herme.hermefit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = herme.hermefit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = herme.hermefit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = herme.hermefit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(herme.hermefit(x, x, 1), [0, 1]) + assert_almost_equal(herme.hermefit(x, x, [0, 1]), [0, 1]) + # test fitting only even Legendre polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = herme.hermefit(x, y, 4) + assert_almost_equal(herme.hermeval(x, coef1), y) + coef2 = herme.hermefit(x, y, [0, 2, 4]) + assert_almost_equal(herme.hermeval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, herme.hermecompanion, []) + assert_raises(ValueError, herme.hermecompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(herme.hermecompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(herme.hermecompanion([1, 2])[0, 0] == -.5) + + +class TestGauss(object): + + def test_100(self): + x, w = herme.hermegauss(100) + + # test orthogonality. 
Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. + v = herme.hermevander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = np.sqrt(2*np.pi) + assert_almost_equal(w.sum(), tgt) + + +class TestMisc(object): + + def test_hermefromroots(self): + res = herme.hermefromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = herme.hermefromroots(roots) + res = herme.hermeval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(herme.herme2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_hermeroots(self): + assert_almost_equal(herme.hermeroots([1]), []) + assert_almost_equal(herme.hermeroots([1, 1]), [-1]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = herme.hermeroots(herme.hermefromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_hermetrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, herme.hermetrim, coef, -1) + + # Test results + assert_equal(herme.hermetrim(coef), coef[:-1]) + assert_equal(herme.hermetrim(coef, 1), coef[:-3]) + assert_equal(herme.hermetrim(coef, 2), [0]) + + def test_hermeline(self): + assert_equal(herme.hermeline(3, 4), [3, 4]) + + def test_herme2poly(self): + for i in range(10): + assert_almost_equal(herme.herme2poly([0]*i + [1]), Helist[i]) + + def test_poly2herme(self): + for i in range(10): + assert_almost_equal(herme.poly2herme(Helist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-5, 5, 11) + tgt = np.exp(-.5*x**2) + res = herme.hermeweight(x) + assert_almost_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_laguerre.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_laguerre.py new file mode 100644 index 0000000000..3ababec5e9 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_laguerre.py @@ -0,0 +1,539 @@ +"""Tests for laguerre module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.laguerre as lag +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +L0 = np.array([1])/1 +L1 = np.array([1, -1])/1 +L2 = np.array([2, -4, 1])/2 +L3 = np.array([6, -18, 9, -1])/6 +L4 = np.array([24, -96, 72, -16, 1])/24 +L5 = np.array([120, -600, 600, -200, 25, -1])/120 +L6 = np.array([720, -4320, 5400, -2400, 450, -36, 1])/720 + +Llist = [L0, L1, L2, L3, L4, L5, L6] + + +def trim(x): + return lag.lagtrim(x, tol=1e-6) + + +class TestConstants(object): + + def test_lagdomain(self): + assert_equal(lag.lagdomain, [0, 1]) + + def test_lagzero(self): + assert_equal(lag.lagzero, [0]) + + def test_lagone(self): + assert_equal(lag.lagone, [1]) + + def test_lagx(self): + assert_equal(lag.lagx, [1, -1]) + + +class TestArithmetic(object): + x = np.linspace(-3, 3, 100) + + def test_lagadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = lag.lagadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagsub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = lag.lagsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagmulx(self): + assert_equal(lag.lagmulx([0]), [0]) + assert_equal(lag.lagmulx([1]), [1, -1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [-i, 2*i + 1, -(i + 1)] + assert_almost_equal(lag.lagmulx(ser), tgt) + + def test_lagmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = lag.lagval(self.x, pol1) + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + pol2 = [0]*j + [1] + val2 = lag.lagval(self.x, pol2) + pol3 = lag.lagmul(pol1, pol2) + val3 = lag.lagval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_lagdiv(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = lag.lagadd(ci, cj) + quo, rem = lag.lagdiv(tgt, ci) + res = lag.lagadd(lag.lagmul(quo, ci), rem) + assert_almost_equal(trim(res), trim(tgt), err_msg=msg) + + def test_lagpow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(lag.lagmul, [c]*j, np.array([1])) + res = lag.lagpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([9., -14., 6.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_lagval(self): + #check empty input + assert_equal(lag.lagval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Llist] + for i in range(7): + msg = "At i=%d" % i + tgt = y[i] + res = lag.lagval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(lag.lagval(x, [1]).shape, dims) + assert_equal(lag.lagval(x, [1, 0]).shape, dims) + assert_equal(lag.lagval(x, [1, 0, 
0]).shape, dims) + + def test_lagval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, lag.lagval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = lag.lagval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.lagval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_lagval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, lag.lagval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = lag.lagval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.lagval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_laggrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = lag.laggrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.laggrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_laggrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = lag.laggrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = lag.laggrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_lagint(self): + # check exceptions + assert_raises(ValueError, lag.lagint, [0], .5) + assert_raises(ValueError, lag.lagint, [0], -1) + assert_raises(ValueError, lag.lagint, [0], 1, [0, 0]) + assert_raises(ValueError, lag.lagint, [0], lbnd=[0]) + assert_raises(ValueError, lag.lagint, [0], scl=[0]) + assert_raises(ValueError, lag.lagint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = lag.lagint([0], m=i, k=k) + assert_almost_equal(res, [1, -1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i]) + res = lag.lag2poly(lagint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(lag.lagval(-1, lagint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + lagpol = lag.poly2lag(pol) + lagint = lag.lagint(lagpol, m=1, k=[i], scl=2) + res = lag.lag2poly(lagint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1) + res = lag.lagint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k]) + res = lag.lagint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k], lbnd=-1) + res = lag.lagint(pol, m=j, 
k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = lag.lagint(tgt, m=1, k=[k], scl=2) + res = lag.lagint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([lag.lagint(c) for c in c2d.T]).T + res = lag.lagint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagint(c) for c in c2d]) + res = lag.lagint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagint(c, k=3) for c in c2d]) + res = lag.lagint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_lagder(self): + # check exceptions + assert_raises(ValueError, lag.lagder, [0], .5) + assert_raises(ValueError, lag.lagder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = lag.lagder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = lag.lagder(lag.lagint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = lag.lagder(lag.lagint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([lag.lagder(c) for c in c2d.T]).T + res = lag.lagder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([lag.lagder(c) for c in c2d]) + res = lag.lagder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_lagvander(self): + # check for 1d x + x = np.arange(3) + v = lag.lagvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], lag.lagval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = lag.lagvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], lag.lagval(x, coef)) + + def test_lagvander2d(self): + # also tests lagval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = lag.lagvander2d(x1, x2, [1, 2]) + tgt = lag.lagval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = lag.lagvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_lagvander3d(self): + # also tests lagval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = lag.lagvander3d(x1, x2, x3, [1, 2, 3]) + tgt = lag.lagval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = lag.lagvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting(object): + + def test_lagfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + # Test exceptions + assert_raises(ValueError, lag.lagfit, [1], [1], -1) + assert_raises(TypeError, lag.lagfit, [[1]], [1], 0) + assert_raises(TypeError, lag.lagfit, [], [1], 0) + assert_raises(TypeError, lag.lagfit, [1], [[[1]]], 0) + 
assert_raises(TypeError, lag.lagfit, [1, 2], [1], 0) + assert_raises(TypeError, lag.lagfit, [1], [1, 2], 0) + assert_raises(TypeError, lag.lagfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, lag.lagfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, lag.lagfit, [1], [1], [-1,]) + assert_raises(ValueError, lag.lagfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, lag.lagfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = lag.lagfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(lag.lagval(x, coef3), y) + coef3 = lag.lagfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(lag.lagval(x, coef3), y) + # + coef4 = lag.lagfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(lag.lagval(x, coef4), y) + coef4 = lag.lagfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(lag.lagval(x, coef4), y) + # + coef2d = lag.lagfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = lag.lagfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = lag.lagfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = lag.lagfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = lag.lagfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = lag.lagfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(lag.lagfit(x, x, 1), [1, -1]) + assert_almost_equal(lag.lagfit(x, x, [0, 1]), [1, -1]) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, lag.lagcompanion, []) + assert_raises(ValueError, lag.lagcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(lag.lagcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(lag.lagcompanion([1, 2])[0, 0] == 1.5) + + +class TestGauss(object): + + def test_100(self): + x, w = lag.laggauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. 
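+ # same construction as in the other Gauss tests: rescale V.T @ diag(w) @ V to a unit diagonal and compare with the identity.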
+ v = lag.lagvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = 1.0 + assert_almost_equal(w.sum(), tgt) + + +class TestMisc(object): + + def test_lagfromroots(self): + res = lag.lagfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = lag.lagfromroots(roots) + res = lag.lagval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(lag.lag2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_lagroots(self): + assert_almost_equal(lag.lagroots([1]), []) + assert_almost_equal(lag.lagroots([0, 1]), [1]) + for i in range(2, 5): + tgt = np.linspace(0, 3, i) + res = lag.lagroots(lag.lagfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_lagtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, lag.lagtrim, coef, -1) + + # Test results + assert_equal(lag.lagtrim(coef), coef[:-1]) + assert_equal(lag.lagtrim(coef, 1), coef[:-3]) + assert_equal(lag.lagtrim(coef, 2), [0]) + + def test_lagline(self): + assert_equal(lag.lagline(3, 4), [7, -4]) + + def test_lag2poly(self): + for i in range(7): + assert_almost_equal(lag.lag2poly([0]*i + [1]), Llist[i]) + + def test_poly2lag(self): + for i in range(7): + assert_almost_equal(lag.poly2lag(Llist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(0, 10, 11) + tgt = np.exp(-x) + res = lag.lagweight(x) + assert_almost_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_legendre.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_legendre.py new file mode 100644 index 0000000000..a23086d594 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_legendre.py @@ -0,0 +1,558 @@ +"""Tests for legendre module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.legendre as leg +from numpy.polynomial.polynomial import polyval +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + +L0 = np.array([1]) +L1 = np.array([0, 1]) +L2 = np.array([-1, 0, 3])/2 +L3 = np.array([0, -3, 0, 5])/2 +L4 = np.array([3, 0, -30, 0, 35])/8 +L5 = np.array([0, 15, 0, -70, 0, 63])/8 +L6 = np.array([-5, 0, 105, 0, -315, 0, 231])/16 +L7 = np.array([0, -35, 0, 315, 0, -693, 0, 429])/16 +L8 = np.array([35, 0, -1260, 0, 6930, 0, -12012, 0, 6435])/128 +L9 = np.array([0, 315, 0, -4620, 0, 18018, 0, -25740, 0, 12155])/128 + +Llist = [L0, L1, L2, L3, L4, L5, L6, L7, L8, L9] + + +def trim(x): + return leg.legtrim(x, tol=1e-6) + + +class TestConstants(object): + + def test_legdomain(self): + assert_equal(leg.legdomain, [-1, 1]) + + def test_legzero(self): + assert_equal(leg.legzero, [0]) + + def test_legone(self): + assert_equal(leg.legone, [1]) + + def test_legx(self): + assert_equal(leg.legx, [0, 1]) + + +class TestArithmetic(object): + x = np.linspace(-1, 1, 100) + + def test_legadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = leg.legadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legsub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = leg.legsub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legmulx(self): + assert_equal(leg.legmulx([0]), [0]) + assert_equal(leg.legmulx([1]), [0, 1]) + for i in range(1, 5): + tmp = 2*i + 1 + ser = [0]*i + [1] + tgt = [0]*(i - 1) + [i/tmp, 0, (i + 1)/tmp] + assert_equal(leg.legmulx(ser), tgt) + + def test_legmul(self): + # check values of result + for i in range(5): + pol1 = [0]*i + [1] + val1 = leg.legval(self.x, pol1) + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + pol2 = [0]*j + [1] + val2 = leg.legval(self.x, pol2) + pol3 = leg.legmul(pol1, pol2) + val3 = leg.legval(self.x, pol3) + assert_(len(pol3) == i + j + 1, msg) + assert_almost_equal(val3, val1*val2, err_msg=msg) + + def test_legdiv(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1] + cj = [0]*j + [1] + tgt = leg.legadd(ci, cj) + quo, rem = leg.legdiv(tgt, ci) + res = leg.legadd(leg.legmul(quo, ci), rem) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_legpow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(leg.legmul, [c]*j, np.array([1])) + res = leg.legpow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([2., 2., 2.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = polyval(x, [1., 2., 3.]) + + def test_legval(self): + #check empty input + assert_equal(leg.legval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [polyval(x, c) for c in Llist] + for i in range(10): + msg = "At i=%d" % i + tgt = y[i] + res = leg.legval(x, [0]*i + [1]) + assert_almost_equal(res, tgt, err_msg=msg) + + #check that shape is preserved + for i in range(3): + 
dims = [2]*i + x = np.zeros(dims) + assert_equal(leg.legval(x, [1]).shape, dims) + assert_equal(leg.legval(x, [1, 0]).shape, dims) + assert_equal(leg.legval(x, [1, 0, 0]).shape, dims) + + def test_legval2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, leg.legval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = leg.legval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.legval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_legval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, leg.legval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = leg.legval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.legval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_leggrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = leg.leggrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.leggrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_leggrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = leg.leggrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = leg.leggrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_legint(self): + # check exceptions + assert_raises(ValueError, leg.legint, [0], .5) + assert_raises(ValueError, leg.legint, [0], -1) + assert_raises(ValueError, leg.legint, [0], 1, [0, 0]) + assert_raises(ValueError, leg.legint, [0], lbnd=[0]) + assert_raises(ValueError, leg.legint, [0], scl=[0]) + assert_raises(ValueError, leg.legint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = leg.legint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i]) + res = leg.leg2poly(legint) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i], lbnd=-1) + assert_almost_equal(leg.legval(-1, legint), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + legpol = leg.poly2leg(pol) + legint = leg.legint(legpol, m=1, k=[i], scl=2) + res = leg.leg2poly(legint) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1) + res = leg.legint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k]) + res = leg.legint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): 
+ for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k], lbnd=-1) + res = leg.legint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = leg.legint(tgt, m=1, k=[k], scl=2) + res = leg.legint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([leg.legint(c) for c in c2d.T]).T + res = leg.legint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legint(c) for c in c2d]) + res = leg.legint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legint(c, k=3) for c in c2d]) + res = leg.legint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_legder(self): + # check exceptions + assert_raises(ValueError, leg.legder, [0], .5) + assert_raises(ValueError, leg.legder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = leg.legder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = leg.legder(leg.legint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = leg.legder(leg.legint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([leg.legder(c) for c in c2d.T]).T + res = leg.legder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([leg.legder(c) for c in c2d]) + res = leg.legder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_legvander(self): + # check for 1d x + x = np.arange(3) + v = leg.legvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], leg.legval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = leg.legvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], leg.legval(x, coef)) + + def test_legvander2d(self): + # also tests polyval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = leg.legvander2d(x1, x2, [1, 2]) + tgt = leg.legval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = leg.legvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_legvander3d(self): + # also tests polyval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = leg.legvander3d(x1, x2, x3, [1, 2, 3]) + tgt = leg.legval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = leg.legvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestFitting(object): + + def test_legfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, 
leg.legfit, [1], [1], -1) + assert_raises(TypeError, leg.legfit, [[1]], [1], 0) + assert_raises(TypeError, leg.legfit, [], [1], 0) + assert_raises(TypeError, leg.legfit, [1], [[[1]]], 0) + assert_raises(TypeError, leg.legfit, [1, 2], [1], 0) + assert_raises(TypeError, leg.legfit, [1], [1, 2], 0) + assert_raises(TypeError, leg.legfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, leg.legfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, leg.legfit, [1], [1], [-1,]) + assert_raises(ValueError, leg.legfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, leg.legfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = leg.legfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(leg.legval(x, coef3), y) + coef3 = leg.legfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(leg.legval(x, coef3), y) + # + coef4 = leg.legfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + coef4 = leg.legfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + # check things still work if deg is not in strict increasing + coef4 = leg.legfit(x, y, [2, 3, 4, 1, 0]) + assert_equal(len(coef4), 5) + assert_almost_equal(leg.legval(x, coef4), y) + # + coef2d = leg.legfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = leg.legfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + y[0::2] = 0 + wcoef3 = leg.legfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = leg.legfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = leg.legfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = leg.legfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. + x = [1, 1j, -1, -1j] + assert_almost_equal(leg.legfit(x, x, 1), [0, 1]) + assert_almost_equal(leg.legfit(x, x, [0, 1]), [0, 1]) + # test fitting only even Legendre polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = leg.legfit(x, y, 4) + assert_almost_equal(leg.legval(x, coef1), y) + coef2 = leg.legfit(x, y, [0, 2, 4]) + assert_almost_equal(leg.legval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, leg.legcompanion, []) + assert_raises(ValueError, leg.legcompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(leg.legcompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(leg.legcompanion([1, 2])[0, 0] == -.5) + + +class TestGauss(object): + + def test_100(self): + x, w = leg.leggauss(100) + + # test orthogonality. Note that the results need to be normalized, + # otherwise the huge values that can arise from fast growing + # functions like Laguerre can be very confusing. 
+ v = leg.legvander(x, 99) + vv = np.dot(v.T * w, v) + vd = 1/np.sqrt(vv.diagonal()) + vv = vd[:, None] * vv * vd + assert_almost_equal(vv, np.eye(100)) + + # check that the integral of 1 is correct + tgt = 2.0 + assert_almost_equal(w.sum(), tgt) + + +class TestMisc(object): + + def test_legfromroots(self): + res = leg.legfromroots([]) + assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + pol = leg.legfromroots(roots) + res = leg.legval(roots, pol) + tgt = 0 + assert_(len(pol) == i + 1) + assert_almost_equal(leg.leg2poly(pol)[-1], 1) + assert_almost_equal(res, tgt) + + def test_legroots(self): + assert_almost_equal(leg.legroots([1]), []) + assert_almost_equal(leg.legroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = leg.legroots(leg.legfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_legtrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, leg.legtrim, coef, -1) + + # Test results + assert_equal(leg.legtrim(coef), coef[:-1]) + assert_equal(leg.legtrim(coef, 1), coef[:-3]) + assert_equal(leg.legtrim(coef, 2), [0]) + + def test_legline(self): + assert_equal(leg.legline(3, 4), [3, 4]) + + def test_leg2poly(self): + for i in range(10): + assert_almost_equal(leg.leg2poly([0]*i + [1]), Llist[i]) + + def test_poly2leg(self): + for i in range(10): + assert_almost_equal(leg.poly2leg(Llist[i]), [0]*i + [1]) + + def test_weight(self): + x = np.linspace(-1, 1, 11) + tgt = 1. + res = leg.legweight(x) + assert_almost_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_polynomial.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_polynomial.py new file mode 100644 index 0000000000..0c93be278f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_polynomial.py @@ -0,0 +1,578 @@ +"""Tests for polynomial module. 
+ +""" +from __future__ import division, absolute_import, print_function + +from functools import reduce + +import numpy as np +import numpy.polynomial.polynomial as poly +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + + +def trim(x): + return poly.polytrim(x, tol=1e-6) + +T0 = [1] +T1 = [0, 1] +T2 = [-1, 0, 2] +T3 = [0, -3, 0, 4] +T4 = [1, 0, -8, 0, 8] +T5 = [0, 5, 0, -20, 0, 16] +T6 = [-1, 0, 18, 0, -48, 0, 32] +T7 = [0, -7, 0, 56, 0, -112, 0, 64] +T8 = [1, 0, -32, 0, 160, 0, -256, 0, 128] +T9 = [0, 9, 0, -120, 0, 432, 0, -576, 0, 256] + +Tlist = [T0, T1, T2, T3, T4, T5, T6, T7, T8, T9] + + +class TestConstants(object): + + def test_polydomain(self): + assert_equal(poly.polydomain, [-1, 1]) + + def test_polyzero(self): + assert_equal(poly.polyzero, [0]) + + def test_polyone(self): + assert_equal(poly.polyone, [1]) + + def test_polyx(self): + assert_equal(poly.polyx, [0, 1]) + + +class TestArithmetic(object): + + def test_polyadd(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] += 1 + res = poly.polyadd([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polysub(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(max(i, j) + 1) + tgt[i] += 1 + tgt[j] -= 1 + res = poly.polysub([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polymulx(self): + assert_equal(poly.polymulx([0]), [0]) + assert_equal(poly.polymulx([1]), [0, 1]) + for i in range(1, 5): + ser = [0]*i + [1] + tgt = [0]*(i + 1) + [1] + assert_equal(poly.polymulx(ser), tgt) + + def test_polymul(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + tgt = np.zeros(i + j + 1) + tgt[i + j] += 1 + res = poly.polymul([0]*i + [1], [0]*j + [1]) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + def test_polydiv(self): + # check zero division + assert_raises(ZeroDivisionError, poly.polydiv, [1], [0]) + + # check scalar division + quo, rem = poly.polydiv([2], [2]) + assert_equal((quo, rem), (1, 0)) + quo, rem = poly.polydiv([2, 2], [2]) + assert_equal((quo, rem), ((1, 1), 0)) + + # check rest. 
+ for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + ci = [0]*i + [1, 2] + cj = [0]*j + [1, 2] + tgt = poly.polyadd(ci, cj) + quo, rem = poly.polydiv(tgt, ci) + res = poly.polyadd(poly.polymul(quo, ci), rem) + assert_equal(res, tgt, err_msg=msg) + + def test_polypow(self): + for i in range(5): + for j in range(5): + msg = "At i=%d, j=%d" % (i, j) + c = np.arange(i + 1) + tgt = reduce(poly.polymul, [c]*j, np.array([1])) + res = poly.polypow(c, j) + assert_equal(trim(res), trim(tgt), err_msg=msg) + + +class TestEvaluation(object): + # coefficients of 1 + 2*x + 3*x**2 + c1d = np.array([1., 2., 3.]) + c2d = np.einsum('i,j->ij', c1d, c1d) + c3d = np.einsum('i,j,k->ijk', c1d, c1d, c1d) + + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + y = poly.polyval(x, [1., 2., 3.]) + + def test_polyval(self): + #check empty input + assert_equal(poly.polyval([], [1]).size, 0) + + #check normal input) + x = np.linspace(-1, 1) + y = [x**i for i in range(5)] + for i in range(5): + tgt = y[i] + res = poly.polyval(x, [0]*i + [1]) + assert_almost_equal(res, tgt) + tgt = x*(x**2 - 1) + res = poly.polyval(x, [0, -1, 0, 1]) + assert_almost_equal(res, tgt) + + #check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(poly.polyval(x, [1]).shape, dims) + assert_equal(poly.polyval(x, [1, 0]).shape, dims) + assert_equal(poly.polyval(x, [1, 0, 0]).shape, dims) + + def test_polyvalfromroots(self): + # check exception for broadcasting x values over root array with + # too few dimensions + assert_raises(ValueError, poly.polyvalfromroots, + [1], [1], tensor=False) + + # check empty input + assert_equal(poly.polyvalfromroots([], [1]).size, 0) + assert_(poly.polyvalfromroots([], [1]).shape == (0,)) + + # check empty input + multidimensional roots + assert_equal(poly.polyvalfromroots([], [[1] * 5]).size, 0) + assert_(poly.polyvalfromroots([], [[1] * 5]).shape == (5, 0)) + + # check scalar input + assert_equal(poly.polyvalfromroots(1, 1), 0) + assert_(poly.polyvalfromroots(1, np.ones((3, 3))).shape == (3,)) + + # check normal input) + x = np.linspace(-1, 1) + y = [x**i for i in range(5)] + for i in range(1, 5): + tgt = y[i] + res = poly.polyvalfromroots(x, [0]*i) + assert_almost_equal(res, tgt) + tgt = x*(x - 1)*(x + 1) + res = poly.polyvalfromroots(x, [-1, 0, 1]) + assert_almost_equal(res, tgt) + + # check that shape is preserved + for i in range(3): + dims = [2]*i + x = np.zeros(dims) + assert_equal(poly.polyvalfromroots(x, [1]).shape, dims) + assert_equal(poly.polyvalfromroots(x, [1, 0]).shape, dims) + assert_equal(poly.polyvalfromroots(x, [1, 0, 0]).shape, dims) + + # check compatibility with factorization + ptest = [15, 2, -16, -2, 1] + r = poly.polyroots(ptest) + x = np.linspace(-1, 1) + assert_almost_equal(poly.polyval(x, ptest), + poly.polyvalfromroots(x, r)) + + # check multidimensional arrays of roots and values + # check tensor=False + rshape = (3, 5) + x = np.arange(-3, 2) + r = np.random.randint(-5, 5, size=rshape) + res = poly.polyvalfromroots(x, r, tensor=False) + tgt = np.empty(r.shape[1:]) + for ii in range(tgt.size): + tgt[ii] = poly.polyvalfromroots(x[ii], r[:, ii]) + assert_equal(res, tgt) + + # check tensor=True + x = np.vstack([x, 2*x]) + res = poly.polyvalfromroots(x, r, tensor=True) + tgt = np.empty(r.shape[1:] + x.shape) + for ii in range(r.shape[1]): + for jj in range(x.shape[0]): + tgt[ii, jj, :] = poly.polyvalfromroots(x[jj], r[:, ii]) + assert_equal(res, tgt) + + def test_polyval2d(self): + x1, x2, x3 = self.x + 
y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, poly.polyval2d, x1, x2[:2], self.c2d) + + #test values + tgt = y1*y2 + res = poly.polyval2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polyval2d(z, z, self.c2d) + assert_(res.shape == (2, 3)) + + def test_polyval3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test exceptions + assert_raises(ValueError, poly.polyval3d, x1, x2, x3[:2], self.c3d) + + #test values + tgt = y1*y2*y3 + res = poly.polyval3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polyval3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)) + + def test_polygrid2d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j->ij', y1, y2) + res = poly.polygrid2d(x1, x2, self.c2d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polygrid2d(z, z, self.c2d) + assert_(res.shape == (2, 3)*2) + + def test_polygrid3d(self): + x1, x2, x3 = self.x + y1, y2, y3 = self.y + + #test values + tgt = np.einsum('i,j,k->ijk', y1, y2, y3) + res = poly.polygrid3d(x1, x2, x3, self.c3d) + assert_almost_equal(res, tgt) + + #test shape + z = np.ones((2, 3)) + res = poly.polygrid3d(z, z, z, self.c3d) + assert_(res.shape == (2, 3)*3) + + +class TestIntegral(object): + + def test_polyint(self): + # check exceptions + assert_raises(ValueError, poly.polyint, [0], .5) + assert_raises(ValueError, poly.polyint, [0], -1) + assert_raises(ValueError, poly.polyint, [0], 1, [0, 0]) + assert_raises(ValueError, poly.polyint, [0], lbnd=[0]) + assert_raises(ValueError, poly.polyint, [0], scl=[0]) + assert_raises(ValueError, poly.polyint, [0], axis=.5) + + # test integration of zero polynomial + for i in range(2, 5): + k = [0]*(i - 2) + [1] + res = poly.polyint([0], m=i, k=k) + assert_almost_equal(res, [0, 1]) + + # check single integration with integration constant + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [1/scl] + res = poly.polyint(pol, m=1, k=[i]) + assert_almost_equal(trim(res), trim(tgt)) + + # check single integration with integration constant and lbnd + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + res = poly.polyint(pol, m=1, k=[i], lbnd=-1) + assert_almost_equal(poly.polyval(-1, res), i) + + # check single integration with integration constant and scaling + for i in range(5): + scl = i + 1 + pol = [0]*i + [1] + tgt = [i] + [0]*i + [2/scl] + res = poly.polyint(pol, m=1, k=[i], scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with default k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1) + res = poly.polyint(pol, m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with defined k + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k]) + res = poly.polyint(pol, m=j, k=list(range(j))) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with lbnd + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k], lbnd=-1) + res = poly.polyint(pol, m=j, k=list(range(j)), lbnd=-1) + assert_almost_equal(trim(res), trim(tgt)) + + # check multiple integrations with scaling + for i in range(5): + for j in range(2, 5): + pol = [0]*i + [1] + tgt = 
pol[:] + for k in range(j): + tgt = poly.polyint(tgt, m=1, k=[k], scl=2) + res = poly.polyint(pol, m=j, k=list(range(j)), scl=2) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyint_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([poly.polyint(c) for c in c2d.T]).T + res = poly.polyint(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyint(c) for c in c2d]) + res = poly.polyint(c2d, axis=1) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyint(c, k=3) for c in c2d]) + res = poly.polyint(c2d, k=3, axis=1) + assert_almost_equal(res, tgt) + + +class TestDerivative(object): + + def test_polyder(self): + # check exceptions + assert_raises(ValueError, poly.polyder, [0], .5) + assert_raises(ValueError, poly.polyder, [0], -1) + + # check that zeroth derivative does nothing + for i in range(5): + tgt = [0]*i + [1] + res = poly.polyder(tgt, m=0) + assert_equal(trim(res), trim(tgt)) + + # check that derivation is the inverse of integration + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = poly.polyder(poly.polyint(tgt, m=j), m=j) + assert_almost_equal(trim(res), trim(tgt)) + + # check derivation with scaling + for i in range(5): + for j in range(2, 5): + tgt = [0]*i + [1] + res = poly.polyder(poly.polyint(tgt, m=j, scl=2), m=j, scl=.5) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyder_axis(self): + # check that axis keyword works + c2d = np.random.random((3, 4)) + + tgt = np.vstack([poly.polyder(c) for c in c2d.T]).T + res = poly.polyder(c2d, axis=0) + assert_almost_equal(res, tgt) + + tgt = np.vstack([poly.polyder(c) for c in c2d]) + res = poly.polyder(c2d, axis=1) + assert_almost_equal(res, tgt) + + +class TestVander(object): + # some random values in [-1, 1) + x = np.random.random((3, 5))*2 - 1 + + def test_polyvander(self): + # check for 1d x + x = np.arange(3) + v = poly.polyvander(x, 3) + assert_(v.shape == (3, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], poly.polyval(x, coef)) + + # check for 2d x + x = np.array([[1, 2], [3, 4], [5, 6]]) + v = poly.polyvander(x, 3) + assert_(v.shape == (3, 2, 4)) + for i in range(4): + coef = [0]*i + [1] + assert_almost_equal(v[..., i], poly.polyval(x, coef)) + + def test_polyvander2d(self): + # also tests polyval2d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3)) + van = poly.polyvander2d(x1, x2, [1, 2]) + tgt = poly.polyval2d(x1, x2, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = poly.polyvander2d([x1], [x2], [1, 2]) + assert_(van.shape == (1, 5, 6)) + + def test_polyvander3d(self): + # also tests polyval3d for non-square coefficient array + x1, x2, x3 = self.x + c = np.random.random((2, 3, 4)) + van = poly.polyvander3d(x1, x2, x3, [1, 2, 3]) + tgt = poly.polyval3d(x1, x2, x3, c) + res = np.dot(van, c.flat) + assert_almost_equal(res, tgt) + + # check shape + van = poly.polyvander3d([x1], [x2], [x3], [1, 2, 3]) + assert_(van.shape == (1, 5, 24)) + + +class TestCompanion(object): + + def test_raises(self): + assert_raises(ValueError, poly.polycompanion, []) + assert_raises(ValueError, poly.polycompanion, [1]) + + def test_dimensions(self): + for i in range(1, 5): + coef = [0]*i + [1] + assert_(poly.polycompanion(coef).shape == (i, i)) + + def test_linear_root(self): + assert_(poly.polycompanion([1, 2])[0, 0] == -.5) + + +class TestMisc(object): + + def test_polyfromroots(self): + res = poly.polyfromroots([]) + 
assert_almost_equal(trim(res), [1]) + for i in range(1, 5): + roots = np.cos(np.linspace(-np.pi, 0, 2*i + 1)[1::2]) + tgt = Tlist[i] + res = poly.polyfromroots(roots)*2**(i-1) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyroots(self): + assert_almost_equal(poly.polyroots([1]), []) + assert_almost_equal(poly.polyroots([1, 2]), [-.5]) + for i in range(2, 5): + tgt = np.linspace(-1, 1, i) + res = poly.polyroots(poly.polyfromroots(tgt)) + assert_almost_equal(trim(res), trim(tgt)) + + def test_polyfit(self): + def f(x): + return x*(x - 1)*(x - 2) + + def f2(x): + return x**4 + x**2 + 1 + + # Test exceptions + assert_raises(ValueError, poly.polyfit, [1], [1], -1) + assert_raises(TypeError, poly.polyfit, [[1]], [1], 0) + assert_raises(TypeError, poly.polyfit, [], [1], 0) + assert_raises(TypeError, poly.polyfit, [1], [[[1]]], 0) + assert_raises(TypeError, poly.polyfit, [1, 2], [1], 0) + assert_raises(TypeError, poly.polyfit, [1], [1, 2], 0) + assert_raises(TypeError, poly.polyfit, [1], [1], 0, w=[[1]]) + assert_raises(TypeError, poly.polyfit, [1], [1], 0, w=[1, 1]) + assert_raises(ValueError, poly.polyfit, [1], [1], [-1,]) + assert_raises(ValueError, poly.polyfit, [1], [1], [2, -1, 6]) + assert_raises(TypeError, poly.polyfit, [1], [1], []) + + # Test fit + x = np.linspace(0, 2) + y = f(x) + # + coef3 = poly.polyfit(x, y, 3) + assert_equal(len(coef3), 4) + assert_almost_equal(poly.polyval(x, coef3), y) + coef3 = poly.polyfit(x, y, [0, 1, 2, 3]) + assert_equal(len(coef3), 4) + assert_almost_equal(poly.polyval(x, coef3), y) + # + coef4 = poly.polyfit(x, y, 4) + assert_equal(len(coef4), 5) + assert_almost_equal(poly.polyval(x, coef4), y) + coef4 = poly.polyfit(x, y, [0, 1, 2, 3, 4]) + assert_equal(len(coef4), 5) + assert_almost_equal(poly.polyval(x, coef4), y) + # + coef2d = poly.polyfit(x, np.array([y, y]).T, 3) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + coef2d = poly.polyfit(x, np.array([y, y]).T, [0, 1, 2, 3]) + assert_almost_equal(coef2d, np.array([coef3, coef3]).T) + # test weighting + w = np.zeros_like(x) + yw = y.copy() + w[1::2] = 1 + yw[0::2] = 0 + wcoef3 = poly.polyfit(x, yw, 3, w=w) + assert_almost_equal(wcoef3, coef3) + wcoef3 = poly.polyfit(x, yw, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef3, coef3) + # + wcoef2d = poly.polyfit(x, np.array([yw, yw]).T, 3, w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + wcoef2d = poly.polyfit(x, np.array([yw, yw]).T, [0, 1, 2, 3], w=w) + assert_almost_equal(wcoef2d, np.array([coef3, coef3]).T) + # test scaling with complex values x points whose square + # is zero when summed. 
+ x = [1, 1j, -1, -1j] + assert_almost_equal(poly.polyfit(x, x, 1), [0, 1]) + assert_almost_equal(poly.polyfit(x, x, [0, 1]), [0, 1]) + # test fitting only even Polyendre polynomials + x = np.linspace(-1, 1) + y = f2(x) + coef1 = poly.polyfit(x, y, 4) + assert_almost_equal(poly.polyval(x, coef1), y) + coef2 = poly.polyfit(x, y, [0, 2, 4]) + assert_almost_equal(poly.polyval(x, coef2), y) + assert_almost_equal(coef1, coef2) + + def test_polytrim(self): + coef = [2, -1, 1, 0] + + # Test exceptions + assert_raises(ValueError, poly.polytrim, coef, -1) + + # Test results + assert_equal(poly.polytrim(coef), coef[:-1]) + assert_equal(poly.polytrim(coef, 1), coef[:-3]) + assert_equal(poly.polytrim(coef, 2), [0]) + + def test_polyline(self): + assert_equal(poly.polyline(3, 4), [3, 4]) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_polyutils.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_polyutils.py new file mode 100644 index 0000000000..801c558ccc --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_polyutils.py @@ -0,0 +1,108 @@ +"""Tests for polyutils module. + +""" +from __future__ import division, absolute_import, print_function + +import numpy as np +import numpy.polynomial.polyutils as pu +from numpy.testing import ( + assert_almost_equal, assert_raises, assert_equal, assert_, + ) + + +class TestMisc(object): + + def test_trimseq(self): + for i in range(5): + tgt = [1] + res = pu.trimseq([1] + [0]*5) + assert_equal(res, tgt) + + def test_as_series(self): + # check exceptions + assert_raises(ValueError, pu.as_series, [[]]) + assert_raises(ValueError, pu.as_series, [[[1, 2]]]) + assert_raises(ValueError, pu.as_series, [[1], ['a']]) + # check common types + types = ['i', 'd', 'O'] + for i in range(len(types)): + for j in range(i): + ci = np.ones(1, types[i]) + cj = np.ones(1, types[j]) + [resi, resj] = pu.as_series([ci, cj]) + assert_(resi.dtype.char == resj.dtype.char) + assert_(resj.dtype.char == types[i]) + + def test_trimcoef(self): + coef = [2, -1, 1, 0] + # Test exceptions + assert_raises(ValueError, pu.trimcoef, coef, -1) + # Test results + assert_equal(pu.trimcoef(coef), coef[:-1]) + assert_equal(pu.trimcoef(coef, 1), coef[:-3]) + assert_equal(pu.trimcoef(coef, 2), [0]) + + +class TestDomain(object): + + def test_getdomain(self): + # test for real values + x = [1, 10, 3, -1] + tgt = [-1, 10] + res = pu.getdomain(x) + assert_almost_equal(res, tgt) + + # test for complex values + x = [1 + 1j, 1 - 1j, 0, 2] + tgt = [-1j, 2 + 1j] + res = pu.getdomain(x) + assert_almost_equal(res, tgt) + + def test_mapdomain(self): + # test for real values + dom1 = [0, 4] + dom2 = [1, 3] + tgt = dom2 + res = pu.mapdomain(dom1, dom1, dom2) + assert_almost_equal(res, tgt) + + # test for complex values + dom1 = [0 - 1j, 2 + 1j] + dom2 = [-2, 2] + tgt = dom2 + x = dom1 + res = pu.mapdomain(x, dom1, dom2) + assert_almost_equal(res, tgt) + + # test for multidimensional arrays + dom1 = [0, 4] + dom2 = [1, 3] + tgt = np.array([dom2, dom2]) + x = np.array([dom1, dom1]) + res = pu.mapdomain(x, dom1, dom2) + assert_almost_equal(res, tgt) + + # test that subtypes are preserved. + class MyNDArray(np.ndarray): + pass + + dom1 = [0, 4] + dom2 = [1, 3] + x = np.array([dom1, dom1]).view(MyNDArray) + res = pu.mapdomain(x, dom1, dom2) + assert_(isinstance(res, MyNDArray)) + + def test_mapparms(self): + # test for real values + dom1 = [0, 4] + dom2 = [1, 3] + tgt = [1, .5] + res = pu. 
mapparms(dom1, dom2) + assert_almost_equal(res, tgt) + + # test for complex values + dom1 = [0 - 1j, 2 + 1j] + dom2 = [-2, 2] + tgt = [-1 + 1j, 1 - 1j] + res = pu.mapparms(dom1, dom2) + assert_almost_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/polynomial/tests/test_printing.py b/contrib/python/numpy/py2/numpy/polynomial/tests/test_printing.py new file mode 100644 index 0000000000..3f12364022 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/polynomial/tests/test_printing.py @@ -0,0 +1,68 @@ +from __future__ import division, absolute_import, print_function + +import numpy.polynomial as poly +from numpy.testing import assert_equal + + +class TestStr(object): + def test_polynomial_str(self): + res = str(poly.Polynomial([0, 1])) + tgt = 'poly([0. 1.])' + assert_equal(res, tgt) + + def test_chebyshev_str(self): + res = str(poly.Chebyshev([0, 1])) + tgt = 'cheb([0. 1.])' + assert_equal(res, tgt) + + def test_legendre_str(self): + res = str(poly.Legendre([0, 1])) + tgt = 'leg([0. 1.])' + assert_equal(res, tgt) + + def test_hermite_str(self): + res = str(poly.Hermite([0, 1])) + tgt = 'herm([0. 1.])' + assert_equal(res, tgt) + + def test_hermiteE_str(self): + res = str(poly.HermiteE([0, 1])) + tgt = 'herme([0. 1.])' + assert_equal(res, tgt) + + def test_laguerre_str(self): + res = str(poly.Laguerre([0, 1])) + tgt = 'lag([0. 1.])' + assert_equal(res, tgt) + + +class TestRepr(object): + def test_polynomial_str(self): + res = repr(poly.Polynomial([0, 1])) + tgt = 'Polynomial([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_chebyshev_str(self): + res = repr(poly.Chebyshev([0, 1])) + tgt = 'Chebyshev([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_legendre_repr(self): + res = repr(poly.Legendre([0, 1])) + tgt = 'Legendre([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_hermite_repr(self): + res = repr(poly.Hermite([0, 1])) + tgt = 'Hermite([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_hermiteE_repr(self): + res = repr(poly.HermiteE([0, 1])) + tgt = 'HermiteE([0., 1.], domain=[-1, 1], window=[-1, 1])' + assert_equal(res, tgt) + + def test_laguerre_repr(self): + res = repr(poly.Laguerre([0, 1])) + tgt = 'Laguerre([0., 1.], domain=[0, 1], window=[0, 1])' + assert_equal(res, tgt) diff --git a/contrib/python/numpy/py2/numpy/random/mtrand/generate_mtrand_c.py b/contrib/python/numpy/py2/numpy/random/mtrand/generate_mtrand_c.py new file mode 100644 index 0000000000..ec935e6ddf --- /dev/null +++ b/contrib/python/numpy/py2/numpy/random/mtrand/generate_mtrand_c.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +from __future__ import division, absolute_import, print_function + +import sys +import re +import os + +unused_internal_funcs = ['__Pyx_PrintItem', + '__Pyx_PrintNewline', + '__Pyx_ReRaise', + #'__Pyx_GetExcValue', + '__Pyx_ArgTypeTest', + '__Pyx_SetVtable', + '__Pyx_GetVtable', + '__Pyx_CreateClass'] + +if __name__ == '__main__': + # Use cython here so that long docstrings are broken up. + # This is needed for some VC++ compilers. 
+ os.system('cython mtrand.pyx') + mtrand_c = open('mtrand.c', 'r') + processed = open('mtrand_pp.c', 'w') + unused_funcs_str = '(' + '|'.join(unused_internal_funcs) + ')' + uifpat = re.compile(r'static \w+ \*?'+unused_funcs_str+r'.*/\*proto\*/') + linepat = re.compile(r'/\* ".*/mtrand.pyx":') + for linenum, line in enumerate(mtrand_c): + m = re.match(r'^(\s+arrayObject\w*\s*=\s*[(])[(]PyObject\s*[*][)]', + line) + if m: + line = '%s(PyArrayObject *)%s' % (m.group(1), line[m.end():]) + m = uifpat.match(line) + if m: + line = '' + m = re.search(unused_funcs_str, line) + if m: + print("%s was declared unused, but is used at line %d" % (m.group(), + linenum+1), file=sys.stderr) + line = linepat.sub(r'/* "mtrand.pyx":', line) + processed.write(line) + mtrand_c.close() + processed.close() + os.rename('mtrand_pp.c', 'mtrand.c') diff --git a/contrib/python/numpy/py2/numpy/random/mtrand/randint_helpers.pxi.in b/contrib/python/numpy/py2/numpy/random/mtrand/randint_helpers.pxi.in new file mode 100644 index 0000000000..894a25167f --- /dev/null +++ b/contrib/python/numpy/py2/numpy/random/mtrand/randint_helpers.pxi.in @@ -0,0 +1,77 @@ +""" +Template for each `dtype` helper function in `np.random.randint`. +""" + +{{py: + +dtypes = ( + ('bool', 'bool', 'bool_'), + ('int8', 'uint8', 'int8'), + ('int16', 'uint16', 'int16'), + ('int32', 'uint32', 'int32'), + ('int64', 'uint64', 'int64'), + ('uint8', 'uint8', 'uint8'), + ('uint16', 'uint16', 'uint16'), + ('uint32', 'uint32', 'uint32'), + ('uint64', 'uint64', 'uint64'), +) + +def get_dispatch(dtypes): + for npy_dt, npy_udt, np_dt in dtypes: + yield npy_dt, npy_udt, np_dt +}} + +{{for npy_dt, npy_udt, np_dt in get_dispatch(dtypes)}} + +def _rand_{{npy_dt}}(npy_{{npy_dt}} low, npy_{{npy_dt}} high, size, rngstate): + """ + _rand_{{npy_dt}}(low, high, size, rngstate) + + Return random np.{{np_dt}} integers between ``low`` and ``high``, inclusive. + + Return random integers from the "discrete uniform" distribution in the + closed interval [``low``, ``high``). On entry the arguments are presumed + to have been validated for size and order for the np.{{np_dt}} type. + + Parameters + ---------- + low : int + Lowest (signed) integer to be drawn from the distribution. + high : int + Highest (signed) integer to be drawn from the distribution. + size : int or tuple of ints + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + rngstate : encapsulated pointer to rk_state + The specific type depends on the python version. In Python 2 it is + a PyCObject, in Python 3 a PyCapsule object. + + Returns + ------- + out : python integer or ndarray of np.{{np_dt}} + `size`-shaped array of random integers from the appropriate + distribution, or a single such random int if `size` not provided. 
+ + """ + cdef npy_{{npy_udt}} off, rng, buf + cdef npy_{{npy_udt}} *out + cdef ndarray array "arrayObject" + cdef npy_intp cnt + cdef rk_state *state = <rk_state *>PyCapsule_GetPointer(rngstate, NULL) + + off = <npy_{{npy_udt}}>(low) + rng = <npy_{{npy_udt}}>(high) - <npy_{{npy_udt}}>(low) + + if size is None: + rk_random_{{npy_udt}}(off, rng, 1, &buf, state) + return np.{{np_dt}}(<npy_{{npy_dt}}>buf) + else: + array = <ndarray>np.empty(size, np.{{np_dt}}) + cnt = PyArray_SIZE(array) + array_data = <npy_{{npy_udt}} *>PyArray_DATA(array) + with nogil: + rk_random_{{npy_udt}}(off, rng, cnt, array_data, state) + return array + +{{endfor}} diff --git a/contrib/python/numpy/py2/numpy/random/setup.py b/contrib/python/numpy/py2/numpy/random/setup.py new file mode 100644 index 0000000000..394a70ead3 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/random/setup.py @@ -0,0 +1,63 @@ +from __future__ import division, print_function + +from os.path import join +import sys +from distutils.dep_util import newer +from distutils.msvccompiler import get_build_version as get_msvc_build_version + +def needs_mingw_ftime_workaround(): + # We need the mingw workaround for _ftime if the msvc runtime version is + # 7.1 or above and we build with mingw ... + # ... but we can't easily detect compiler version outside distutils command + # context, so we will need to detect in randomkit whether we build with gcc + msver = get_msvc_build_version() + if msver and msver >= 8: + return True + + return False + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration, get_mathlibs + config = Configuration('random', parent_package, top_path) + + def generate_libraries(ext, build_dir): + config_cmd = config.get_config_cmd() + libs = get_mathlibs() + if sys.platform == 'win32': + libs.append('Advapi32') + ext.libraries.extend(libs) + return None + + # enable unix large file support on 32 bit systems + # (64 bit off_t, lseek -> lseek64 etc.) 
+ if sys.platform[:3] == "aix": + defs = [('_LARGE_FILES', None)] + else: + defs = [('_FILE_OFFSET_BITS', '64'), + ('_LARGEFILE_SOURCE', '1'), + ('_LARGEFILE64_SOURCE', '1')] + if needs_mingw_ftime_workaround(): + defs.append(("NPY_NEEDS_MINGW_TIME_WORKAROUND", None)) + + libs = [] + # Configure mtrand + config.add_extension('mtrand', + sources=[join('mtrand', x) for x in + ['mtrand.c', 'randomkit.c', 'initarray.c', + 'distributions.c']]+[generate_libraries], + libraries=libs, + depends=[join('mtrand', '*.h'), + join('mtrand', '*.pyx'), + join('mtrand', '*.pxi'),], + define_macros=defs, + ) + + config.add_data_files(('.', join('mtrand', 'randomkit.h'))) + config.add_data_dir('tests') + + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/contrib/python/numpy/py2/numpy/random/tests/__init__.py b/contrib/python/numpy/py2/numpy/random/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/random/tests/test_random.py b/contrib/python/numpy/py2/numpy/random/tests/test_random.py new file mode 100644 index 0000000000..4529b4fbd7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/random/tests/test_random.py @@ -0,0 +1,1663 @@ +from __future__ import division, absolute_import, print_function +import warnings + +import numpy as np +from numpy.testing import ( + assert_, assert_raises, assert_equal, assert_warns, + assert_no_warnings, assert_array_equal, assert_array_almost_equal, + suppress_warnings + ) +from numpy import random +import sys + + +class TestSeed(object): + def test_scalar(self): + s = np.random.RandomState(0) + assert_equal(s.randint(1000), 684) + s = np.random.RandomState(4294967295) + assert_equal(s.randint(1000), 419) + + def test_array(self): + s = np.random.RandomState(range(10)) + assert_equal(s.randint(1000), 468) + s = np.random.RandomState(np.arange(10)) + assert_equal(s.randint(1000), 468) + s = np.random.RandomState([0]) + assert_equal(s.randint(1000), 973) + s = np.random.RandomState([4294967295]) + assert_equal(s.randint(1000), 265) + + def test_invalid_scalar(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, np.random.RandomState, -0.5) + assert_raises(ValueError, np.random.RandomState, -1) + + def test_invalid_array(self): + # seed must be an unsigned 32 bit integer + assert_raises(TypeError, np.random.RandomState, [-0.5]) + assert_raises(ValueError, np.random.RandomState, [-1]) + assert_raises(ValueError, np.random.RandomState, [4294967296]) + assert_raises(ValueError, np.random.RandomState, [1, 2, 4294967296]) + assert_raises(ValueError, np.random.RandomState, [1, -2, 4294967296]) + + def test_invalid_array_shape(self): + # gh-9832 + assert_raises(ValueError, np.random.RandomState, np.array([], dtype=np.int64)) + assert_raises(ValueError, np.random.RandomState, [[1, 2, 3]]) + assert_raises(ValueError, np.random.RandomState, [[1, 2, 3], + [4, 5, 6]]) + + +class TestBinomial(object): + def test_n_zero(self): + # Tests the corner case of n == 0 for the binomial distribution. + # binomial(0, p) should be zero for any p in [0, 1]. + # This test addresses issue #3480. + zeros = np.zeros(2, dtype='int') + for p in [0, .5, 1]: + assert_(random.binomial(0, p) == 0) + assert_array_equal(random.binomial(zeros, p), zeros) + + def test_p_is_nan(self): + # Issue #4571. 
+ assert_raises(ValueError, random.binomial, 1, np.nan) + + +class TestMultinomial(object): + def test_basic(self): + random.multinomial(100, [0.2, 0.8]) + + def test_zero_probability(self): + random.multinomial(100, [0.2, 0.8, 0.0, 0.0, 0.0]) + + def test_int_negative_interval(self): + assert_(-5 <= random.randint(-5, -1) < -1) + x = random.randint(-5, -1, 5) + assert_(np.all(-5 <= x)) + assert_(np.all(x < -1)) + + def test_size(self): + # gh-3173 + p = [0.5, 0.5] + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.multinomial(1, p, [2, 2]).shape, (2, 2, 2)) + assert_equal(np.random.multinomial(1, p, (2, 2)).shape, (2, 2, 2)) + assert_equal(np.random.multinomial(1, p, np.array((2, 2))).shape, + (2, 2, 2)) + + assert_raises(TypeError, np.random.multinomial, 1, p, + float(1)) + + +class TestSetState(object): + def setup(self): + self.seed = 1234567890 + self.prng = random.RandomState(self.seed) + self.state = self.prng.get_state() + + def test_basic(self): + old = self.prng.tomaxint(16) + self.prng.set_state(self.state) + new = self.prng.tomaxint(16) + assert_(np.all(old == new)) + + def test_gaussian_reset(self): + # Make sure the cached every-other-Gaussian is reset. + old = self.prng.standard_normal(size=3) + self.prng.set_state(self.state) + new = self.prng.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_gaussian_reset_in_media_res(self): + # When the state is saved with a cached Gaussian, make sure the + # cached Gaussian is restored. + + self.prng.standard_normal() + state = self.prng.get_state() + old = self.prng.standard_normal(size=3) + self.prng.set_state(state) + new = self.prng.standard_normal(size=3) + assert_(np.all(old == new)) + + def test_backwards_compatibility(self): + # Make sure we can accept old state tuples that do not have the + # cached Gaussian value. + old_state = self.state[:-2] + x1 = self.prng.standard_normal(size=16) + self.prng.set_state(old_state) + x2 = self.prng.standard_normal(size=16) + self.prng.set_state(self.state) + x3 = self.prng.standard_normal(size=16) + assert_(np.all(x1 == x2)) + assert_(np.all(x1 == x3)) + + def test_negative_binomial(self): + # Ensure that the negative binomial results take floating point + # arguments without truncation. 
+ self.prng.negative_binomial(0.5, 0.5) + + +class TestRandint(object): + + rfunc = np.random.randint + + # valid integer/boolean types + itype = [np.bool_, np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64] + + def test_unsupported_type(self): + assert_raises(TypeError, self.rfunc, 1, dtype=float) + + def test_bounds_checking(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + assert_raises(ValueError, self.rfunc, lbnd - 1, ubnd, dtype=dt) + assert_raises(ValueError, self.rfunc, lbnd, ubnd + 1, dtype=dt) + assert_raises(ValueError, self.rfunc, ubnd, lbnd, dtype=dt) + assert_raises(ValueError, self.rfunc, 1, 0, dtype=dt) + + def test_rng_zero_and_extremes(self): + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + tgt = ubnd - 1 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = lbnd + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + tgt = (lbnd + ubnd)//2 + assert_equal(self.rfunc(tgt, tgt + 1, size=1000, dtype=dt), tgt) + + def test_full_range(self): + # Test for ticket #1690 + + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + try: + self.rfunc(lbnd, ubnd, dtype=dt) + except Exception as e: + raise AssertionError("No error should have been raised, " + "but one was with the following " + "message:\n\n%s" % str(e)) + + def test_in_bounds_fuzz(self): + # Don't use fixed seed + np.random.seed() + + for dt in self.itype[1:]: + for ubnd in [4, 8, 16]: + vals = self.rfunc(2, ubnd, size=2**16, dtype=dt) + assert_(vals.max() < ubnd) + assert_(vals.min() >= 2) + + vals = self.rfunc(0, 2, size=2**16, dtype=np.bool_) + + assert_(vals.max() < 2) + assert_(vals.min() >= 0) + + def test_repeatability(self): + import hashlib + # We use a md5 hash of generated sequences of 1000 samples + # in the range [0, 6) for all but bool, where the range + # is [0, 2). Hashes are for little endian numbers. + tgt = {'bool': '7dd3170d7aa461d201a65f8bcf3944b0', + 'int16': '1b7741b80964bb190c50d541dca1cac1', + 'int32': '4dc9fcc2b395577ebb51793e58ed1a05', + 'int64': '17db902806f448331b5a758d7d2ee672', + 'int8': '27dd30c4e08a797063dffac2490b0be6', + 'uint16': '1b7741b80964bb190c50d541dca1cac1', + 'uint32': '4dc9fcc2b395577ebb51793e58ed1a05', + 'uint64': '17db902806f448331b5a758d7d2ee672', + 'uint8': '27dd30c4e08a797063dffac2490b0be6'} + + for dt in self.itype[1:]: + np.random.seed(1234) + + # view as little endian for hash + if sys.byteorder == 'little': + val = self.rfunc(0, 6, size=1000, dtype=dt) + else: + val = self.rfunc(0, 6, size=1000, dtype=dt).byteswap() + + res = hashlib.md5(val.view(np.int8)).hexdigest() + assert_(tgt[np.dtype(dt).name] == res) + + # bools do not depend on endianness + np.random.seed(1234) + val = self.rfunc(0, 2, size=1000, dtype=bool).view(np.int8) + res = hashlib.md5(val).hexdigest() + assert_(tgt[np.dtype(bool).name] == res) + + def test_int64_uint64_corner_case(self): + # When stored in Numpy arrays, `lbnd` is casted + # as np.int64, and `ubnd` is casted as np.uint64. + # Checking whether `lbnd` >= `ubnd` used to be + # done solely via direct comparison, which is incorrect + # because when Numpy tries to compare both numbers, + # it casts both to np.float64 because there is + # no integer superset of np.int64 and np.uint64. 
However, + # `ubnd` is too large to be represented in np.float64, + # causing it be round down to np.iinfo(np.int64).max, + # leading to a ValueError because `lbnd` now equals + # the new `ubnd`. + + dt = np.int64 + tgt = np.iinfo(np.int64).max + lbnd = np.int64(np.iinfo(np.int64).max) + ubnd = np.uint64(np.iinfo(np.int64).max + 1) + + # None of these function calls should + # generate a ValueError now. + actual = np.random.randint(lbnd, ubnd, dtype=dt) + assert_equal(actual, tgt) + + def test_respect_dtype_singleton(self): + # See gh-7203 + for dt in self.itype: + lbnd = 0 if dt is np.bool_ else np.iinfo(dt).min + ubnd = 2 if dt is np.bool_ else np.iinfo(dt).max + 1 + + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_equal(sample.dtype, np.dtype(dt)) + + for dt in (bool, int, np.long): + lbnd = 0 if dt is bool else np.iinfo(dt).min + ubnd = 2 if dt is bool else np.iinfo(dt).max + 1 + + # gh-7284: Ensure that we get Python data types + sample = self.rfunc(lbnd, ubnd, dtype=dt) + assert_(not hasattr(sample, 'dtype')) + assert_equal(type(sample), dt) + + +class TestRandomDist(object): + # Make sure the random distribution returns the correct value for a + # given seed + + def setup(self): + self.seed = 1234567890 + + def test_rand(self): + np.random.seed(self.seed) + actual = np.random.rand(3, 2) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randn(self): + np.random.seed(self.seed) + actual = np.random.randn(3, 2) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_randint(self): + np.random.seed(self.seed) + actual = np.random.randint(-99, 99, size=(3, 2)) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + def test_random_integers(self): + np.random.seed(self.seed) + with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = np.random.random_integers(-99, 99, size=(3, 2)) + assert_(len(w) == 1) + desired = np.array([[31, 3], + [-52, 41], + [-48, -66]]) + assert_array_equal(actual, desired) + + def test_random_integers_max_int(self): + # Tests whether random_integers can generate the + # maximum allowed Python int that can be converted + # into a C long. Previous implementations of this + # method have thrown an OverflowError when attempting + # to generate this integer. 
+ with suppress_warnings() as sup: + w = sup.record(DeprecationWarning) + actual = np.random.random_integers(np.iinfo('l').max, + np.iinfo('l').max) + assert_(len(w) == 1) + + desired = np.iinfo('l').max + assert_equal(actual, desired) + + def test_random_integers_deprecated(self): + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + + # DeprecationWarning raised with high == None + assert_raises(DeprecationWarning, + np.random.random_integers, + np.iinfo('l').max) + + # DeprecationWarning raised with high != None + assert_raises(DeprecationWarning, + np.random.random_integers, + np.iinfo('l').max, np.iinfo('l').max) + + def test_random_sample(self): + np.random.seed(self.seed) + actual = np.random.random_sample((3, 2)) + desired = np.array([[0.61879477158567997, 0.59162362775974664], + [0.88868358904449662, 0.89165480011560816], + [0.4575674820298663, 0.7781880808593471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_choice_uniform_replace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 4) + desired = np.array([2, 3, 2, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_replace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1]) + desired = np.array([1, 1, 2, 2]) + assert_array_equal(actual, desired) + + def test_choice_uniform_noreplace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 3, replace=False) + desired = np.array([0, 1, 3]) + assert_array_equal(actual, desired) + + def test_choice_nonuniform_noreplace(self): + np.random.seed(self.seed) + actual = np.random.choice(4, 3, replace=False, + p=[0.1, 0.3, 0.5, 0.1]) + desired = np.array([2, 3, 1]) + assert_array_equal(actual, desired) + + def test_choice_noninteger(self): + np.random.seed(self.seed) + actual = np.random.choice(['a', 'b', 'c', 'd'], 4) + desired = np.array(['c', 'd', 'c', 'd']) + assert_array_equal(actual, desired) + + def test_choice_exceptions(self): + sample = np.random.choice + assert_raises(ValueError, sample, -1, 3) + assert_raises(ValueError, sample, 3., 3) + assert_raises(ValueError, sample, [[1, 2], [3, 4]], 3) + assert_raises(ValueError, sample, [], 3) + assert_raises(ValueError, sample, [1, 2, 3, 4], 3, + p=[[0.25, 0.25], [0.25, 0.25]]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4, 0.2]) + assert_raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1]) + assert_raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4]) + assert_raises(ValueError, sample, [1, 2, 3], 4, replace=False) + # gh-13087 + assert_raises(ValueError, sample, [1, 2, 3], -2, replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1,), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], (-1, 1), replace=False) + assert_raises(ValueError, sample, [1, 2, 3], 2, + replace=False, p=[1, 0, 0]) + + def test_choice_return_shape(self): + p = [0.1, 0.9] + # Check scalar + assert_(np.isscalar(np.random.choice(2, replace=True))) + assert_(np.isscalar(np.random.choice(2, replace=False))) + assert_(np.isscalar(np.random.choice(2, replace=True, p=p))) + assert_(np.isscalar(np.random.choice(2, replace=False, p=p))) + assert_(np.isscalar(np.random.choice([1, 2], replace=True))) + assert_(np.random.choice([None], replace=True) is None) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(np.random.choice(arr, replace=True) is a) + + # Check 0-d array + s = tuple() + assert_(not np.isscalar(np.random.choice(2, s, replace=True))) + assert_(not 
np.isscalar(np.random.choice(2, s, replace=False))) + assert_(not np.isscalar(np.random.choice(2, s, replace=True, p=p))) + assert_(not np.isscalar(np.random.choice(2, s, replace=False, p=p))) + assert_(not np.isscalar(np.random.choice([1, 2], s, replace=True))) + assert_(np.random.choice([None], s, replace=True).ndim == 0) + a = np.array([1, 2]) + arr = np.empty(1, dtype=object) + arr[0] = a + assert_(np.random.choice(arr, s, replace=True).item() is a) + + # Check multi dimensional array + s = (2, 3) + p = [0.1, 0.1, 0.1, 0.1, 0.4, 0.2] + assert_equal(np.random.choice(6, s, replace=True).shape, s) + assert_equal(np.random.choice(6, s, replace=False).shape, s) + assert_equal(np.random.choice(6, s, replace=True, p=p).shape, s) + assert_equal(np.random.choice(6, s, replace=False, p=p).shape, s) + assert_equal(np.random.choice(np.arange(6), s, replace=True).shape, s) + + # Check zero-size + assert_equal(np.random.randint(0, 0, size=(3, 0, 4)).shape, (3, 0, 4)) + assert_equal(np.random.randint(0, -10, size=0).shape, (0,)) + assert_equal(np.random.randint(10, 10, size=0).shape, (0,)) + assert_equal(np.random.choice(0, size=0).shape, (0,)) + assert_equal(np.random.choice([], size=(0,)).shape, (0,)) + assert_equal(np.random.choice(['a', 'b'], size=(3, 0, 4)).shape, (3, 0, 4)) + assert_raises(ValueError, np.random.choice, [], 10) + + def test_choice_nan_probabilities(self): + a = np.array([42, 1, 2]) + p = [None, None, None] + assert_raises(ValueError, np.random.choice, a, p=p) + + def test_bytes(self): + np.random.seed(self.seed) + actual = np.random.bytes(10) + desired = b'\x82Ui\x9e\xff\x97+Wf\xa5' + assert_equal(actual, desired) + + def test_shuffle(self): + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), + lambda x: [(i, i) for i in x], + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-11442 + lambda x: (np.asarray([(i, i) for i in x], + [("a", int), ("b", int)]) + .view(np.recarray)), + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object, 1), + ("b", np.int32, 1)])]: + np.random.seed(self.seed) + alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) + np.random.shuffle(alist) + actual = alist + desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3]) + assert_array_equal(actual, desired) + + def test_shuffle_masked(self): + # gh-3263 + a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1) + b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) + a_orig = a.copy() + b_orig = b.copy() + for i in range(50): + np.random.shuffle(a) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) + np.random.shuffle(b) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) + + def test_beta(self): + np.random.seed(self.seed) + actual = np.random.beta(.1, .9, size=(3, 2)) + desired = np.array( + [[1.45341850513746058e-02, 5.31297615662868145e-04], + [1.85366619058432324e-06, 4.19214516800110563e-03], + [1.58405155108498093e-04, 1.26252891949397652e-04]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_binomial(self): + np.random.seed(self.seed) + actual = np.random.binomial(100.123, .456, size=(3, 2)) + desired = np.array([[37, 43], + [42, 48], + [46, 45]]) + assert_array_equal(actual, desired) + + def 
test_chisquare(self): + np.random.seed(self.seed) + actual = np.random.chisquare(50, size=(3, 2)) + desired = np.array([[63.87858175501090585, 68.68407748911370447], + [65.77116116901505904, 47.09686762438974483], + [72.3828403199695174, 74.18408615260374006]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_dirichlet(self): + np.random.seed(self.seed) + alpha = np.array([51.72840233779265162, 39.74494232180943953]) + actual = np.random.mtrand.dirichlet(alpha, size=(3, 2)) + desired = np.array([[[0.54539444573611562, 0.45460555426388438], + [0.62345816822039413, 0.37654183177960598]], + [[0.55206000085785778, 0.44793999914214233], + [0.58964023305154301, 0.41035976694845688]], + [[0.59266909280647828, 0.40733090719352177], + [0.56974431743975207, 0.43025568256024799]]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_dirichlet_size(self): + # gh-3173 + p = np.array([51.72840233779265162, 39.74494232180943953]) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, np.uint32(1)).shape, (1, 2)) + assert_equal(np.random.dirichlet(p, [2, 2]).shape, (2, 2, 2)) + assert_equal(np.random.dirichlet(p, (2, 2)).shape, (2, 2, 2)) + assert_equal(np.random.dirichlet(p, np.array((2, 2))).shape, (2, 2, 2)) + + assert_raises(TypeError, np.random.dirichlet, p, float(1)) + + def test_dirichlet_bad_alpha(self): + # gh-2089 + alpha = np.array([5.4e-01, -1.0e-16]) + assert_raises(ValueError, np.random.mtrand.dirichlet, alpha) + + def test_exponential(self): + np.random.seed(self.seed) + actual = np.random.exponential(1.1234, size=(3, 2)) + desired = np.array([[1.08342649775011624, 1.00607889924557314], + [2.46628830085216721, 2.49668106809923884], + [0.68717433461363442, 1.69175666993575979]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_exponential_0(self): + assert_equal(np.random.exponential(scale=0), 0) + assert_raises(ValueError, np.random.exponential, scale=-0.) + + def test_f(self): + np.random.seed(self.seed) + actual = np.random.f(12, 77, size=(3, 2)) + desired = np.array([[1.21975394418575878, 1.75135759791559775], + [1.44803115017146489, 1.22108959480396262], + [1.02176975757740629, 1.34431827623300415]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gamma(self): + np.random.seed(self.seed) + actual = np.random.gamma(5, 3, size=(3, 2)) + desired = np.array([[24.60509188649287182, 28.54993563207210627], + [26.13476110204064184, 12.56988482927716078], + [31.71863275789960568, 33.30143302795922011]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_gamma_0(self): + assert_equal(np.random.gamma(shape=0, scale=0), 0) + assert_raises(ValueError, np.random.gamma, shape=-0., scale=-0.) + + def test_geometric(self): + np.random.seed(self.seed) + actual = np.random.geometric(.123456789, size=(3, 2)) + desired = np.array([[8, 7], + [17, 17], + [5, 12]]) + assert_array_equal(actual, desired) + + def test_gumbel(self): + np.random.seed(self.seed) + actual = np.random.gumbel(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.19591898743416816, 0.34405539668096674], + [-1.4492522252274278, -1.47374816298446865], + [1.10651090478803416, -0.69535848626236174]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_gumbel_0(self): + assert_equal(np.random.gumbel(scale=0), 0) + assert_raises(ValueError, np.random.gumbel, scale=-0.) 
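+ # A minimal standalone sketch of the pattern TestRandomDist relies on
+ # (assumes only numpy is available; not taken from the patched test files):
+ # seeding the legacy global RandomState makes every subsequent draw
+ # deterministic, so frozen "golden" values can be compared exactly.
+ #
+ #     import numpy as np
+ #     from numpy.testing import assert_array_almost_equal
+ #
+ #     np.random.seed(1234567890)
+ #     first = np.random.gumbel(loc=.123456789, scale=2.0, size=(3, 2))
+ #     np.random.seed(1234567890)   # re-seeding restarts the MT19937 stream
+ #     second = np.random.gumbel(loc=.123456789, scale=2.0, size=(3, 2))
+ #     assert_array_almost_equal(first, second, decimal=15)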
+ + def test_hypergeometric(self): + np.random.seed(self.seed) + actual = np.random.hypergeometric(10.1, 5.5, 14, size=(3, 2)) + desired = np.array([[10, 10], + [10, 10], + [9, 9]]) + assert_array_equal(actual, desired) + + # Test nbad = 0 + actual = np.random.hypergeometric(5, 0, 3, size=4) + desired = np.array([3, 3, 3, 3]) + assert_array_equal(actual, desired) + + actual = np.random.hypergeometric(15, 0, 12, size=4) + desired = np.array([12, 12, 12, 12]) + assert_array_equal(actual, desired) + + # Test ngood = 0 + actual = np.random.hypergeometric(0, 5, 3, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + actual = np.random.hypergeometric(0, 15, 12, size=4) + desired = np.array([0, 0, 0, 0]) + assert_array_equal(actual, desired) + + def test_laplace(self): + np.random.seed(self.seed) + actual = np.random.laplace(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[0.66599721112760157, 0.52829452552221945], + [3.12791959514407125, 3.18202813572992005], + [-0.05391065675859356, 1.74901336242837324]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_laplace_0(self): + assert_equal(np.random.laplace(scale=0), 0) + assert_raises(ValueError, np.random.laplace, scale=-0.) + + def test_logistic(self): + np.random.seed(self.seed) + actual = np.random.logistic(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[1.09232835305011444, 0.8648196662399954], + [4.27818590694950185, 4.33897006346929714], + [-0.21682183359214885, 2.63373365386060332]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_lognormal(self): + np.random.seed(self.seed) + actual = np.random.lognormal(mean=.123456789, sigma=2.0, size=(3, 2)) + desired = np.array([[16.50698631688883822, 36.54846706092654784], + [22.67886599981281748, 0.71617561058995771], + [65.72798501792723869, 86.84341601437161273]]) + assert_array_almost_equal(actual, desired, decimal=13) + + def test_lognormal_0(self): + assert_equal(np.random.lognormal(sigma=0), 1) + assert_raises(ValueError, np.random.lognormal, sigma=-0.) 
+ + def test_logseries(self): + np.random.seed(self.seed) + actual = np.random.logseries(p=.923456789, size=(3, 2)) + desired = np.array([[2, 2], + [6, 17], + [3, 6]]) + assert_array_equal(actual, desired) + + def test_multinomial(self): + np.random.seed(self.seed) + actual = np.random.multinomial(20, [1/6.]*6, size=(3, 2)) + desired = np.array([[[4, 3, 5, 4, 2, 2], + [5, 2, 8, 2, 2, 1]], + [[3, 4, 3, 6, 0, 4], + [2, 1, 4, 3, 6, 4]], + [[4, 4, 2, 5, 2, 3], + [4, 3, 4, 2, 3, 4]]]) + assert_array_equal(actual, desired) + + def test_multivariate_normal(self): + np.random.seed(self.seed) + mean = (.123456789, 10) + cov = [[1, 0], [0, 1]] + size = (3, 2) + actual = np.random.multivariate_normal(mean, cov, size) + desired = np.array([[[1.463620246718631, 11.73759122771936 ], + [1.622445133300628, 9.771356667546383]], + [[2.154490787682787, 12.170324946056553], + [1.719909438201865, 9.230548443648306]], + [[0.689515026297799, 9.880729819607714], + [-0.023054015651998, 9.201096623542879]]]) + + assert_array_almost_equal(actual, desired, decimal=15) + + # Check for default size, was raising deprecation warning + actual = np.random.multivariate_normal(mean, cov) + desired = np.array([0.895289569463708, 9.17180864067987]) + assert_array_almost_equal(actual, desired, decimal=15) + + # Check that non positive-semidefinite covariance warns with + # RuntimeWarning + mean = [0, 0] + cov = [[1, 2], [2, 1]] + assert_warns(RuntimeWarning, np.random.multivariate_normal, mean, cov) + + # and that it doesn't warn with RuntimeWarning check_valid='ignore' + assert_no_warnings(np.random.multivariate_normal, mean, cov, + check_valid='ignore') + + # and that it raises with RuntimeWarning check_valid='raises' + assert_raises(ValueError, np.random.multivariate_normal, mean, cov, + check_valid='raise') + + cov = np.array([[1, 0.1],[0.1, 1]], dtype=np.float32) + with suppress_warnings() as sup: + np.random.multivariate_normal(mean, cov) + w = sup.record(RuntimeWarning) + assert len(w) == 0 + + def test_negative_binomial(self): + np.random.seed(self.seed) + actual = np.random.negative_binomial(n=100, p=.12345, size=(3, 2)) + desired = np.array([[848, 841], + [892, 611], + [779, 647]]) + assert_array_equal(actual, desired) + + def test_noncentral_chisquare(self): + np.random.seed(self.seed) + actual = np.random.noncentral_chisquare(df=5, nonc=5, size=(3, 2)) + desired = np.array([[23.91905354498517511, 13.35324692733826346], + [31.22452661329736401, 16.60047399466177254], + [5.03461598262724586, 17.94973089023519464]]) + assert_array_almost_equal(actual, desired, decimal=14) + + actual = np.random.noncentral_chisquare(df=.5, nonc=.2, size=(3, 2)) + desired = np.array([[1.47145377828516666, 0.15052899268012659], + [0.00943803056963588, 1.02647251615666169], + [0.332334982684171, 0.15451287602753125]]) + assert_array_almost_equal(actual, desired, decimal=14) + + np.random.seed(self.seed) + actual = np.random.noncentral_chisquare(df=5, nonc=0, size=(3, 2)) + desired = np.array([[9.597154162763948, 11.725484450296079], + [10.413711048138335, 3.694475922923986], + [13.484222138963087, 14.377255424602957]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_noncentral_f(self): + np.random.seed(self.seed) + actual = np.random.noncentral_f(dfnum=5, dfden=2, nonc=1, + size=(3, 2)) + desired = np.array([[1.40598099674926669, 0.34207973179285761], + [3.57715069265772545, 7.92632662577829805], + [0.43741599463544162, 1.1774208752428319]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def 
test_normal(self): + np.random.seed(self.seed) + actual = np.random.normal(loc=.123456789, scale=2.0, size=(3, 2)) + desired = np.array([[2.80378370443726244, 3.59863924443872163], + [3.121433477601256, -0.33382987590723379], + [4.18552478636557357, 4.46410668111310471]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_normal_0(self): + assert_equal(np.random.normal(scale=0), 0) + assert_raises(ValueError, np.random.normal, scale=-0.) + + def test_pareto(self): + np.random.seed(self.seed) + actual = np.random.pareto(a=.123456789, size=(3, 2)) + desired = np.array( + [[2.46852460439034849e+03, 1.41286880810518346e+03], + [5.28287797029485181e+07, 6.57720981047328785e+07], + [1.40840323350391515e+02, 1.98390255135251704e+05]]) + # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this + # matrix differs by 24 nulps. Discussion: + # https://mail.python.org/pipermail/numpy-discussion/2012-September/063801.html + # Consensus is that this is probably some gcc quirk that affects + # rounding but not in any important way, so we just use a looser + # tolerance on this test: + np.testing.assert_array_almost_equal_nulp(actual, desired, nulp=30) + + def test_poisson(self): + np.random.seed(self.seed) + actual = np.random.poisson(lam=.123456789, size=(3, 2)) + desired = np.array([[0, 0], + [1, 0], + [0, 0]]) + assert_array_equal(actual, desired) + + def test_poisson_exceptions(self): + lambig = np.iinfo('l').max + lamneg = -1 + assert_raises(ValueError, np.random.poisson, lamneg) + assert_raises(ValueError, np.random.poisson, [lamneg]*10) + assert_raises(ValueError, np.random.poisson, lambig) + assert_raises(ValueError, np.random.poisson, [lambig]*10) + + def test_power(self): + np.random.seed(self.seed) + actual = np.random.power(a=.123456789, size=(3, 2)) + desired = np.array([[0.02048932883240791, 0.01424192241128213], + [0.38446073748535298, 0.39499689943484395], + [0.00177699707563439, 0.13115505880863756]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_rayleigh(self): + np.random.seed(self.seed) + actual = np.random.rayleigh(scale=10, size=(3, 2)) + desired = np.array([[13.8882496494248393, 13.383318339044731], + [20.95413364294492098, 21.08285015800712614], + [11.06066537006854311, 17.35468505778271009]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_rayleigh_0(self): + assert_equal(np.random.rayleigh(scale=0), 0) + assert_raises(ValueError, np.random.rayleigh, scale=-0.) 
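+ # A minimal sketch of the edge-case convention the *_0 tests above check
+ # (assumes only numpy; not taken from the patched files): a zero scale
+ # collapses the distribution to its degenerate point, while a negative
+ # scale -- including negative zero -- is rejected with ValueError.
+ #
+ #     import numpy as np
+ #     from numpy.testing import assert_equal, assert_raises
+ #
+ #     assert_equal(np.random.rayleigh(scale=0), 0)    # point mass at 0
+ #     assert_equal(np.random.lognormal(sigma=0), 1)   # exp(0) == 1 with default mean
+ #     assert_raises(ValueError, np.random.rayleigh, scale=-0.)
+ #     assert_raises(ValueError, np.random.lognormal, sigma=-0.)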
+ + def test_standard_cauchy(self): + np.random.seed(self.seed) + actual = np.random.standard_cauchy(size=(3, 2)) + desired = np.array([[0.77127660196445336, -6.55601161955910605], + [0.93582023391158309, -2.07479293013759447], + [-4.74601644297011926, 0.18338989290760804]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_exponential(self): + np.random.seed(self.seed) + actual = np.random.standard_exponential(size=(3, 2)) + desired = np.array([[0.96441739162374596, 0.89556604882105506], + [2.1953785836319808, 2.22243285392490542], + [0.6116915921431676, 1.50592546727413201]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_gamma(self): + np.random.seed(self.seed) + actual = np.random.standard_gamma(shape=3, size=(3, 2)) + desired = np.array([[5.50841531318455058, 6.62953470301903103], + [5.93988484943779227, 2.31044849402133989], + [7.54838614231317084, 8.012756093271868]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_standard_gamma_0(self): + assert_equal(np.random.standard_gamma(shape=0), 0) + assert_raises(ValueError, np.random.standard_gamma, shape=-0.) + + def test_standard_normal(self): + np.random.seed(self.seed) + actual = np.random.standard_normal(size=(3, 2)) + desired = np.array([[1.34016345771863121, 1.73759122771936081], + [1.498988344300628, -0.2286433324536169], + [2.031033998682787, 2.17032494605655257]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_standard_t(self): + np.random.seed(self.seed) + actual = np.random.standard_t(df=10, size=(3, 2)) + desired = np.array([[0.97140611862659965, -0.08830486548450577], + [1.36311143689505321, -0.55317463909867071], + [-0.18473749069684214, 0.61181537341755321]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_triangular(self): + np.random.seed(self.seed) + actual = np.random.triangular(left=5.12, mode=10.23, right=20.34, + size=(3, 2)) + desired = np.array([[12.68117178949215784, 12.4129206149193152], + [16.20131377335158263, 16.25692138747600524], + [11.20400690911820263, 14.4978144835829923]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_uniform(self): + np.random.seed(self.seed) + actual = np.random.uniform(low=1.23, high=10.54, size=(3, 2)) + desired = np.array([[6.99097932346268003, 6.73801597444323974], + [9.50364421400426274, 9.53130618907631089], + [5.48995325769805476, 8.47493103280052118]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_uniform_range_bounds(self): + fmin = np.finfo('float').min + fmax = np.finfo('float').max + + func = np.random.uniform + assert_raises(OverflowError, func, -np.inf, 0) + assert_raises(OverflowError, func, 0, np.inf) + assert_raises(OverflowError, func, fmin, fmax) + assert_raises(OverflowError, func, [-np.inf], [0]) + assert_raises(OverflowError, func, [0], [np.inf]) + + # (fmax / 1e17) - fmin is within range, so this should not throw + # account for i386 extended precision DBL_MAX / 1e17 + DBL_MAX > + # DBL_MAX by increasing fmin a bit + np.random.uniform(low=np.nextafter(fmin, 1), high=fmax / 1e17) + + def test_scalar_exception_propagation(self): + # Tests that exceptions are correctly propagated in distributions + # when called with objects that throw exceptions when converted to + # scalars. 
+ # + # Regression test for gh: 8865 + + class ThrowingFloat(np.ndarray): + def __float__(self): + raise TypeError + + throwing_float = np.array(1.0).view(ThrowingFloat) + assert_raises(TypeError, np.random.uniform, throwing_float, throwing_float) + + class ThrowingInteger(np.ndarray): + def __int__(self): + raise TypeError + + throwing_int = np.array(1).view(ThrowingInteger) + assert_raises(TypeError, np.random.hypergeometric, throwing_int, 1, 1) + + def test_vonmises(self): + np.random.seed(self.seed) + actual = np.random.vonmises(mu=1.23, kappa=1.54, size=(3, 2)) + desired = np.array([[2.28567572673902042, 2.89163838442285037], + [0.38198375564286025, 2.57638023113890746], + [1.19153771588353052, 1.83509849681825354]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_vonmises_small(self): + # check infinite loop, gh-4720 + np.random.seed(self.seed) + r = np.random.vonmises(mu=0., kappa=1.1e-8, size=10**6) + np.testing.assert_(np.isfinite(r).all()) + + def test_wald(self): + np.random.seed(self.seed) + actual = np.random.wald(mean=1.23, scale=1.54, size=(3, 2)) + desired = np.array([[3.82935265715889983, 5.13125249184285526], + [0.35045403618358717, 1.50832396872003538], + [0.24124319895843183, 0.22031101461955038]]) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_weibull(self): + np.random.seed(self.seed) + actual = np.random.weibull(a=1.23, size=(3, 2)) + desired = np.array([[0.97097342648766727, 0.91422896443565516], + [1.89517770034962929, 1.91414357960479564], + [0.67057783752390987, 1.39494046635066793]]) + assert_array_almost_equal(actual, desired, decimal=15) + + def test_weibull_0(self): + np.random.seed(self.seed) + assert_equal(np.random.weibull(a=0, size=12), np.zeros(12)) + assert_raises(ValueError, np.random.weibull, a=-0.) 
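+ # The TestBroadcast class that follows exercises ufunc-style parameter
+ # broadcasting in the distribution methods; a minimal sketch of that
+ # behaviour (assumes only numpy; not taken from the patched files):
+ #
+ #     import numpy as np
+ #     from numpy.testing import assert_raises
+ #
+ #     # a length-1 loc broadcast against a length-3 scale yields three draws
+ #     draws = np.random.normal([0], [1.0, 1.0, 1.0])
+ #     assert draws.shape == (3,)
+ #     # an invalid parameter anywhere in the broadcast arguments fails the call
+ #     assert_raises(ValueError, np.random.normal, [0, 0, 0], [-1])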
+ + def test_zipf(self): + np.random.seed(self.seed) + actual = np.random.zipf(a=1.23, size=(3, 2)) + desired = np.array([[66, 29], + [1, 1], + [3, 13]]) + assert_array_equal(actual, desired) + + +class TestBroadcast(object): + # tests that functions that broadcast behave + # correctly when presented with non-scalar arguments + def setup(self): + self.seed = 123456789 + + def setSeed(self): + np.random.seed(self.seed) + + # TODO: Include test for randint once it can broadcast + # Can steal the test written in PR #6938 + + def test_uniform(self): + low = [0] + high = [1] + uniform = np.random.uniform + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.setSeed() + actual = uniform(low * 3, high) + assert_array_almost_equal(actual, desired, decimal=14) + + self.setSeed() + actual = uniform(low, high * 3) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_normal(self): + loc = [0] + scale = [1] + bad_scale = [-1] + normal = np.random.normal + desired = np.array([2.2129019979039612, + 2.1283977976520019, + 1.8417114045748335]) + + self.setSeed() + actual = normal(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc * 3, bad_scale) + + self.setSeed() + actual = normal(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, normal, loc, bad_scale * 3) + + def test_beta(self): + a = [1] + b = [2] + bad_a = [-1] + bad_b = [-2] + beta = np.random.beta + desired = np.array([0.19843558305989056, + 0.075230336409423643, + 0.24976865978980844]) + + self.setSeed() + actual = beta(a * 3, b) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a * 3, b) + assert_raises(ValueError, beta, a * 3, bad_b) + + self.setSeed() + actual = beta(a, b * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, beta, bad_a, b * 3) + assert_raises(ValueError, beta, a, bad_b * 3) + + def test_exponential(self): + scale = [1] + bad_scale = [-1] + exponential = np.random.exponential + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = exponential(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, exponential, bad_scale * 3) + + def test_standard_gamma(self): + shape = [1] + bad_shape = [-1] + std_gamma = np.random.standard_gamma + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = std_gamma(shape * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, std_gamma, bad_shape * 3) + + def test_gamma(self): + shape = [1] + scale = [2] + bad_shape = [-1] + bad_scale = [-2] + gamma = np.random.gamma + desired = np.array([1.5221370731769048, + 1.5277256455738331, + 1.4248762625178359]) + + self.setSeed() + actual = gamma(shape * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape * 3, scale) + assert_raises(ValueError, gamma, shape * 3, bad_scale) + + self.setSeed() + actual = gamma(shape, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gamma, bad_shape, scale * 3) + assert_raises(ValueError, gamma, shape, bad_scale * 3) + + def test_f(self): + dfnum = [1] + dfden = [2] + bad_dfnum = [-1] + bad_dfden = [-2] + f = np.random.f + desired = np.array([0.80038951638264799, + 
0.86768719635363512, + 2.7251095168386801]) + + self.setSeed() + actual = f(dfnum * 3, dfden) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum * 3, dfden) + assert_raises(ValueError, f, dfnum * 3, bad_dfden) + + self.setSeed() + actual = f(dfnum, dfden * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, f, bad_dfnum, dfden * 3) + assert_raises(ValueError, f, dfnum, bad_dfden * 3) + + def test_noncentral_f(self): + dfnum = [2] + dfden = [3] + nonc = [4] + bad_dfnum = [0] + bad_dfden = [-1] + bad_nonc = [-2] + nonc_f = np.random.noncentral_f + desired = np.array([9.1393943263705211, + 13.025456344595602, + 8.8018098359100545]) + + self.setSeed() + actual = nonc_f(dfnum * 3, dfden, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum * 3, dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, bad_dfden, nonc) + assert_raises(ValueError, nonc_f, dfnum * 3, dfden, bad_nonc) + + self.setSeed() + actual = nonc_f(dfnum, dfden * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden * 3, nonc) + assert_raises(ValueError, nonc_f, dfnum, dfden * 3, bad_nonc) + + self.setSeed() + actual = nonc_f(dfnum, dfden, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_f, bad_dfnum, dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, bad_dfden, nonc * 3) + assert_raises(ValueError, nonc_f, dfnum, dfden, bad_nonc * 3) + + def test_noncentral_f_small_df(self): + self.setSeed() + desired = np.array([6.869638627492048, 0.785880199263955]) + actual = np.random.noncentral_f(0.9, 0.9, 2, size=2) + assert_array_almost_equal(actual, desired, decimal=14) + + def test_chisquare(self): + df = [1] + bad_df = [-1] + chisquare = np.random.chisquare + desired = np.array([0.57022801133088286, + 0.51947702108840776, + 0.1320969254923558]) + + self.setSeed() + actual = chisquare(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, chisquare, bad_df * 3) + + def test_noncentral_chisquare(self): + df = [1] + nonc = [2] + bad_df = [-1] + bad_nonc = [-2] + nonc_chi = np.random.noncentral_chisquare + desired = np.array([9.0015599467913763, + 4.5804135049718742, + 6.0872302432834564]) + + self.setSeed() + actual = nonc_chi(df * 3, nonc) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df * 3, nonc) + assert_raises(ValueError, nonc_chi, df * 3, bad_nonc) + + self.setSeed() + actual = nonc_chi(df, nonc * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, nonc_chi, bad_df, nonc * 3) + assert_raises(ValueError, nonc_chi, df, bad_nonc * 3) + + def test_standard_t(self): + df = [1] + bad_df = [-1] + t = np.random.standard_t + desired = np.array([3.0702872575217643, + 5.8560725167361607, + 1.0274791436474273]) + + self.setSeed() + actual = t(df * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, t, bad_df * 3) + + def test_vonmises(self): + mu = [2] + kappa = [1] + bad_kappa = [-1] + vonmises = np.random.vonmises + desired = np.array([2.9883443664201312, + -2.7064099483995943, + -1.8672476700665914]) + + self.setSeed() + actual = vonmises(mu * 3, kappa) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu * 3, 
bad_kappa) + + self.setSeed() + actual = vonmises(mu, kappa * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, vonmises, mu, bad_kappa * 3) + + def test_pareto(self): + a = [1] + bad_a = [-1] + pareto = np.random.pareto + desired = np.array([1.1405622680198362, + 1.1465519762044529, + 1.0389564467453547]) + + self.setSeed() + actual = pareto(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, pareto, bad_a * 3) + + def test_weibull(self): + a = [1] + bad_a = [-1] + weibull = np.random.weibull + desired = np.array([0.76106853658845242, + 0.76386282278691653, + 0.71243813125891797]) + + self.setSeed() + actual = weibull(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, weibull, bad_a * 3) + + def test_power(self): + a = [1] + bad_a = [-1] + power = np.random.power + desired = np.array([0.53283302478975902, + 0.53413660089041659, + 0.50955303552646702]) + + self.setSeed() + actual = power(a * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, power, bad_a * 3) + + def test_laplace(self): + loc = [0] + scale = [1] + bad_scale = [-1] + laplace = np.random.laplace + desired = np.array([0.067921356028507157, + 0.070715642226971326, + 0.019290950698972624]) + + self.setSeed() + actual = laplace(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc * 3, bad_scale) + + self.setSeed() + actual = laplace(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, laplace, loc, bad_scale * 3) + + def test_gumbel(self): + loc = [0] + scale = [1] + bad_scale = [-1] + gumbel = np.random.gumbel + desired = np.array([0.2730318639556768, + 0.26936705726291116, + 0.33906220393037939]) + + self.setSeed() + actual = gumbel(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc * 3, bad_scale) + + self.setSeed() + actual = gumbel(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, gumbel, loc, bad_scale * 3) + + def test_logistic(self): + loc = [0] + scale = [1] + bad_scale = [-1] + logistic = np.random.logistic + desired = np.array([0.13152135837586171, + 0.13675915696285773, + 0.038216792802833396]) + + self.setSeed() + actual = logistic(loc * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc * 3, bad_scale) + + self.setSeed() + actual = logistic(loc, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, logistic, loc, bad_scale * 3) + + def test_lognormal(self): + mean = [0] + sigma = [1] + bad_sigma = [-1] + lognormal = np.random.lognormal + desired = np.array([9.1422086044848427, + 8.4013952870126261, + 6.3073234116578671]) + + self.setSeed() + actual = lognormal(mean * 3, sigma) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean * 3, bad_sigma) + + self.setSeed() + actual = lognormal(mean, sigma * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, lognormal, mean, bad_sigma * 3) + + def test_rayleigh(self): + scale = [1] + bad_scale = [-1] + rayleigh = np.random.rayleigh + desired = np.array([1.2337491937897689, + 1.2360119924878694, + 1.1936818095781789]) + + self.setSeed() + actual = rayleigh(scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + 
assert_raises(ValueError, rayleigh, bad_scale * 3) + + def test_wald(self): + mean = [0.5] + scale = [1] + bad_mean = [0] + bad_scale = [-2] + wald = np.random.wald + desired = np.array([0.11873681120271318, + 0.12450084820795027, + 0.9096122728408238]) + + self.setSeed() + actual = wald(mean * 3, scale) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean * 3, scale) + assert_raises(ValueError, wald, mean * 3, bad_scale) + + self.setSeed() + actual = wald(mean, scale * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, wald, bad_mean, scale * 3) + assert_raises(ValueError, wald, mean, bad_scale * 3) + + def test_triangular(self): + left = [1] + right = [3] + mode = [2] + bad_left_one = [3] + bad_mode_one = [4] + bad_left_two, bad_mode_two = right * 2 + triangular = np.random.triangular + desired = np.array([2.03339048710429, + 2.0347400359389356, + 2.0095991069536208]) + + self.setSeed() + actual = triangular(left * 3, mode, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one * 3, mode, right) + assert_raises(ValueError, triangular, left * 3, bad_mode_one, right) + assert_raises(ValueError, triangular, bad_left_two * 3, bad_mode_two, right) + + self.setSeed() + actual = triangular(left, mode * 3, right) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode * 3, right) + assert_raises(ValueError, triangular, left, bad_mode_one * 3, right) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two * 3, right) + + self.setSeed() + actual = triangular(left, mode, right * 3) + assert_array_almost_equal(actual, desired, decimal=14) + assert_raises(ValueError, triangular, bad_left_one, mode, right * 3) + assert_raises(ValueError, triangular, left, bad_mode_one, right * 3) + assert_raises(ValueError, triangular, bad_left_two, bad_mode_two, right * 3) + + def test_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + binom = np.random.binomial + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n * 3, p) + assert_raises(ValueError, binom, n * 3, bad_p_one) + assert_raises(ValueError, binom, n * 3, bad_p_two) + + self.setSeed() + actual = binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, binom, bad_n, p * 3) + assert_raises(ValueError, binom, n, bad_p_one * 3) + assert_raises(ValueError, binom, n, bad_p_two * 3) + + def test_negative_binomial(self): + n = [1] + p = [0.5] + bad_n = [-1] + bad_p_one = [-1] + bad_p_two = [1.5] + neg_binom = np.random.negative_binomial + desired = np.array([1, 0, 1]) + + self.setSeed() + actual = neg_binom(n * 3, p) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n * 3, p) + assert_raises(ValueError, neg_binom, n * 3, bad_p_one) + assert_raises(ValueError, neg_binom, n * 3, bad_p_two) + + self.setSeed() + actual = neg_binom(n, p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, neg_binom, bad_n, p * 3) + assert_raises(ValueError, neg_binom, n, bad_p_one * 3) + assert_raises(ValueError, neg_binom, n, bad_p_two * 3) + + def test_poisson(self): + max_lam = np.random.RandomState().poisson_lam_max + + lam = [1] + bad_lam_one = [-1] + bad_lam_two = [max_lam * 2] + poisson = np.random.poisson + desired = np.array([1, 1, 0]) + + 
self.setSeed() + actual = poisson(lam * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, poisson, bad_lam_one * 3) + assert_raises(ValueError, poisson, bad_lam_two * 3) + + def test_zipf(self): + a = [2] + bad_a = [0] + zipf = np.random.zipf + desired = np.array([2, 2, 1]) + + self.setSeed() + actual = zipf(a * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, zipf, bad_a * 3) + with np.errstate(invalid='ignore'): + assert_raises(ValueError, zipf, np.nan) + assert_raises(ValueError, zipf, [0, 0, np.nan]) + + def test_geometric(self): + p = [0.5] + bad_p_one = [-1] + bad_p_two = [1.5] + geom = np.random.geometric + desired = np.array([2, 2, 2]) + + self.setSeed() + actual = geom(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, geom, bad_p_one * 3) + assert_raises(ValueError, geom, bad_p_two * 3) + + def test_hypergeometric(self): + ngood = [1] + nbad = [2] + nsample = [2] + bad_ngood = [-1] + bad_nbad = [-2] + bad_nsample_one = [0] + bad_nsample_two = [4] + hypergeom = np.random.hypergeometric + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = hypergeom(ngood * 3, nbad, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood * 3, nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, bad_nbad, nsample) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood * 3, nbad, bad_nsample_two) + + self.setSeed() + actual = hypergeom(ngood, nbad * 3, nsample) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, bad_nbad * 3, nsample) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_one) + assert_raises(ValueError, hypergeom, ngood, nbad * 3, bad_nsample_two) + + self.setSeed() + actual = hypergeom(ngood, nbad, nsample * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, hypergeom, bad_ngood, nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, bad_nbad, nsample * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_one * 3) + assert_raises(ValueError, hypergeom, ngood, nbad, bad_nsample_two * 3) + + def test_logseries(self): + p = [0.5] + bad_p_one = [2] + bad_p_two = [-1] + logseries = np.random.logseries + desired = np.array([1, 1, 1]) + + self.setSeed() + actual = logseries(p * 3) + assert_array_equal(actual, desired) + assert_raises(ValueError, logseries, bad_p_one * 3) + assert_raises(ValueError, logseries, bad_p_two * 3) + +class TestThread(object): + # make sure each state produces the same sequence even in threads + def setup(self): + self.seeds = range(4) + + def check_function(self, function, sz): + from threading import Thread + + out1 = np.empty((len(self.seeds),) + sz) + out2 = np.empty((len(self.seeds),) + sz) + + # threaded generation + t = [Thread(target=function, args=(np.random.RandomState(s), o)) + for s, o in zip(self.seeds, out1)] + [x.start() for x in t] + [x.join() for x in t] + + # the same serial + for s, o in zip(self.seeds, out2): + function(np.random.RandomState(s), o) + + # these platforms change x87 fpu precision mode in threads + if np.intp().dtype.itemsize == 4 and sys.platform == "win32": + assert_array_almost_equal(out1, out2) + else: + assert_array_equal(out1, out2) + + def test_normal(self): + def gen_random(state, out): + out[...] 
= state.normal(size=10000) + self.check_function(gen_random, sz=(10000,)) + + def test_exp(self): + def gen_random(state, out): + out[...] = state.exponential(scale=np.ones((100, 1000))) + self.check_function(gen_random, sz=(100, 1000)) + + def test_multinomial(self): + def gen_random(state, out): + out[...] = state.multinomial(10, [1/6.]*6, size=10000) + self.check_function(gen_random, sz=(10000, 6)) + +# See Issue #4263 +class TestSingleEltArrayInput(object): + def setup(self): + self.argOne = np.array([2]) + self.argTwo = np.array([3]) + self.argThree = np.array([4]) + self.tgtShape = (1,) + + def test_one_arg_funcs(self): + funcs = (np.random.exponential, np.random.standard_gamma, + np.random.chisquare, np.random.standard_t, + np.random.pareto, np.random.weibull, + np.random.power, np.random.rayleigh, + np.random.poisson, np.random.zipf, + np.random.geometric, np.random.logseries) + + probfuncs = (np.random.geometric, np.random.logseries) + + for func in funcs: + if func in probfuncs: # p < 1.0 + out = func(np.array([0.5])) + + else: + out = func(self.argOne) + + assert_equal(out.shape, self.tgtShape) + + def test_two_arg_funcs(self): + funcs = (np.random.uniform, np.random.normal, + np.random.beta, np.random.gamma, + np.random.f, np.random.noncentral_chisquare, + np.random.vonmises, np.random.laplace, + np.random.gumbel, np.random.logistic, + np.random.lognormal, np.random.wald, + np.random.binomial, np.random.negative_binomial) + + probfuncs = (np.random.binomial, np.random.negative_binomial) + + for func in funcs: + if func in probfuncs: # p <= 1 + argTwo = np.array([0.5]) + + else: + argTwo = self.argTwo + + out = func(self.argOne, argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], argTwo) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, argTwo[0]) + assert_equal(out.shape, self.tgtShape) + +# TODO: Uncomment once randint can broadcast arguments +# def test_randint(self): +# itype = [bool, np.int8, np.uint8, np.int16, np.uint16, +# np.int32, np.uint32, np.int64, np.uint64] +# func = np.random.randint +# high = np.array([1]) +# low = np.array([0]) +# +# for dt in itype: +# out = func(low, high, dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) +# +# out = func(low[0], high, dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) +# +# out = func(low, high[0], dtype=dt) +# self.assert_equal(out.shape, self.tgtShape) + + def test_three_arg_funcs(self): + funcs = [np.random.noncentral_f, np.random.triangular, + np.random.hypergeometric] + + for func in funcs: + out = func(self.argOne, self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne[0], self.argTwo, self.argThree) + assert_equal(out.shape, self.tgtShape) + + out = func(self.argOne, self.argTwo[0], self.argThree) + assert_equal(out.shape, self.tgtShape) diff --git a/contrib/python/numpy/py2/numpy/random/tests/test_regression.py b/contrib/python/numpy/py2/numpy/random/tests/test_regression.py new file mode 100644 index 0000000000..ca9bbbc719 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/random/tests/test_regression.py @@ -0,0 +1,157 @@ +from __future__ import division, absolute_import, print_function + +import sys +from numpy.testing import ( + assert_, assert_array_equal, assert_raises, + ) +from numpy import random +from numpy.compat import long +import numpy as np + + +class TestRegression(object): + + def test_VonMises_range(self): + # Make sure generated random variables are in [-pi, pi]. + # Regression test for ticket #986. 
+ for mu in np.linspace(-7., 7., 5): + r = random.mtrand.vonmises(mu, 1, 50) + assert_(np.all(r > -np.pi) and np.all(r <= np.pi)) + + def test_hypergeometric_range(self): + # Test for ticket #921 + assert_(np.all(np.random.hypergeometric(3, 18, 11, size=10) < 4)) + assert_(np.all(np.random.hypergeometric(18, 3, 11, size=10) > 0)) + + # Test for ticket #5623 + args = [ + (2**20 - 2, 2**20 - 2, 2**20 - 2), # Check for 32-bit systems + ] + is_64bits = sys.maxsize > 2**32 + if is_64bits and sys.platform != 'win32': + args.append((2**40 - 2, 2**40 - 2, 2**40 - 2)) # Check for 64-bit systems + for arg in args: + assert_(np.random.hypergeometric(*arg) > 0) + + def test_logseries_convergence(self): + # Test for ticket #923 + N = 1000 + np.random.seed(0) + rvsn = np.random.logseries(0.8, size=N) + # these two frequency counts should be close to theoretical + # numbers with this large sample + # theoretical large N result is 0.49706795 + freq = np.sum(rvsn == 1) / float(N) + msg = "Frequency was %f, should be > 0.45" % freq + assert_(freq > 0.45, msg) + # theoretical large N result is 0.19882718 + freq = np.sum(rvsn == 2) / float(N) + msg = "Frequency was %f, should be < 0.23" % freq + assert_(freq < 0.23, msg) + + def test_permutation_longs(self): + np.random.seed(1234) + a = np.random.permutation(12) + np.random.seed(1234) + b = np.random.permutation(long(12)) + assert_array_equal(a, b) + + def test_shuffle_mixed_dimension(self): + # Test for trac ticket #2074 + for t in [[1, 2, 3, None], + [(1, 1), (2, 2), (3, 3), None], + [1, (2, 2), (3, 3), None], + [(1, 1), 2, 3, None]]: + np.random.seed(12345) + shuffled = list(t) + random.shuffle(shuffled) + assert_array_equal(shuffled, [t[0], t[3], t[1], t[2]]) + + def test_call_within_randomstate(self): + # Check that custom RandomState does not call into global state + m = np.random.RandomState() + res = np.array([0, 8, 7, 2, 1, 9, 4, 7, 0, 3]) + for i in range(3): + np.random.seed(i) + m.seed(4321) + # If m.state is not honored, the result will change + assert_array_equal(m.choice(10, size=10, p=np.ones(10)/10.), res) + + def test_multivariate_normal_size_types(self): + # Test for multivariate_normal issue with 'size' argument. + # Check that the multivariate_normal size argument can be a + # numpy integer. + np.random.multivariate_normal([0], [[0]], size=1) + np.random.multivariate_normal([0], [[0]], size=np.int_(1)) + np.random.multivariate_normal([0], [[0]], size=np.int64(1)) + + def test_beta_small_parameters(self): + # Test that beta with small a and b parameters does not produce + # NaNs due to roundoff errors causing 0 / 0, gh-5851 + np.random.seed(1234567890) + x = np.random.beta(0.0001, 0.0001, size=100) + assert_(not np.any(np.isnan(x)), 'Nans in np.random.beta') + + def test_choice_sum_of_probs_tolerance(self): + # The sum of probs should be 1.0 with some tolerance. + # For low precision dtypes the tolerance was too tight. + # See numpy github issue 6123. 
+ np.random.seed(1234) + a = [1, 2, 3] + counts = [4, 4, 2] + for dt in np.float16, np.float32, np.float64: + probs = np.array(counts, dtype=dt) / sum(counts) + c = np.random.choice(a, p=probs) + assert_(c in a) + assert_raises(ValueError, np.random.choice, a, p=probs*0.9) + + def test_shuffle_of_array_of_different_length_strings(self): + # Test that permuting an array of different length strings + # will not cause a segfault on garbage collection + # Tests gh-7710 + np.random.seed(1234) + + a = np.array(['a', 'a' * 1000]) + + for _ in range(100): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_shuffle_of_array_of_objects(self): + # Test that permuting an array of objects will not cause + # a segfault on garbage collection. + # See gh-7719 + np.random.seed(1234) + a = np.array([np.arange(1), np.arange(4)]) + + for _ in range(1000): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_permutation_subclass(self): + class N(np.ndarray): + pass + + np.random.seed(1) + orig = np.arange(3).view(N) + perm = np.random.permutation(orig) + assert_array_equal(perm, np.array([0, 2, 1])) + assert_array_equal(orig, np.arange(3).view(N)) + + class M(object): + a = np.arange(5) + + def __array__(self): + return self.a + + np.random.seed(1) + m = M() + perm = np.random.permutation(m) + assert_array_equal(perm, np.array([2, 1, 4, 0, 3])) + assert_array_equal(m.__array__(), np.arange(5)) diff --git a/contrib/python/numpy/py2/numpy/setup.py b/contrib/python/numpy/py2/numpy/setup.py new file mode 100644 index 0000000000..4ccdaeea5e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/setup.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +from __future__ import division, print_function + + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('numpy', parent_package, top_path) + + config.add_subpackage('compat') + config.add_subpackage('core') + config.add_subpackage('distutils') + config.add_subpackage('doc') + config.add_subpackage('f2py') + config.add_subpackage('fft') + config.add_subpackage('lib') + config.add_subpackage('linalg') + config.add_subpackage('ma') + config.add_subpackage('matrixlib') + config.add_subpackage('polynomial') + config.add_subpackage('random') + config.add_subpackage('testing') + config.add_data_dir('doc') + config.add_data_dir('tests') + config.make_config_py() # installs __config__.py + return config + +if __name__ == '__main__': + print('This is the wrong setup.py file to run') diff --git a/contrib/python/numpy/py2/numpy/testing/setup.py b/contrib/python/numpy/py2/numpy/testing/setup.py new file mode 100755 index 0000000000..7c3f2fbdfd --- /dev/null +++ b/contrib/python/numpy/py2/numpy/testing/setup.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +from __future__ import division, print_function + + +def configuration(parent_package='',top_path=None): + from numpy.distutils.misc_util import Configuration + config = Configuration('testing', parent_package, top_path) + + config.add_subpackage('_private') + config.add_data_dir('tests') + return config + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(maintainer="NumPy Developers", + maintainer_email="numpy-dev@numpy.org", + description="NumPy test module", + url="https://www.numpy.org", + license="NumPy License (BSD Style)", + configuration=configuration, + ) diff --git 
a/contrib/python/numpy/py2/numpy/testing/tests/__init__.py b/contrib/python/numpy/py2/numpy/testing/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/testing/tests/test_decorators.py b/contrib/python/numpy/py2/numpy/testing/tests/test_decorators.py new file mode 100644 index 0000000000..c029bf90c1 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/testing/tests/test_decorators.py @@ -0,0 +1,216 @@ +""" +Test the decorators from ``testing.decorators``. + +""" +from __future__ import division, absolute_import, print_function + +import warnings +import pytest + +from numpy.testing import ( + assert_, assert_raises, dec, SkipTest, KnownFailureException, + ) + + +try: + with warnings.catch_warnings(): + warnings.simplefilter("always") + import nose # noqa: F401 +except ImportError: + HAVE_NOSE = False +else: + HAVE_NOSE = True + + +@pytest.mark.skipif(not HAVE_NOSE, reason="Needs nose") +class TestNoseDecorators(object): + # These tests are run in a class for simplicity while still + # getting a report on each, skipped or success. + + class DidntSkipException(Exception): + pass + + def test_slow(self): + @dec.slow + def slow_func(x, y, z): + pass + + assert_(slow_func.slow) + + def test_setastest(self): + @dec.setastest() + def f_default(a): + pass + + @dec.setastest(True) + def f_istest(a): + pass + + @dec.setastest(False) + def f_isnottest(a): + pass + + assert_(f_default.__test__) + assert_(f_istest.__test__) + assert_(not f_isnottest.__test__) + + def test_skip_functions_hardcoded(self): + @dec.skipif(True) + def f1(x): + raise self.DidntSkipException + + try: + f1('a') + except self.DidntSkipException: + raise Exception('Failed to skip') + except SkipTest().__class__: + pass + + @dec.skipif(False) + def f2(x): + raise self.DidntSkipException + + try: + f2('a') + except self.DidntSkipException: + pass + except SkipTest().__class__: + raise Exception('Skipped when not expected to') + + def test_skip_functions_callable(self): + def skip_tester(): + return skip_flag == 'skip me!' + + @dec.skipif(skip_tester) + def f1(x): + raise self.DidntSkipException + + try: + skip_flag = 'skip me!' + f1('a') + except self.DidntSkipException: + raise Exception('Failed to skip') + except SkipTest().__class__: + pass + + @dec.skipif(skip_tester) + def f2(x): + raise self.DidntSkipException + + try: + skip_flag = 'five is right out!' + f2('a') + except self.DidntSkipException: + pass + except SkipTest().__class__: + raise Exception('Skipped when not expected to') + + def test_skip_generators_hardcoded(self): + @dec.knownfailureif(True, "This test is known to fail") + def g1(x): + for i in range(x): + yield i + + try: + for j in g1(10): + pass + except KnownFailureException().__class__: + pass + else: + raise Exception('Failed to mark as known failure') + + @dec.knownfailureif(False, "This test is NOT known to fail") + def g2(x): + for i in range(x): + yield i + raise self.DidntSkipException('FAIL') + + try: + for j in g2(10): + pass + except KnownFailureException().__class__: + raise Exception('Marked incorrectly as known failure') + except self.DidntSkipException: + pass + + def test_skip_generators_callable(self): + def skip_tester(): + return skip_flag == 'skip me!' + + @dec.knownfailureif(skip_tester, "This test is known to fail") + def g1(x): + for i in range(x): + yield i + + try: + skip_flag = 'skip me!' 
+ for j in g1(10): + pass + except KnownFailureException().__class__: + pass + else: + raise Exception('Failed to mark as known failure') + + @dec.knownfailureif(skip_tester, "This test is NOT known to fail") + def g2(x): + for i in range(x): + yield i + raise self.DidntSkipException('FAIL') + + try: + skip_flag = 'do not skip' + for j in g2(10): + pass + except KnownFailureException().__class__: + raise Exception('Marked incorrectly as known failure') + except self.DidntSkipException: + pass + + def test_deprecated(self): + @dec.deprecated(True) + def non_deprecated_func(): + pass + + @dec.deprecated() + def deprecated_func(): + import warnings + warnings.warn("TEST: deprecated func", DeprecationWarning) + + @dec.deprecated() + def deprecated_func2(): + import warnings + warnings.warn("AHHHH") + raise ValueError + + @dec.deprecated() + def deprecated_func3(): + import warnings + warnings.warn("AHHHH") + + # marked as deprecated, but does not raise DeprecationWarning + assert_raises(AssertionError, non_deprecated_func) + # should be silent + deprecated_func() + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") # do not propagate unrelated warnings + # fails if deprecated decorator just disables test. See #1453. + assert_raises(ValueError, deprecated_func2) + # warning is not a DeprecationWarning + assert_raises(AssertionError, deprecated_func3) + + def test_parametrize(self): + # dec.parametrize assumes that it is being run by nose. Because + # we are running under pytest, we need to explicitly check the + # results. + @dec.parametrize('base, power, expected', + [(1, 1, 1), + (2, 1, 2), + (2, 2, 4)]) + def check_parametrize(base, power, expected): + assert_(base**power == expected) + + count = 0 + for test in check_parametrize(): + test[0](*test[1:]) + count += 1 + assert_(count == 3) diff --git a/contrib/python/numpy/py2/numpy/testing/tests/test_doctesting.py b/contrib/python/numpy/py2/numpy/testing/tests/test_doctesting.py new file mode 100644 index 0000000000..b77cd93e0b --- /dev/null +++ b/contrib/python/numpy/py2/numpy/testing/tests/test_doctesting.py @@ -0,0 +1,59 @@ +""" Doctests for NumPy-specific nose/doctest modifications + +""" +from __future__ import division, absolute_import, print_function + +#FIXME: None of these tests is run, because 'check' is not a recognized +# testing prefix. + +# try the #random directive on the output line +def check_random_directive(): + ''' + >>> 2+2 + <BadExample object at 0x084D05AC> #random: may vary on your system + ''' + +# check the implicit "import numpy as np" +def check_implicit_np(): + ''' + >>> np.array([1,2,3]) + array([1, 2, 3]) + ''' + +# there's some extraneous whitespace around the correct responses +def check_whitespace_enabled(): + ''' + # whitespace after the 3 + >>> 1+2 + 3 + + # whitespace before the 7 + >>> 3+4 + 7 + ''' + +def check_empty_output(): + """ Check that no output does not cause an error. 
+ + This is related to nose bug 445; the numpy plugin changed the + doctest-result-variable default and therefore hit this bug: + http://code.google.com/p/python-nose/issues/detail?id=445 + + >>> a = 10 + """ + +def check_skip(): + """ Check skip directive + + The test below should not run + + >>> 1/0 #doctest: +SKIP + """ + + +if __name__ == '__main__': + # Run tests outside numpy test rig + import nose + from numpy.testing.noseclasses import NumpyDoctest + argv = ['', __file__, '--with-numpydoctest'] + nose.core.TestProgram(argv=argv, addplugins=[NumpyDoctest()]) diff --git a/contrib/python/numpy/py2/numpy/testing/tests/test_utils.py b/contrib/python/numpy/py2/numpy/testing/tests/test_utils.py new file mode 100644 index 0000000000..7f6cbb8fe7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/testing/tests/test_utils.py @@ -0,0 +1,1597 @@ +from __future__ import division, absolute_import, print_function + +import warnings +import sys +import os +import itertools +import textwrap +import pytest +import weakref + +import numpy as np +from numpy.testing import ( + assert_equal, assert_array_equal, assert_almost_equal, + assert_array_almost_equal, assert_array_less, build_err_msg, raises, + assert_raises, assert_warns, assert_no_warnings, assert_allclose, + assert_approx_equal, assert_array_almost_equal_nulp, assert_array_max_ulp, + clear_and_catch_warnings, suppress_warnings, assert_string_equal, assert_, + tempdir, temppath, assert_no_gc_cycles, HAS_REFCOUNT + ) +from numpy.core.overrides import ENABLE_ARRAY_FUNCTION + + +class _GenericTest(object): + + def _test_equal(self, a, b): + self._assert_func(a, b) + + def _test_not_equal(self, a, b): + with assert_raises(AssertionError): + self._assert_func(a, b) + + def test_array_rank1_eq(self): + """Test two equal array of rank 1 are found equal.""" + a = np.array([1, 2]) + b = np.array([1, 2]) + + self._test_equal(a, b) + + def test_array_rank1_noteq(self): + """Test two different array of rank 1 are found not equal.""" + a = np.array([1, 2]) + b = np.array([2, 2]) + + self._test_not_equal(a, b) + + def test_array_rank2_eq(self): + """Test two equal array of rank 2 are found equal.""" + a = np.array([[1, 2], [3, 4]]) + b = np.array([[1, 2], [3, 4]]) + + self._test_equal(a, b) + + def test_array_diffshape(self): + """Test two arrays with different shapes are found not equal.""" + a = np.array([1, 2]) + b = np.array([[1, 2], [1, 2]]) + + self._test_not_equal(a, b) + + def test_objarray(self): + """Test object arrays.""" + a = np.array([1, 1], dtype=object) + self._test_equal(a, 1) + + def test_array_likes(self): + self._test_equal([1, 2, 3], (1, 2, 3)) + + +class TestArrayEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_array_equal + + def test_generic_rank1(self): + """Test rank 1 array for all dtypes.""" + def foo(t): + a = np.empty(2, t) + a.fill(1) + b = a.copy() + c = a.copy() + c.fill(0) + self._test_equal(a, b) + self._test_not_equal(c, b) + + # Test numeric types and object + for t in '?bhilqpBHILQPfdgFDG': + foo(t) + + # Test strings + for t in ['S1', 'U1']: + foo(t) + + def test_0_ndim_array(self): + x = np.array(473963742225900817127911193656584771) + y = np.array(18535119325151578301457182298393896) + assert_raises(AssertionError, self._assert_func, x, y) + + y = x + self._assert_func(x, y) + + x = np.array(43) + y = np.array(10) + assert_raises(AssertionError, self._assert_func, x, y) + + y = x + self._assert_func(x, y) + + def test_generic_rank3(self): + """Test rank 3 array for all dtypes.""" + def foo(t): + 
a = np.empty((4, 2, 3), t) + a.fill(1) + b = a.copy() + c = a.copy() + c.fill(0) + self._test_equal(a, b) + self._test_not_equal(c, b) + + # Test numeric types and object + for t in '?bhilqpBHILQPfdgFDG': + foo(t) + + # Test strings + for t in ['S1', 'U1']: + foo(t) + + def test_nan_array(self): + """Test arrays with nan values in them.""" + a = np.array([1, 2, np.nan]) + b = np.array([1, 2, np.nan]) + + self._test_equal(a, b) + + c = np.array([1, 2, 3]) + self._test_not_equal(c, b) + + def test_string_arrays(self): + """Test two arrays with different shapes are found not equal.""" + a = np.array(['floupi', 'floupa']) + b = np.array(['floupi', 'floupa']) + + self._test_equal(a, b) + + c = np.array(['floupipi', 'floupa']) + + self._test_not_equal(c, b) + + def test_recarrays(self): + """Test record arrays.""" + a = np.empty(2, [('floupi', float), ('floupa', float)]) + a['floupi'] = [1, 2] + a['floupa'] = [1, 2] + b = a.copy() + + self._test_equal(a, b) + + c = np.empty(2, [('floupipi', float), ('floupa', float)]) + c['floupipi'] = a['floupi'].copy() + c['floupa'] = a['floupa'].copy() + + with suppress_warnings() as sup: + l = sup.record(FutureWarning, message="elementwise == ") + self._test_not_equal(c, b) + assert_equal(len(l), 1) + + def test_masked_nan_inf(self): + # Regression test for gh-11121 + a = np.ma.MaskedArray([3., 4., 6.5], mask=[False, True, False]) + b = np.array([3., np.nan, 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, False, False]) + b = np.array([np.inf, 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + + def test_subclass_that_overrides_eq(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. 
+ class MyArray(np.ndarray): + def __eq__(self, other): + return bool(np.equal(self, other).all()) + + def __ne__(self, other): + return not self == other + + a = np.array([1., 2.]).view(MyArray) + b = np.array([2., 3.]).view(MyArray) + assert_(type(a == a), bool) + assert_(a == a) + assert_(a != b) + self._test_equal(a, a) + self._test_not_equal(a, b) + self._test_not_equal(b, a) + + @pytest.mark.skipif( + not ENABLE_ARRAY_FUNCTION, reason='requires __array_function__') + def test_subclass_that_does_not_implement_npall(self): + class MyArray(np.ndarray): + def __array_function__(self, *args, **kwargs): + return NotImplemented + + a = np.array([1., 2.]).view(MyArray) + b = np.array([2., 3.]).view(MyArray) + with assert_raises(TypeError): + np.all(a) + self._test_equal(a, a) + self._test_not_equal(a, b) + self._test_not_equal(b, a) + + +class TestBuildErrorMessage(object): + + def test_build_err_msg_defaults(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg) + b = ('\nItems are not equal: There is a mismatch\n ACTUAL: array([' + '1.00001, 2.00002, 3.00003])\n DESIRED: array([1.00002, ' + '2.00003, 3.00004])') + assert_equal(a, b) + + def test_build_err_msg_no_verbose(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, verbose=False) + b = '\nItems are not equal: There is a mismatch' + assert_equal(a, b) + + def test_build_err_msg_custom_names(self): + x = np.array([1.00001, 2.00002, 3.00003]) + y = np.array([1.00002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, names=('FOO', 'BAR')) + b = ('\nItems are not equal: There is a mismatch\n FOO: array([' + '1.00001, 2.00002, 3.00003])\n BAR: array([1.00002, 2.00003, ' + '3.00004])') + assert_equal(a, b) + + def test_build_err_msg_custom_precision(self): + x = np.array([1.000000001, 2.00002, 3.00003]) + y = np.array([1.000000002, 2.00003, 3.00004]) + err_msg = 'There is a mismatch' + + a = build_err_msg([x, y], err_msg, precision=10) + b = ('\nItems are not equal: There is a mismatch\n ACTUAL: array([' + '1.000000001, 2.00002 , 3.00003 ])\n DESIRED: array([' + '1.000000002, 2.00003 , 3.00004 ])') + assert_equal(a, b) + + +class TestEqual(TestArrayEqual): + + def setup(self): + self._assert_func = assert_equal + + def test_nan_items(self): + self._assert_func(np.nan, np.nan) + self._assert_func([np.nan], [np.nan]) + self._test_not_equal(np.nan, [np.nan]) + self._test_not_equal(np.nan, 1) + + def test_inf_items(self): + self._assert_func(np.inf, np.inf) + self._assert_func([np.inf], [np.inf]) + self._test_not_equal(np.inf, [np.inf]) + + def test_datetime(self): + self._test_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-01", "s") + ) + self._test_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-01", "m") + ) + + # gh-10081 + self._test_not_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-02", "s") + ) + self._test_not_equal( + np.datetime64("2017-01-01", "s"), + np.datetime64("2017-01-02", "m") + ) + + def test_nat_items(self): + # not a datetime + nadt_no_unit = np.datetime64("NaT") + nadt_s = np.datetime64("NaT", "s") + nadt_d = np.datetime64("NaT", "ns") + # not a timedelta + natd_no_unit = np.timedelta64("NaT") + natd_s = np.timedelta64("NaT", "s") + natd_d = np.timedelta64("NaT", "ns") + + dts = [nadt_no_unit, 
nadt_s, nadt_d] + tds = [natd_no_unit, natd_s, natd_d] + for a, b in itertools.product(dts, dts): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, tds): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, dts): + self._test_not_equal(a, b) + self._test_not_equal(a, [b]) + self._test_not_equal([a], [b]) + self._test_not_equal([a], np.datetime64("2017-01-01", "s")) + self._test_not_equal([b], np.datetime64("2017-01-01", "s")) + self._test_not_equal([a], np.timedelta64(123, "s")) + self._test_not_equal([b], np.timedelta64(123, "s")) + + def test_non_numeric(self): + self._assert_func('ab', 'ab') + self._test_not_equal('ab', 'abb') + + def test_complex_item(self): + self._assert_func(complex(1, 2), complex(1, 2)) + self._assert_func(complex(1, np.nan), complex(1, np.nan)) + self._test_not_equal(complex(1, np.nan), complex(1, 2)) + self._test_not_equal(complex(np.nan, 1), complex(1, np.nan)) + self._test_not_equal(complex(np.nan, np.inf), complex(np.nan, 2)) + + def test_negative_zero(self): + self._test_not_equal(np.PZERO, np.NZERO) + + def test_complex(self): + x = np.array([complex(1, 2), complex(1, np.nan)]) + y = np.array([complex(1, 2), complex(1, 2)]) + self._assert_func(x, x) + self._test_not_equal(x, y) + + def test_error_message(self): + with pytest.raises(AssertionError) as exc_info: + self._assert_func(np.array([1, 2]), np.array([[1, 2]])) + msg = str(exc_info.value) + msg2 = msg.replace("shapes (2L,), (1L, 2L)", "shapes (2,), (1, 2)") + msg_reference = textwrap.dedent("""\ + + Arrays are not equal + + (shapes (2,), (1, 2) mismatch) + x: array([1, 2]) + y: array([[1, 2]])""") + + try: + assert_equal(msg, msg_reference) + except AssertionError: + assert_equal(msg2, msg_reference) + + def test_object(self): + #gh-12942 + import datetime + a = np.array([datetime.datetime(2000, 1, 1), + datetime.datetime(2000, 1, 2)]) + self._test_not_equal(a, a[::-1]) + + +class TestArrayAlmostEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_array_almost_equal + + def test_closeness(self): + # Note that in the course of time we ended up with + # `abs(x - y) < 1.5 * 10**(-decimal)` + # instead of the previously documented + # `abs(x - y) < 0.5 * 10**(-decimal)` + # so this check serves to preserve the wrongness. 
+ + # test scalars + self._assert_func(1.499999, 0.0, decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func(1.5, 0.0, decimal=0)) + + # test arrays + self._assert_func([1.499999], [0.0], decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func([1.5], [0.0], decimal=0)) + + def test_simple(self): + x = np.array([1234.2222]) + y = np.array([1234.2223]) + + self._assert_func(x, y, decimal=3) + self._assert_func(x, y, decimal=4) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, decimal=5)) + + def test_nan(self): + anan = np.array([np.nan]) + aone = np.array([1]) + ainf = np.array([np.inf]) + self._assert_func(anan, anan) + assert_raises(AssertionError, + lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, + lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, + lambda: self._assert_func(ainf, anan)) + + def test_inf(self): + a = np.array([[1., 2.], [3., 4.]]) + b = a.copy() + a[0, 0] = np.inf + assert_raises(AssertionError, + lambda: self._assert_func(a, b)) + b[0, 0] = -np.inf + assert_raises(AssertionError, + lambda: self._assert_func(a, b)) + + def test_subclass(self): + a = np.array([[1., 2.], [3., 4.]]) + b = np.ma.masked_array([[1., 2.], [0., 4.]], + [[False, False], [True, False]]) + self._assert_func(a, b) + self._assert_func(b, a) + self._assert_func(b, b) + + # Test fully masked as well (see gh-11123). + a = np.ma.MaskedArray(3.5, mask=True) + b = np.array([3., 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.masked + b = np.array([3., 4., 6.5]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, True, True]) + b = np.array([1., 2., 3.]) + self._test_equal(a, b) + self._test_equal(b, a) + a = np.ma.MaskedArray([3., 4., 6.5], mask=[True, True, True]) + b = np.array(1.) + self._test_equal(a, b) + self._test_equal(b, a) + + def test_subclass_that_cannot_be_bool(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. + class MyArray(np.ndarray): + def __eq__(self, other): + return super(MyArray, self).__eq__(other).view(np.ndarray) + + def __lt__(self, other): + return super(MyArray, self).__lt__(other).view(np.ndarray) + + def all(self, *args, **kwargs): + raise NotImplementedError + + a = np.array([1., 2.]).view(MyArray) + self._assert_func(a, a) + + +class TestAlmostEqual(_GenericTest): + + def setup(self): + self._assert_func = assert_almost_equal + + def test_closeness(self): + # Note that in the course of time we ended up with + # `abs(x - y) < 1.5 * 10**(-decimal)` + # instead of the previously documented + # `abs(x - y) < 0.5 * 10**(-decimal)` + # so this check serves to preserve the wrongness. 
+ + # test scalars + self._assert_func(1.499999, 0.0, decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func(1.5, 0.0, decimal=0)) + + # test arrays + self._assert_func([1.499999], [0.0], decimal=0) + assert_raises(AssertionError, + lambda: self._assert_func([1.5], [0.0], decimal=0)) + + def test_nan_item(self): + self._assert_func(np.nan, np.nan) + assert_raises(AssertionError, + lambda: self._assert_func(np.nan, 1)) + assert_raises(AssertionError, + lambda: self._assert_func(np.nan, np.inf)) + assert_raises(AssertionError, + lambda: self._assert_func(np.inf, np.nan)) + + def test_inf_item(self): + self._assert_func(np.inf, np.inf) + self._assert_func(-np.inf, -np.inf) + assert_raises(AssertionError, + lambda: self._assert_func(np.inf, 1)) + assert_raises(AssertionError, + lambda: self._assert_func(-np.inf, np.inf)) + + def test_simple_item(self): + self._test_not_equal(1, 2) + + def test_complex_item(self): + self._assert_func(complex(1, 2), complex(1, 2)) + self._assert_func(complex(1, np.nan), complex(1, np.nan)) + self._assert_func(complex(np.inf, np.nan), complex(np.inf, np.nan)) + self._test_not_equal(complex(1, np.nan), complex(1, 2)) + self._test_not_equal(complex(np.nan, 1), complex(1, np.nan)) + self._test_not_equal(complex(np.nan, np.inf), complex(np.nan, 2)) + + def test_complex(self): + x = np.array([complex(1, 2), complex(1, np.nan)]) + z = np.array([complex(1, 2), complex(np.nan, 1)]) + y = np.array([complex(1, 2), complex(1, 2)]) + self._assert_func(x, x) + self._test_not_equal(x, y) + self._test_not_equal(x, z) + + def test_error_message(self): + """Check the message is formatted correctly for the decimal value. + Also check the message when input includes inf or nan (gh12200)""" + x = np.array([1.00000000001, 2.00000000002, 3.00003]) + y = np.array([1.00000000002, 2.00000000003, 3.00004]) + + # Test with a different amount of decimal digits + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y, decimal=12) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 3 / 3 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.e-05') + assert_equal(msgs[5], 'Max relative difference: 3.33328889e-06') + assert_equal( + msgs[6], + ' x: array([1.00000000001, 2.00000000002, 3.00003 ])') + assert_equal( + msgs[7], + ' y: array([1.00000000002, 2.00000000003, 3.00004 ])') + + # With the default value of decimal digits, only the 3rd element + # differs. Note that we only check for the formatting of the arrays + # themselves. + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 1 / 3 (33.3%)') + assert_equal(msgs[4], 'Max absolute difference: 1.e-05') + assert_equal(msgs[5], 'Max relative difference: 3.33328889e-06') + assert_equal(msgs[6], ' x: array([1. , 2. , 3.00003])') + assert_equal(msgs[7], ' y: array([1. , 2. 
, 3.00004])') + + # Check the error message when input includes inf + x = np.array([np.inf, 0]) + y = np.array([np.inf, 1]) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 1 / 2 (50%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 1.') + assert_equal(msgs[6], ' x: array([inf, 0.])') + assert_equal(msgs[7], ' y: array([inf, 1.])') + + # Check the error message when dividing by zero + x = np.array([1, 2]) + y = np.array([0, 0]) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 2 / 2 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 2') + assert_equal(msgs[5], 'Max relative difference: inf') + + def test_error_message_2(self): + """Check the message is formatted correctly when either x or y is a scalar.""" + x = 2 + y = np.ones(20) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 1.') + + y = 2 + x = np.ones(20) + with pytest.raises(AssertionError) as exc_info: + self._assert_func(x, y) + msgs = str(exc_info.value).split('\n') + assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)') + assert_equal(msgs[4], 'Max absolute difference: 1.') + assert_equal(msgs[5], 'Max relative difference: 0.5') + + def test_subclass_that_cannot_be_bool(self): + # While we cannot guarantee testing functions will always work for + # subclasses, the tests should ideally rely only on subclasses having + # comparison operators, not on them being able to store booleans + # (which, e.g., astropy Quantity cannot usefully do). See gh-8452. 
+ class MyArray(np.ndarray): + def __eq__(self, other): + return super(MyArray, self).__eq__(other).view(np.ndarray) + + def __lt__(self, other): + return super(MyArray, self).__lt__(other).view(np.ndarray) + + def all(self, *args, **kwargs): + raise NotImplementedError + + a = np.array([1., 2.]).view(MyArray) + self._assert_func(a, a) + + +class TestApproxEqual(object): + + def setup(self): + self._assert_func = assert_approx_equal + + def test_simple_0d_arrays(self): + x = np.array(1234.22) + y = np.array(1234.23) + + self._assert_func(x, y, significant=5) + self._assert_func(x, y, significant=6) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, significant=7)) + + def test_simple_items(self): + x = 1234.22 + y = 1234.23 + + self._assert_func(x, y, significant=4) + self._assert_func(x, y, significant=5) + self._assert_func(x, y, significant=6) + assert_raises(AssertionError, + lambda: self._assert_func(x, y, significant=7)) + + def test_nan_array(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, anan)) + + def test_nan_items(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, anan)) + + +class TestArrayAssertLess(object): + + def setup(self): + self._assert_func = assert_array_less + + def test_simple_arrays(self): + x = np.array([1.1, 2.2]) + y = np.array([1.2, 2.3]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([1.0, 2.3]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_rank2(self): + x = np.array([[1.1, 2.2], [3.3, 4.4]]) + y = np.array([[1.2, 2.3], [3.4, 4.5]]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([[1.0, 2.3], [3.4, 4.5]]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_rank3(self): + x = np.ones(shape=(2, 2, 2)) + y = np.ones(shape=(2, 2, 2))+1 + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y[0, 0, 0] = 0 + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + def test_simple_items(self): + x = 1.1 + y = 2.2 + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([2.2, 3.3]) + + self._assert_func(x, y) + assert_raises(AssertionError, lambda: self._assert_func(y, x)) + + y = np.array([1.0, 3.3]) + + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + + def test_nan_noncompare(self): + anan = np.array(np.nan) + aone = np.array(1) + ainf = np.array(np.inf) + self._assert_func(anan, anan) + assert_raises(AssertionError, lambda: self._assert_func(aone, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, aone)) + assert_raises(AssertionError, lambda: self._assert_func(anan, ainf)) + assert_raises(AssertionError, 
lambda: self._assert_func(ainf, anan)) + + def test_nan_noncompare_array(self): + x = np.array([1.1, 2.2, 3.3]) + anan = np.array(np.nan) + + assert_raises(AssertionError, lambda: self._assert_func(x, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, x)) + + x = np.array([1.1, 2.2, np.nan]) + + assert_raises(AssertionError, lambda: self._assert_func(x, anan)) + assert_raises(AssertionError, lambda: self._assert_func(anan, x)) + + y = np.array([1.0, 2.0, np.nan]) + + self._assert_func(y, x) + assert_raises(AssertionError, lambda: self._assert_func(x, y)) + + def test_inf_compare(self): + aone = np.array(1) + ainf = np.array(np.inf) + + self._assert_func(aone, ainf) + self._assert_func(-ainf, aone) + self._assert_func(-ainf, ainf) + assert_raises(AssertionError, lambda: self._assert_func(ainf, aone)) + assert_raises(AssertionError, lambda: self._assert_func(aone, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-ainf, -ainf)) + + def test_inf_compare_array(self): + x = np.array([1.1, 2.2, np.inf]) + ainf = np.array(np.inf) + + assert_raises(AssertionError, lambda: self._assert_func(x, ainf)) + assert_raises(AssertionError, lambda: self._assert_func(ainf, x)) + assert_raises(AssertionError, lambda: self._assert_func(x, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-x, -ainf)) + assert_raises(AssertionError, lambda: self._assert_func(-ainf, -x)) + self._assert_func(-ainf, x) + + +@pytest.mark.skip(reason="The raises decorator depends on Nose") +class TestRaises(object): + + def setup(self): + class MyException(Exception): + pass + + self.e = MyException + + def raises_exception(self, e): + raise e + + def does_not_raise_exception(self): + pass + + def test_correct_catch(self): + raises(self.e)(self.raises_exception)(self.e) # raises? + + def test_wrong_exception(self): + try: + raises(self.e)(self.raises_exception)(RuntimeError) # raises? + except RuntimeError: + return + else: + raise AssertionError("should have caught RuntimeError") + + def test_catch_no_raise(self): + try: + raises(self.e)(self.does_not_raise_exception)() # raises? 
+ except AssertionError: + return + else: + raise AssertionError("should have raised an AssertionError") + + +class TestWarns(object): + + def test_warn(self): + def f(): + warnings.warn("yo") + return 3 + + before_filters = sys.modules['warnings'].filters[:] + assert_equal(assert_warns(UserWarning, f), 3) + after_filters = sys.modules['warnings'].filters + + assert_raises(AssertionError, assert_no_warnings, f) + assert_equal(assert_no_warnings(lambda x: x, 1), 1) + + # Check that the warnings state is unchanged + assert_equal(before_filters, after_filters, + "assert_warns does not preserver warnings state") + + def test_context_manager(self): + + before_filters = sys.modules['warnings'].filters[:] + with assert_warns(UserWarning): + warnings.warn("yo") + after_filters = sys.modules['warnings'].filters + + def no_warnings(): + with assert_no_warnings(): + warnings.warn("yo") + + assert_raises(AssertionError, no_warnings) + assert_equal(before_filters, after_filters, + "assert_warns does not preserver warnings state") + + def test_warn_wrong_warning(self): + def f(): + warnings.warn("yo", DeprecationWarning) + + failed = False + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + try: + # Should raise a DeprecationWarning + assert_warns(UserWarning, f) + failed = True + except DeprecationWarning: + pass + + if failed: + raise AssertionError("wrong warning caught by assert_warn") + + +class TestAssertAllclose(object): + + def test_simple(self): + x = 1e-3 + y = 1e-9 + + assert_allclose(x, y, atol=1) + assert_raises(AssertionError, assert_allclose, x, y) + + a = np.array([x, y, x, y]) + b = np.array([x, y, x, x]) + + assert_allclose(a, b, atol=1) + assert_raises(AssertionError, assert_allclose, a, b) + + b[-1] = y * (1 + 1e-8) + assert_allclose(a, b) + assert_raises(AssertionError, assert_allclose, a, b, rtol=1e-9) + + assert_allclose(6, 10, rtol=0.5) + assert_raises(AssertionError, assert_allclose, 10, 6, rtol=0.5) + + def test_min_int(self): + a = np.array([np.iinfo(np.int_).min], dtype=np.int_) + # Should not raise: + assert_allclose(a, a) + + def test_report_fail_percentage(self): + a = np.array([1, 1, 1, 1]) + b = np.array([1, 1, 1, 2]) + + with pytest.raises(AssertionError) as exc_info: + assert_allclose(a, b) + msg = str(exc_info.value) + assert_('Mismatched elements: 1 / 4 (25%)\n' + 'Max absolute difference: 1\n' + 'Max relative difference: 0.5' in msg) + + def test_equal_nan(self): + a = np.array([np.nan]) + b = np.array([np.nan]) + # Should not raise: + assert_allclose(a, b, equal_nan=True) + + def test_not_equal_nan(self): + a = np.array([np.nan]) + b = np.array([np.nan]) + assert_raises(AssertionError, assert_allclose, a, b, equal_nan=False) + + def test_equal_nan_default(self): + # Make sure equal_nan default behavior remains unchanged. (All + # of these functions use assert_array_compare under the hood.) + # None of these should raise. 
+ a = np.array([np.nan]) + b = np.array([np.nan]) + assert_array_equal(a, b) + assert_array_almost_equal(a, b) + assert_array_less(a, b) + assert_allclose(a, b) + + def test_report_max_relative_error(self): + a = np.array([0, 1]) + b = np.array([0, 2]) + + with pytest.raises(AssertionError) as exc_info: + assert_allclose(a, b) + msg = str(exc_info.value) + assert_('Max relative difference: 0.5' in msg) + + +class TestArrayAlmostEqualNulp(object): + + def test_float64_pass(self): + # The number of units of least precision + # In this case, use a few places above the lowest level (ie nulp=1) + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + + # Addition + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + # Subtraction + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + def test_float64_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_float32_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + def test_float32_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_float16_pass(self): + nulp = 5 + x = np.linspace(-4, 4, 10, dtype=np.float16) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(x, y, nulp) + + def test_float16_fail(self): + nulp = 5 + x = np.linspace(-4, 4, 10, dtype=np.float16) + x = 10**x + x = np.r_[-x, x] + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + x, y, nulp) + + def test_complex128_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + # The test condition needs to be at least a factor of sqrt(2) smaller + # because the real and imaginary parts both change + y = x + x*eps*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x - x*epsneg*nulp/4. 
+ assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + def test_complex128_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float64) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + # The test condition needs to be at least a factor of sqrt(2) smaller + # because the real and imaginary parts both change + y = x + x*eps*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x - x*epsneg*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + def test_complex64_pass(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x + x*eps*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp/2. + assert_array_almost_equal_nulp(xi, x + y*1j, nulp) + assert_array_almost_equal_nulp(xi, y + x*1j, nulp) + y = x - x*epsneg*nulp/4. + assert_array_almost_equal_nulp(xi, y + y*1j, nulp) + + def test_complex64_fail(self): + nulp = 5 + x = np.linspace(-20, 20, 50, dtype=np.float32) + x = 10**x + x = np.r_[-x, x] + xi = x + x*1j + + eps = np.finfo(x.dtype).eps + y = x + x*eps*nulp*2. + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x + x*eps*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + epsneg = np.finfo(x.dtype).epsneg + y = x - x*epsneg*nulp*2. 
+ assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, x + y*1j, nulp) + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + x*1j, nulp) + y = x - x*epsneg*nulp + assert_raises(AssertionError, assert_array_almost_equal_nulp, + xi, y + y*1j, nulp) + + +class TestULP(object): + + def test_equal(self): + x = np.random.randn(10) + assert_array_max_ulp(x, x, maxulp=0) + + def test_single(self): + # Generate 1 + small deviation, check that adding eps gives a few UNL + x = np.ones(10).astype(np.float32) + x += 0.01 * np.random.randn(10).astype(np.float32) + eps = np.finfo(np.float32).eps + assert_array_max_ulp(x, x+eps, maxulp=20) + + def test_double(self): + # Generate 1 + small deviation, check that adding eps gives a few UNL + x = np.ones(10).astype(np.float64) + x += 0.01 * np.random.randn(10).astype(np.float64) + eps = np.finfo(np.float64).eps + assert_array_max_ulp(x, x+eps, maxulp=200) + + def test_inf(self): + for dt in [np.float32, np.float64]: + inf = np.array([np.inf]).astype(dt) + big = np.array([np.finfo(dt).max]) + assert_array_max_ulp(inf, big, maxulp=200) + + def test_nan(self): + # Test that nan is 'far' from small, tiny, inf, max and min + for dt in [np.float32, np.float64]: + if dt == np.float32: + maxulp = 1e6 + else: + maxulp = 1e12 + inf = np.array([np.inf]).astype(dt) + nan = np.array([np.nan]).astype(dt) + big = np.array([np.finfo(dt).max]) + tiny = np.array([np.finfo(dt).tiny]) + zero = np.array([np.PZERO]).astype(dt) + nzero = np.array([np.NZERO]).astype(dt) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, inf, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, big, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, tiny, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, zero, + maxulp=maxulp)) + assert_raises(AssertionError, + lambda: assert_array_max_ulp(nan, nzero, + maxulp=maxulp)) + + +class TestStringEqual(object): + def test_simple(self): + assert_string_equal("hello", "hello") + assert_string_equal("hello\nmultiline", "hello\nmultiline") + + with pytest.raises(AssertionError) as exc_info: + assert_string_equal("foo\nbar", "hello\nbar") + msg = str(exc_info.value) + assert_equal(msg, "Differences in strings:\n- foo\n+ hello") + + assert_raises(AssertionError, + lambda: assert_string_equal("foo", "hello")) + + def test_regex(self): + assert_string_equal("a+*b", "a+*b") + + assert_raises(AssertionError, + lambda: assert_string_equal("aaa", "a+b")) + + +def assert_warn_len_equal(mod, n_in_context, py34=None, py37=None): + try: + mod_warns = mod.__warningregistry__ + except AttributeError: + # the lack of a __warningregistry__ + # attribute means that no warning has + # occurred; this can be triggered in + # a parallel test scenario, while in + # a serial test scenario an initial + # warning (and therefore the attribute) + # are always created first + mod_warns = {} + + num_warns = len(mod_warns) + # Python 3.4 appears to clear any pre-existing warnings of the same type, + # when raising warnings inside a catch_warnings block. So, there is a + # warning generated by the tests within the context manager, but no + # previous warnings. + if 'version' in mod_warns: + # Python 3 adds a 'version' entry to the registry, + # do not count it. + num_warns -= 1 + + # Behavior of warnings is Python version dependent. Adjust the + # expected result to compensate. 
In particular, Python 3.7 does + # not make an entry for ignored warnings. + if sys.version_info[:2] >= (3, 7): + if py37 is not None: + n_in_context = py37 + elif sys.version_info[:2] >= (3, 4): + if py34 is not None: + n_in_context = py34 + assert_equal(num_warns, n_in_context) + +def test_warn_len_equal_call_scenarios(): + # assert_warn_len_equal is called under + # varying circumstances depending on serial + # vs. parallel test scenarios; this test + # simply aims to probe both code paths and + # check that no assertion is uncaught + + # parallel scenario -- no warning issued yet + class mod(object): + pass + + mod_inst = mod() + + assert_warn_len_equal(mod=mod_inst, + n_in_context=0) + + # serial test scenario -- the __warningregistry__ + # attribute should be present + class mod(object): + def __init__(self): + self.__warningregistry__ = {'warning1':1, + 'warning2':2} + + mod_inst = mod() + assert_warn_len_equal(mod=mod_inst, + n_in_context=2) + + +def _get_fresh_mod(): + # Get this module, with warning registry empty + my_mod = sys.modules[__name__] + try: + my_mod.__warningregistry__.clear() + except AttributeError: + # will not have a __warningregistry__ unless warning has been + # raised in the module at some point + pass + return my_mod + + +def test_clear_and_catch_warnings(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + with clear_and_catch_warnings(modules=[my_mod]): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_equal(my_mod.__warningregistry__, {}) + # Without specified modules, don't clear warnings during context + # Python 3.7 catch_warnings doesn't make an entry for 'ignore'. + with clear_and_catch_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + # Confirm that specifying module keeps old warning, does not add new + with clear_and_catch_warnings(modules=[my_mod]): + warnings.simplefilter('ignore') + warnings.warn('Another warning') + assert_warn_len_equal(my_mod, 1, py37=0) + # Another warning, no module spec does add to warnings dict, except on + # Python 3.4 (see comments in `assert_warn_len_equal`) + # Python 3.7 catch_warnings doesn't make an entry for 'ignore'. + with clear_and_catch_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Another warning') + assert_warn_len_equal(my_mod, 2, py34=1, py37=0) + + +def test_suppress_warnings_module(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + + def warn_other_module(): + # Apply along axis is implemented in python; stacklevel=2 means + # we end up inside its module, not ours. + def warn(arr): + warnings.warn("Some warning 2", stacklevel=2) + return arr + np.apply_along_axis(warn, 0, [0]) + + # Test module based warning suppression: + assert_warn_len_equal(my_mod, 0) + with suppress_warnings() as sup: + sup.record(UserWarning) + # suppress warning from other module (may have .pyc ending), + # if apply_along_axis is moved, had to be changed. 
+ sup.filter(module=np.lib.shape_base) + warnings.warn("Some warning") + warn_other_module() + # Check that the suppression did test the file correctly (this module + # got filtered) + assert_equal(len(sup.log), 1) + assert_equal(sup.log[0].message.args[0], "Some warning") + assert_warn_len_equal(my_mod, 0, py37=0) + sup = suppress_warnings() + # Will have to be changed if apply_along_axis is moved: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + # And test repeat works: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + + # Without specified modules, don't clear warnings during context + # Python 3.7 does not add ignored warnings. + with suppress_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + +def test_suppress_warnings_type(): + # Initial state of module, no warnings + my_mod = _get_fresh_mod() + assert_equal(getattr(my_mod, '__warningregistry__', {}), {}) + + # Test module based warning suppression: + with suppress_warnings() as sup: + sup.filter(UserWarning) + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + sup = suppress_warnings() + sup.filter(UserWarning) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + # And test repeat works: + sup.filter(module=my_mod) + with sup: + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 0) + + # Without specified modules, don't clear warnings during context + # Python 3.7 does not add ignored warnings. + with suppress_warnings(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_warn_len_equal(my_mod, 1, py37=0) + + +def test_suppress_warnings_decorate_no_record(): + sup = suppress_warnings() + sup.filter(UserWarning) + + @sup + def warn(category): + warnings.warn('Some warning', category) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + warn(UserWarning) # should be supppressed + warn(RuntimeWarning) + assert_equal(len(w), 1) + + +def test_suppress_warnings_record(): + sup = suppress_warnings() + log1 = sup.record() + + with sup: + log2 = sup.record(message='Some other warning 2') + sup.filter(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + warnings.warn('Some other warning 2') + + assert_equal(len(sup.log), 2) + assert_equal(len(log1), 1) + assert_equal(len(log2),1) + assert_equal(log2[0].message.args[0], 'Some other warning 2') + + # Do it again, with the same context to see if some warnings survived: + with sup: + log2 = sup.record(message='Some other warning 2') + sup.filter(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + warnings.warn('Some other warning 2') + + assert_equal(len(sup.log), 2) + assert_equal(len(log1), 1) + assert_equal(len(log2), 1) + assert_equal(log2[0].message.args[0], 'Some other warning 2') + + # Test nested: + with suppress_warnings() as sup: + sup.record() + with suppress_warnings() as sup2: + sup2.record(message='Some warning') + warnings.warn('Some warning') + warnings.warn('Some other warning') + assert_equal(len(sup2.log), 1) + assert_equal(len(sup.log), 1) + + +def test_suppress_warnings_forwarding(): + def warn_other_module(): + # Apply along axis is implemented in python; stacklevel=2 means + # we end up inside its module, not ours. 
+ def warn(arr): + warnings.warn("Some warning", stacklevel=2) + return arr + np.apply_along_axis(warn, 0, [0]) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("always"): + for i in range(2): + warnings.warn("Some warning") + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("location"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some warning") + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("module"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some warning") + warn_other_module() + + assert_equal(len(sup.log), 2) + + with suppress_warnings() as sup: + sup.record() + with suppress_warnings("once"): + for i in range(2): + warnings.warn("Some warning") + warnings.warn("Some other warning") + warn_other_module() + + assert_equal(len(sup.log), 2) + + +def test_tempdir(): + with tempdir() as tdir: + fpath = os.path.join(tdir, 'tmp') + with open(fpath, 'w'): + pass + assert_(not os.path.isdir(tdir)) + + raised = False + try: + with tempdir() as tdir: + raise ValueError() + except ValueError: + raised = True + assert_(raised) + assert_(not os.path.isdir(tdir)) + + +def test_temppath(): + with temppath() as fpath: + with open(fpath, 'w'): + pass + assert_(not os.path.isfile(fpath)) + + raised = False + try: + with temppath() as fpath: + raise ValueError() + except ValueError: + raised = True + assert_(raised) + assert_(not os.path.isfile(fpath)) + + +class my_cacw(clear_and_catch_warnings): + + class_modules = (sys.modules[__name__],) + + +def test_clear_and_catch_warnings_inherit(): + # Test can subclass and add default modules + my_mod = _get_fresh_mod() + with my_cacw(): + warnings.simplefilter('ignore') + warnings.warn('Some warning') + assert_equal(my_mod.__warningregistry__, {}) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +class TestAssertNoGcCycles(object): + """ Test assert_no_gc_cycles """ + def test_passes(self): + def no_cycle(): + b = [] + b.append([]) + return b + + with assert_no_gc_cycles(): + no_cycle() + + assert_no_gc_cycles(no_cycle) + + def test_asserts(self): + def make_cycle(): + a = [] + a.append(a) + a.append(a) + return a + + with assert_raises(AssertionError): + with assert_no_gc_cycles(): + make_cycle() + + with assert_raises(AssertionError): + assert_no_gc_cycles(make_cycle) + + @pytest.mark.slow + def test_fails(self): + """ + Test that in cases where the garbage cannot be collected, we raise an + error, instead of hanging forever trying to clear it. + """ + + class ReferenceCycleInDel(object): + """ + An object that not only contains a reference cycle, but creates new + cycles whenever it's garbage-collected and its __del__ runs + """ + make_cycle = True + + def __init__(self): + self.cycle = self + + def __del__(self): + # break the current cycle so that `self` can be freed + self.cycle = None + + if ReferenceCycleInDel.make_cycle: + # but create a new one so that the garbage collector has more + # work to do. + ReferenceCycleInDel() + + try: + w = weakref.ref(ReferenceCycleInDel()) + try: + with assert_raises(RuntimeError): + # this will be unable to get a baseline empty garbage + assert_no_gc_cycles(lambda: None) + except AssertionError: + # the above test is only necessary if the GC actually tried to free + # our object anyway, which python 2.7 does not. 
+ if w() is not None: + pytest.skip("GC does not call __del__ on cyclic objects") + raise + + finally: + # make sure that we stop creating reference cycles + ReferenceCycleInDel.make_cycle = False diff --git a/contrib/python/numpy/py2/numpy/tests/__init__.py b/contrib/python/numpy/py2/numpy/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contrib/python/numpy/py2/numpy/tests/test_ctypeslib.py b/contrib/python/numpy/py2/numpy/tests/test_ctypeslib.py new file mode 100644 index 0000000000..521208c36d --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_ctypeslib.py @@ -0,0 +1,367 @@ +from __future__ import division, absolute_import, print_function + +import sys +import pytest +import weakref + +import numpy as np +from numpy.ctypeslib import ndpointer, load_library, as_array +from numpy.distutils.misc_util import get_shared_lib_extension +from numpy.testing import assert_, assert_array_equal, assert_raises, assert_equal + +try: + import ctypes +except ImportError: + ctypes = None +else: + cdll = None + test_cdll = None + if hasattr(sys, 'gettotalrefcount'): + try: + cdll = load_library('_multiarray_umath_d', np.core._multiarray_umath.__file__) + except OSError: + pass + try: + test_cdll = load_library('_multiarray_tests', np.core._multiarray_tests.__file__) + except OSError: + pass + if cdll is None: + cdll = load_library('_multiarray_umath', np.core._multiarray_umath.__file__) + if test_cdll is None: + test_cdll = load_library('_multiarray_tests', np.core._multiarray_tests.__file__) + + c_forward_pointer = test_cdll.forward_pointer + + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available in this python") +@pytest.mark.skipif(sys.platform == 'cygwin', + reason="Known to fail on cygwin") +class TestLoadLibrary(object): + def test_basic(self): + try: + # Should succeed + load_library('_multiarray_umath', np.core._multiarray_umath.__file__) + except ImportError as e: + msg = ("ctypes is not available on this python: skipping the test" + " (import error was: %s)" % str(e)) + print(msg) + + def test_basic2(self): + # Regression for #801: load_library with a full library name + # (including extension) does not work. 
+ try: + try: + so = get_shared_lib_extension(is_python_ext=True) + # Should succeed + load_library('_multiarray_umath%s' % so, np.core._multiarray_umath.__file__) + except ImportError: + print("No distutils available, skipping test.") + except ImportError as e: + msg = ("ctypes is not available on this python: skipping the test" + " (import error was: %s)" % str(e)) + print(msg) + + +class TestNdpointer(object): + def test_dtype(self): + dt = np.intc + p = ndpointer(dtype=dt) + assert_(p.from_param(np.array([1], dt))) + dt = '<i4' + p = ndpointer(dtype=dt) + assert_(p.from_param(np.array([1], dt))) + dt = np.dtype('>i4') + p = ndpointer(dtype=dt) + p.from_param(np.array([1], dt)) + assert_raises(TypeError, p.from_param, + np.array([1], dt.newbyteorder('swap'))) + dtnames = ['x', 'y'] + dtformats = [np.intc, np.float64] + dtdescr = {'names': dtnames, 'formats': dtformats} + dt = np.dtype(dtdescr) + p = ndpointer(dtype=dt) + assert_(p.from_param(np.zeros((10,), dt))) + samedt = np.dtype(dtdescr) + p = ndpointer(dtype=samedt) + assert_(p.from_param(np.zeros((10,), dt))) + dt2 = np.dtype(dtdescr, align=True) + if dt.itemsize != dt2.itemsize: + assert_raises(TypeError, p.from_param, np.zeros((10,), dt2)) + else: + assert_(p.from_param(np.zeros((10,), dt2))) + + def test_ndim(self): + p = ndpointer(ndim=0) + assert_(p.from_param(np.array(1))) + assert_raises(TypeError, p.from_param, np.array([1])) + p = ndpointer(ndim=1) + assert_raises(TypeError, p.from_param, np.array(1)) + assert_(p.from_param(np.array([1]))) + p = ndpointer(ndim=2) + assert_(p.from_param(np.array([[1]]))) + + def test_shape(self): + p = ndpointer(shape=(1, 2)) + assert_(p.from_param(np.array([[1, 2]]))) + assert_raises(TypeError, p.from_param, np.array([[1], [2]])) + p = ndpointer(shape=()) + assert_(p.from_param(np.array(1))) + + def test_flags(self): + x = np.array([[1, 2], [3, 4]], order='F') + p = ndpointer(flags='FORTRAN') + assert_(p.from_param(x)) + p = ndpointer(flags='CONTIGUOUS') + assert_raises(TypeError, p.from_param, x) + p = ndpointer(flags=x.flags.num) + assert_(p.from_param(x)) + assert_raises(TypeError, p.from_param, np.array([[1, 2], [3, 4]])) + + def test_cache(self): + assert_(ndpointer(dtype=np.float64) is ndpointer(dtype=np.float64)) + + # shapes are normalized + assert_(ndpointer(shape=2) is ndpointer(shape=(2,))) + + # 1.12 <= v < 1.16 had a bug that made these fail + assert_(ndpointer(shape=2) is not ndpointer(ndim=2)) + assert_(ndpointer(ndim=2) is not ndpointer(shape=2)) + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available on this python installation") +class TestNdpointerCFunc(object): + def test_arguments(self): + """ Test that arguments are coerced from arrays """ + c_forward_pointer.restype = ctypes.c_void_p + c_forward_pointer.argtypes = (ndpointer(ndim=2),) + + c_forward_pointer(np.zeros((2, 3))) + # too many dimensions + assert_raises( + ctypes.ArgumentError, c_forward_pointer, np.zeros((2, 3, 4))) + + @pytest.mark.parametrize( + 'dt', [ + float, + np.dtype(dict( + formats=['<i4', '<i4'], + names=['a', 'b'], + offsets=[0, 2], + itemsize=6 + )) + ], ids=[ + 'float', + 'overlapping-fields' + ] + ) + def test_return(self, dt): + """ Test that return values are coerced to arrays """ + arr = np.zeros((2, 3), dt) + ptr_type = ndpointer(shape=arr.shape, dtype=arr.dtype) + + c_forward_pointer.restype = ptr_type + c_forward_pointer.argtypes = (ptr_type,) + + # check that the arrays are equivalent views on the same data + arr2 = c_forward_pointer(arr) + assert_equal(arr2.dtype, 
arr.dtype) + assert_equal(arr2.shape, arr.shape) + assert_equal( + arr2.__array_interface__['data'], + arr.__array_interface__['data'] + ) + + def test_vague_return_value(self): + """ Test that vague ndpointer return values do not promote to arrays """ + arr = np.zeros((2, 3)) + ptr_type = ndpointer(dtype=arr.dtype) + + c_forward_pointer.restype = ptr_type + c_forward_pointer.argtypes = (ptr_type,) + + ret = c_forward_pointer(arr) + assert_(isinstance(ret, ptr_type)) + + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available on this python installation") +class TestAsArray(object): + def test_array(self): + from ctypes import c_int + + pair_t = c_int * 2 + a = as_array(pair_t(1, 2)) + assert_equal(a.shape, (2,)) + assert_array_equal(a, np.array([1, 2])) + a = as_array((pair_t * 3)(pair_t(1, 2), pair_t(3, 4), pair_t(5, 6))) + assert_equal(a.shape, (3, 2)) + assert_array_equal(a, np.array([[1, 2], [3, 4], [5, 6]])) + + def test_pointer(self): + from ctypes import c_int, cast, POINTER + + p = cast((c_int * 10)(*range(10)), POINTER(c_int)) + + a = as_array(p, shape=(10,)) + assert_equal(a.shape, (10,)) + assert_array_equal(a, np.arange(10)) + + a = as_array(p, shape=(2, 5)) + assert_equal(a.shape, (2, 5)) + assert_array_equal(a, np.arange(10).reshape((2, 5))) + + # shape argument is required + assert_raises(TypeError, as_array, p) + + def test_struct_array_pointer(self): + from ctypes import c_int16, Structure, pointer + + class Struct(Structure): + _fields_ = [('a', c_int16)] + + Struct3 = 3 * Struct + + c_array = (2 * Struct3)( + Struct3(Struct(a=1), Struct(a=2), Struct(a=3)), + Struct3(Struct(a=4), Struct(a=5), Struct(a=6)) + ) + + expected = np.array([ + [(1,), (2,), (3,)], + [(4,), (5,), (6,)], + ], dtype=[('a', np.int16)]) + + def check(x): + assert_equal(x.dtype, expected.dtype) + assert_equal(x, expected) + + # all of these should be equivalent + check(as_array(c_array)) + check(as_array(pointer(c_array), shape=())) + check(as_array(pointer(c_array[0]), shape=(2,))) + check(as_array(pointer(c_array[0][0]), shape=(2, 3))) + + def test_reference_cycles(self): + # related to gh-6511 + import ctypes + + # create array to work with + # don't use int/long to avoid running into bpo-10746 + N = 100 + a = np.arange(N, dtype=np.short) + + # get pointer to array + pnt = np.ctypeslib.as_ctypes(a) + + with np.testing.assert_no_gc_cycles(): + # decay the array above to a pointer to its first element + newpnt = ctypes.cast(pnt, ctypes.POINTER(ctypes.c_short)) + # and construct an array using this data + b = np.ctypeslib.as_array(newpnt, (N,)) + # now delete both, which should cleanup both objects + del newpnt, b + + def test_segmentation_fault(self): + arr = np.zeros((224, 224, 3)) + c_arr = np.ctypeslib.as_ctypes(arr) + arr_ref = weakref.ref(arr) + del arr + + # check the reference wasn't cleaned up + assert_(arr_ref() is not None) + + # check we avoid the segfault + c_arr[0][0][0] + + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available on this python installation") +class TestAsCtypesType(object): + """ Test conversion from dtypes to ctypes types """ + def test_scalar(self): + dt = np.dtype('<u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16.__ctype_le__) + + dt = np.dtype('>u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16.__ctype_be__) + + dt = np.dtype('u2') + ct = np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, ctypes.c_uint16) + + def test_subarray(self): + dt = np.dtype((np.int32, (2, 3))) + ct = 
np.ctypeslib.as_ctypes_type(dt) + assert_equal(ct, 2 * (3 * ctypes.c_int32)) + + def test_structure(self): + dt = np.dtype([ + ('a', np.uint16), + ('b', np.uint32), + ]) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Structure)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('b', ctypes.c_uint32), + ]) + + def test_structure_aligned(self): + dt = np.dtype([ + ('a', np.uint16), + ('b', np.uint32), + ], align=True) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Structure)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('', ctypes.c_char * 2), # padding + ('b', ctypes.c_uint32), + ]) + + def test_union(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 0], + formats=[np.uint16, np.uint32] + )) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Union)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('b', ctypes.c_uint32), + ]) + + def test_padded_union(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 0], + formats=[np.uint16, np.uint32], + itemsize=5, + )) + + ct = np.ctypeslib.as_ctypes_type(dt) + assert_(issubclass(ct, ctypes.Union)) + assert_equal(ctypes.sizeof(ct), dt.itemsize) + assert_equal(ct._fields_, [ + ('a', ctypes.c_uint16), + ('b', ctypes.c_uint32), + ('', ctypes.c_char * 5), # padding + ]) + + def test_overlapping(self): + dt = np.dtype(dict( + names=['a', 'b'], + offsets=[0, 2], + formats=[np.uint32, np.uint32] + )) + assert_raises(NotImplementedError, np.ctypeslib.as_ctypes_type, dt) diff --git a/contrib/python/numpy/py2/numpy/tests/test_matlib.py b/contrib/python/numpy/py2/numpy/tests/test_matlib.py new file mode 100644 index 0000000000..38a7e39dfb --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_matlib.py @@ -0,0 +1,68 @@ +from __future__ import division, absolute_import, print_function + +# As we are testing matrices, we ignore its PendingDeprecationWarnings +try: + import pytest + pytestmark = pytest.mark.filterwarnings( + 'ignore:the matrix subclass is not:PendingDeprecationWarning') +except ImportError: + pass + +import numpy as np +import numpy.matlib +from numpy.testing import assert_array_equal, assert_ + +def test_empty(): + x = numpy.matlib.empty((2,)) + assert_(isinstance(x, np.matrix)) + assert_(x.shape, (1, 2)) + +def test_ones(): + assert_array_equal(numpy.matlib.ones((2, 3)), + np.matrix([[ 1., 1., 1.], + [ 1., 1., 1.]])) + + assert_array_equal(numpy.matlib.ones(2), np.matrix([[ 1., 1.]])) + +def test_zeros(): + assert_array_equal(numpy.matlib.zeros((2, 3)), + np.matrix([[ 0., 0., 0.], + [ 0., 0., 0.]])) + + assert_array_equal(numpy.matlib.zeros(2), np.matrix([[ 0., 0.]])) + +def test_identity(): + x = numpy.matlib.identity(2, dtype=int) + assert_array_equal(x, np.matrix([[1, 0], [0, 1]])) + +def test_eye(): + xc = numpy.matlib.eye(3, k=1, dtype=int) + assert_array_equal(xc, np.matrix([[ 0, 1, 0], + [ 0, 0, 1], + [ 0, 0, 0]])) + assert xc.flags.c_contiguous + assert not xc.flags.f_contiguous + + xf = numpy.matlib.eye(3, 4, dtype=int, order='F') + assert_array_equal(xf, np.matrix([[ 1, 0, 0, 0], + [ 0, 1, 0, 0], + [ 0, 0, 1, 0]])) + assert not xf.flags.c_contiguous + assert xf.flags.f_contiguous + +def test_rand(): + x = numpy.matlib.rand(3) + # check matrix type, array would have shape (3,) + assert_(x.ndim == 2) + +def test_randn(): + x = np.matlib.randn(3) + # check matrix 
type, array would have shape (3,) + assert_(x.ndim == 2) + +def test_repmat(): + a1 = np.arange(4) + x = numpy.matlib.repmat(a1, 2, 2) + y = np.array([[0, 1, 2, 3, 0, 1, 2, 3], + [0, 1, 2, 3, 0, 1, 2, 3]]) + assert_array_equal(x, y) diff --git a/contrib/python/numpy/py2/numpy/tests/test_numpy_version.py b/contrib/python/numpy/py2/numpy/tests/test_numpy_version.py new file mode 100644 index 0000000000..7fac8fd22e --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_numpy_version.py @@ -0,0 +1,19 @@ +from __future__ import division, absolute_import, print_function + +import re + +import numpy as np +from numpy.testing import assert_ + + +def test_valid_numpy_version(): + # Verify that the numpy version is a valid one (no .post suffix or other + # nonsense). See gh-6431 for an issue caused by an invalid version. + version_pattern = r"^[0-9]+\.[0-9]+\.[0-9]+(|a[0-9]|b[0-9]|rc[0-9])" + dev_suffix = r"(\.dev0\+([0-9a-f]{7}|Unknown))" + if np.version.release: + res = re.match(version_pattern, np.__version__) + else: + res = re.match(version_pattern + dev_suffix, np.__version__) + + assert_(res is not None, np.__version__) diff --git a/contrib/python/numpy/py2/numpy/tests/test_public_api.py b/contrib/python/numpy/py2/numpy/tests/test_public_api.py new file mode 100644 index 0000000000..194f8ecbb8 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_public_api.py @@ -0,0 +1,89 @@ +from __future__ import division, absolute_import, print_function + +import sys + +import numpy as np +import pytest +try: + import ctypes +except ImportError: + ctypes = None + +def check_dir(module, module_name=None): + """Returns a mapping of all objects with the wrong __module__ attribute.""" + if module_name is None: + module_name = module.__name__ + results = {} + for name in dir(module): + item = getattr(module, name) + if (hasattr(item, '__module__') and hasattr(item, '__name__') + and item.__module__ != module_name): + results[name] = item.__module__ + '.' + item.__name__ + return results + + +@pytest.mark.skipif( + sys.version_info[0] < 3, + reason="NumPy exposes slightly different functions on Python 2") +def test_numpy_namespace(): + # None of these objects are publicly documented. 
+ undocumented = { + 'Tester': 'numpy.testing._private.nosetester.NoseTester', + '_add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', + 'add_docstring': 'numpy.core._multiarray_umath.add_docstring', + 'add_newdoc': 'numpy.core.function_base.add_newdoc', + 'add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', + 'byte_bounds': 'numpy.lib.utils.byte_bounds', + 'compare_chararrays': 'numpy.core._multiarray_umath.compare_chararrays', + 'deprecate': 'numpy.lib.utils.deprecate', + 'deprecate_with_doc': 'numpy.lib.utils.<lambda>', + 'disp': 'numpy.lib.function_base.disp', + 'fastCopyAndTranspose': 'numpy.core._multiarray_umath._fastCopyAndTranspose', + 'get_array_wrap': 'numpy.lib.shape_base.get_array_wrap', + 'get_include': 'numpy.lib.utils.get_include', + 'int_asbuffer': 'numpy.core._multiarray_umath.int_asbuffer', + 'mafromtxt': 'numpy.lib.npyio.mafromtxt', + 'maximum_sctype': 'numpy.core.numerictypes.maximum_sctype', + 'ndfromtxt': 'numpy.lib.npyio.ndfromtxt', + 'recfromcsv': 'numpy.lib.npyio.recfromcsv', + 'recfromtxt': 'numpy.lib.npyio.recfromtxt', + 'safe_eval': 'numpy.lib.utils.safe_eval', + 'set_string_function': 'numpy.core.arrayprint.set_string_function', + 'show_config': 'numpy.__config__.show', + 'who': 'numpy.lib.utils.who', + } + # These built-in types are re-exported by numpy. + builtins = { + 'bool': 'builtins.bool', + 'complex': 'builtins.complex', + 'float': 'builtins.float', + 'int': 'builtins.int', + 'long': 'builtins.int', + 'object': 'builtins.object', + 'str': 'builtins.str', + 'unicode': 'builtins.str', + } + whitelist = dict(undocumented, **builtins) + bad_results = check_dir(np) + # pytest gives better error messages with the builtin assert than with + # assert_equal + assert bad_results == whitelist + + +def test_numpy_linalg(): + bad_results = check_dir(np.linalg) + assert bad_results == {} + + +def test_numpy_fft(): + bad_results = check_dir(np.fft) + assert bad_results == {} + +@pytest.mark.skipif(ctypes is None, + reason="ctypes not available in this python") +def test_NPY_NO_EXPORT(): + cdll = ctypes.CDLL(np.core._multiarray_tests.__file__) + # Make sure an arbitrary NPY_NO_EXPORT function is actually hidden + f = getattr(cdll, 'test_not_exported', None) + assert f is None, ("'test_not_exported' is mistakenly exported, " + "NPY_NO_EXPORT does not work") diff --git a/contrib/python/numpy/py2/numpy/tests/test_reloading.py b/contrib/python/numpy/py2/numpy/tests/test_reloading.py new file mode 100644 index 0000000000..a073d691f7 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_reloading.py @@ -0,0 +1,38 @@ +from __future__ import division, absolute_import, print_function + +import sys + +from numpy.testing import assert_raises, assert_, assert_equal +from numpy.core.numeric import pickle + +if sys.version_info[:2] >= (3, 4): + from importlib import reload +else: + from imp import reload + +def test_numpy_reloading(): + # gh-7844. Also check that relevant globals retain their identity. 
+ import numpy as np + import numpy._globals + + _NoValue = np._NoValue + VisibleDeprecationWarning = np.VisibleDeprecationWarning + ModuleDeprecationWarning = np.ModuleDeprecationWarning + + reload(np) + assert_(_NoValue is np._NoValue) + assert_(ModuleDeprecationWarning is np.ModuleDeprecationWarning) + assert_(VisibleDeprecationWarning is np.VisibleDeprecationWarning) + + assert_raises(RuntimeError, reload, numpy._globals) + reload(np) + assert_(_NoValue is np._NoValue) + assert_(ModuleDeprecationWarning is np.ModuleDeprecationWarning) + assert_(VisibleDeprecationWarning is np.VisibleDeprecationWarning) + +def test_novalue(): + import numpy as np + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + assert_equal(repr(np._NoValue), '<no value>') + assert_(pickle.loads(pickle.dumps(np._NoValue, + protocol=proto)) is np._NoValue) diff --git a/contrib/python/numpy/py2/numpy/tests/test_scripts.py b/contrib/python/numpy/py2/numpy/tests/test_scripts.py new file mode 100644 index 0000000000..e42dc25f98 --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_scripts.py @@ -0,0 +1,49 @@ +""" Test scripts + +Test that we can run executable scripts that have been installed with numpy. +""" +from __future__ import division, print_function, absolute_import + +import sys +import os +import pytest +from os.path import join as pathjoin, isfile, dirname +import subprocess + +import numpy as np +from numpy.compat.py3k import basestring +from numpy.testing import assert_, assert_equal + +is_inplace = isfile(pathjoin(dirname(np.__file__), '..', 'setup.py')) + + +def find_f2py_commands(): + if sys.platform == 'win32': + exe_dir = dirname(sys.executable) + if exe_dir.endswith('Scripts'): # virtualenv + return [os.path.join(exe_dir, 'f2py')] + else: + return [os.path.join(exe_dir, "Scripts", 'f2py')] + else: + # Three scripts are installed in Unix-like systems: + # 'f2py', 'f2py{major}', and 'f2py{major.minor}'. For example, + # if installed with python3.7 the scripts would be named + # 'f2py', 'f2py3', and 'f2py3.7'. + version = sys.version_info + major = str(version.major) + minor = str(version.minor) + return ['f2py', 'f2py' + major, 'f2py' + major + '.' + minor] + + +@pytest.mark.skipif(is_inplace, reason="Cannot test f2py command inplace") +@pytest.mark.xfail(reason="Test is unreliable") +@pytest.mark.parametrize('f2py_cmd', find_f2py_commands()) +def test_f2py(f2py_cmd): + # test that we can run f2py script + stdout = subprocess.check_output([f2py_cmd, '-v']) + assert_equal(stdout.strip(), b'2') + + +def test_pep338(): + stdout = subprocess.check_output([sys.executable, '-mnumpy.f2py', '-v']) + assert_equal(stdout.strip(), b'2') diff --git a/contrib/python/numpy/py2/numpy/tests/test_warnings.py b/contrib/python/numpy/py2/numpy/tests/test_warnings.py new file mode 100644 index 0000000000..aa6f69f7ee --- /dev/null +++ b/contrib/python/numpy/py2/numpy/tests/test_warnings.py @@ -0,0 +1,78 @@ +""" +Tests which scan for certain occurrences in the code, they may not find +all of these occurrences but should catch almost all. 
+""" +from __future__ import division, absolute_import, print_function + +import sys +import pytest + +if sys.version_info >= (3, 4): + from pathlib import Path + import ast + import tokenize + import numpy + + class ParseCall(ast.NodeVisitor): + def __init__(self): + self.ls = [] + + def visit_Attribute(self, node): + ast.NodeVisitor.generic_visit(self, node) + self.ls.append(node.attr) + + def visit_Name(self, node): + self.ls.append(node.id) + + + class FindFuncs(ast.NodeVisitor): + def __init__(self, filename): + super().__init__() + self.__filename = filename + + def visit_Call(self, node): + p = ParseCall() + p.visit(node.func) + ast.NodeVisitor.generic_visit(self, node) + + if p.ls[-1] == 'simplefilter' or p.ls[-1] == 'filterwarnings': + if node.args[0].s == "ignore": + raise AssertionError( + "ignore filter should not be used; found in " + "{} on line {}".format(self.__filename, node.lineno)) + + if p.ls[-1] == 'warn' and ( + len(p.ls) == 1 or p.ls[-2] == 'warnings'): + + if "testing/tests/test_warnings.py" is self.__filename: + # This file + return + + # See if stacklevel exists: + if len(node.args) == 3: + return + args = {kw.arg for kw in node.keywords} + if "stacklevel" in args: + return + raise AssertionError( + "warnings should have an appropriate stacklevel; found in " + "{} on line {}".format(self.__filename, node.lineno)) + + + @pytest.mark.slow + def test_warning_calls(): + # combined "ignore" and stacklevel error + base = Path(numpy.__file__).parent + + for path in base.rglob("*.py"): + if base / "testing" in path.parents: + continue + if path == base / "__init__.py": + continue + if path == base / "random" / "__init__.py": + continue + # use tokenize to auto-detect encoding on systems where no + # default encoding is defined (e.g. 
LANG='C') + with tokenize.open(str(path)) as file: + tree = ast.parse(file.read()) + FindFuncs(path).visit(tree) diff --git a/contrib/python/numpy/py3/patches/01-fix-include-simd.h.patch b/contrib/python/numpy/py3/patches/01-fix-include-simd.h.patch new file mode 100644 index 0000000000..9857c9e3c6 --- /dev/null +++ b/contrib/python/numpy/py3/patches/01-fix-include-simd.h.patch @@ -0,0 +1,105 @@ +--- contrib/python/numpy/py3/numpy/core/src/multiarray/compiled_base.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/compiled_base.c (working tree) +@@ -14,1 +14,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/multiarray/einsum_sumprod.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/einsum_sumprod.c (working tree) +@@ -27,1 +27,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/multiarray/item_selection.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/item_selection.c (working tree) +@@ -28,1 +28,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_arithm_fp.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_arithm_fp.dispatch.c (working tree) +@@ -16,1 +16,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c (working tree) +@@ -18,1 +18,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c (working tree) +@@ -20,1 +20,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c (working tree) +@@ -15,1 +15,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp.dispatch.c (working tree) +@@ -22,1 +22,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/_simd/_simd_inc.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/_simd/_simd_inc.h (working tree) +@@ -12,1 +12,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_umath_fp.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_umath_fp.dispatch.c (working tree) +@@ -12,1 +12,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/multiarray/argfunc.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/argfunc.dispatch.c (working tree) +@@ -18,1 +18,1 @@ +-#include "simd/simd.h" ++#include 
"contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_hyperbolic.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_hyperbolic.dispatch.c (working tree) +@@ -15,1 +15,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c (working tree) +@@ -18,1 +18,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_modulo.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_modulo.dispatch.c (working tree) +@@ -15,1 +15,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_comparison.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_comparison.dispatch.c (working tree) +@@ -15,1 +15,1 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/fast_loop_macros.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/fast_loop_macros.h (working tree) +@@ -15 +15 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_autovec.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_autovec.dispatch.c (working tree) +@@ -22 +22 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_logical.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_logical.dispatch.c (working tree) +@@ -22 +22 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c (working tree) +@@ -24 +24 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_complex.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_complex.dispatch.c (working tree) +@@ -22 +22 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp_le.dispatch.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp_le.dispatch.c (working tree) +@@ -30 +30 @@ +-#include "simd/simd.h" ++#include "contrib/python/numpy/py3/numpy/core/src/common/simd/simd.h" diff --git a/contrib/python/numpy/py3/patches/02-fix-doc.patch b/contrib/python/numpy/py3/patches/02-fix-doc.patch new file mode 100644 index 0000000000..205e1f6d94 --- /dev/null +++ b/contrib/python/numpy/py3/patches/02-fix-doc.patch @@ -0,0 +1,15 @@ +--- contrib/python/numpy/py3/numpy/doc/__init__.py (index) ++++ contrib/python/numpy/py3/numpy/doc/__init__.py (working tree) +@@ -2,8 +2,10 @@ import os + + ref_dir = os.path.join(os.path.dirname(__file__)) + +-__all__ = sorted(f[:-3] for f in os.listdir(ref_dir) if f.endswith('.py') and +- not f.startswith('__')) ++__all__ = [ ++ 
"constants", ++ "ufuncs", ++] + + for f in __all__: + __import__(__name__ + '.' + f) diff --git a/contrib/python/numpy/py3/patches/03-fix-name.patch b/contrib/python/numpy/py3/patches/03-fix-name.patch new file mode 100644 index 0000000000..120ece8037 --- /dev/null +++ b/contrib/python/numpy/py3/patches/03-fix-name.patch @@ -0,0 +1,22 @@ +--- contrib/python/numpy/py3/numpy/core/src/umath/_rational_tests.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/_rational_tests.c (working tree) +@@ -1106,8 +1106,8 @@ rational_ufunc_test_add_rationals(char** args, npy_intp const *dimensions, + } + } + +- +-PyMethodDef module_methods[] = { ++// XXX Arcadia confilct with contrib/python/pygit2 ++PyMethodDef rational_module_methods[] = { + {0} /* sentinel */ + }; + +@@ -1116,7 +1116,7 @@ static struct PyModuleDef moduledef = { + "_rational_tests", + NULL, + -1, +- module_methods, ++ rational_module_methods, + NULL, + NULL, + NULL, diff --git a/contrib/python/numpy/py3/patches/04-fix-tests.patch b/contrib/python/numpy/py3/patches/04-fix-tests.patch new file mode 100644 index 0000000000..d13669b9ad --- /dev/null +++ b/contrib/python/numpy/py3/patches/04-fix-tests.patch @@ -0,0 +1,281 @@ +--- contrib/python/numpy/py3/numpy/core/tests/test_longdouble.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_longdouble.py (working tree) +@@ -9,1 +9,1 @@ from numpy.testing import ( +-from numpy.core.tests._locales import CommaDecimalPointLocale ++from __tests__.numpy.core.tests._locales import CommaDecimalPointLocale +--- contrib/python/numpy/py3/numpy/core/tests/test_multiarray.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_multiarray.py (working tree) +@@ -32,1 +32,1 @@ from numpy.testing import ( +-from numpy.core.tests._locales import CommaDecimalPointLocale ++from __tests__.numpy.core.tests._locales import CommaDecimalPointLocale +--- contrib/python/numpy/py3/numpy/core/tests/test_print.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_print.py (working tree) +@@ -7,1 +7,1 @@ import pytest +-from numpy.core.tests._locales import CommaDecimalPointLocale ++from __tests__.numpy.core.tests._locales import CommaDecimalPointLocale +--- contrib/python/numpy/py3/numpy/matrixlib/tests/test_matrix_linalg.py (index) ++++ contrib/python/numpy/py3/numpy/matrixlib/tests/test_matrix_linalg.py (working tree) +@@ -4,1 +4,1 @@ +-from numpy.linalg.tests.test_linalg import ( ++from __tests__.numpy.linalg.tests.test_linalg import ( +--- contrib/python/numpy/py3/numpy/array_api/tests/test_array_object.py (index) ++++ contrib/python/numpy/py3/numpy/array_api/tests/test_array_object.py (working tree) +@@ -6,3 +6,3 @@ import operator +-from .. import ones, asarray, reshape, result_type, all, equal +-from .._array_object import Array +-from .._dtypes import ( ++from numpy.array_api import ones, asarray, reshape, result_type, all, equal ++from numpy.array_api._array_object import Array ++from numpy.array_api._dtypes import ( +--- contrib/python/numpy/py3/numpy/array_api/tests/test_creation_functions.py (index) ++++ contrib/python/numpy/py3/numpy/array_api/tests/test_creation_functions.py (working tree) +@@ -4,2 +4,2 @@ +-from .. 
import all +-from .._creation_functions import ( ++from numpy.array_api import all ++from numpy.array_api._creation_functions import ( +@@ -20,2 +20,2 @@ from .._creation_functions import ( +-from .._dtypes import float32, float64 +-from .._array_object import Array ++from numpy.array_api._dtypes import float32, float64 ++from numpy.array_api._array_object import Array +--- contrib/python/numpy/py3/numpy/array_api/tests/test_elementwise_functions.py (index) ++++ contrib/python/numpy/py3/numpy/array_api/tests/test_elementwise_functions.py (working tree) +@@ -5,3 +5,3 @@ from inspect import getfullargspec +-from .. import asarray, _elementwise_functions +-from .._elementwise_functions import bitwise_left_shift, bitwise_right_shift +-from .._dtypes import ( ++from numpy.array_api import asarray, _elementwise_functions ++from numpy.array_api._elementwise_functions import bitwise_left_shift, bitwise_right_shift ++from numpy.array_api._dtypes import ( +--- contrib/python/numpy/py3/numpy/array_api/tests/test_manipulation_functions.py (index) ++++ contrib/python/numpy/py3/numpy/array_api/tests/test_manipulation_functions.py (working tree) +@@ -4,4 +4,4 @@ +-from .. import all +-from .._creation_functions import asarray +-from .._dtypes import float64, int8 +-from .._manipulation_functions import ( ++from numpy.array_api import all ++from numpy.array_api._creation_functions import asarray ++from numpy.array_api._dtypes import float64, int8 ++from numpy.array_api._manipulation_functions import ( +--- contrib/python/numpy/py3/numpy/core/tests/test_records.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_records.py (working tree) +@@ -73,1 +75,2 @@ class TestFromrecords: +- data_dir = path.join(path.dirname(__file__), 'data') ++ import yatest.common as yc ++ data_dir = yc.source_path(path.join(path.dirname(__file__), 'data')) +--- contrib/python/numpy/py3/numpy/core/tests/test_regression.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_regression.py (working tree) +@@ -1099,1 +1101,2 @@ class TestRegression: +- data_dir = path.join(path.dirname(__file__), 'data') ++ import yatest.common as yc ++ data_dir = yc.source_path(path.join(path.dirname(__file__), 'data')) +--- contrib/python/numpy/py3/numpy/core/tests/test_umath_accuracy.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_umath_accuracy.py (working tree) +@@ -33,1 +35,2 @@ class TestAccuracy: +- data_dir = path.join(path.dirname(__file__), 'data') ++ import yatest.common as yc ++ data_dir = yc.source_path(path.join(path.dirname(__file__), 'data')) +--- contrib/python/numpy/py3/numpy/random/tests/test_direct.py (index) ++++ contrib/python/numpy/py3/numpy/random/tests/test_direct.py (working tree) +@@ -33,1 +33,2 @@ if sys.flags.optimize > 1: +-pwd = os.path.dirname(os.path.abspath(__file__)) ++import yatest.common as yc ++pwd = yc.source_path(os.path.dirname(__file__)) +--- contrib/python/numpy/py3/numpy/core/tests/test_numpy_2_0_compat.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_numpy_2_0_compat.py (working tree) +@@ -4,2 +4,3 @@ import pickle + import numpy as np ++import yatest.common as yc + +@@ -9 +10 @@ class TestNumPy2Compatibility: +- data_dir = path.join(path.dirname(__file__), "data") ++ data_dir = yc.source_path(path.join(path.dirname(__file__), "data")) +--- contrib/python/numpy/py3/numpy/lib/tests/test_format.py (index) ++++ contrib/python/numpy/py3/numpy/lib/tests/test_format.py (working tree) +@@ -90 +90 @@ Test the magic string writing. 
+- '\x93NUMPY\x01\x00' ++ b'\x93NUMPY\x01\x00' +@@ -92 +92 @@ Test the magic string writing. +- '\x93NUMPY\x00\x00' ++ b'\x93NUMPY\x00\x00' +@@ -94 +94 @@ Test the magic string writing. +- '\x93NUMPY\xff\xff' ++ b'\x93NUMPY\xff\xff' +@@ -96 +96 @@ Test the magic string writing. +- '\x93NUMPY\x02\x05' ++ b'\x93NUMPY\x02\x05' +@@ -111 +111 @@ Test the header writing. +- >>> for arr in basic_arrays + record_arrays: ++ >>> for arr in basic_arrays + record_arrays: # doctest: +SKIP +@@ -287,6 +11,8 @@ from numpy.testing import ( + ) + from numpy.lib import format + ++import yatest.common as yc ++ + + # Generate some basic arrays to test with. + scalars = [ +@@ -526,1 +252,1 @@ def test_load_padded_dtype(tmpdir, dt): +- path = os.path.join(os.path.dirname(__file__), 'data', fname) ++ path = yc.source_path(os.path.join(os.path.dirname(__file__), 'data', fname)) +@@ -536,1 +262,1 @@ def test_load_padded_dtype(tmpdir, dt): +- data_dir = os.path.join(os.path.dirname(__file__), 'data') ++ data_dir = yc.source_path(os.path.join(os.path.dirname(__file__), 'data')) +@@ -578,1 +304,1 @@ def test_pickle_python2_python3(): +- data_dir = os.path.join(os.path.dirname(__file__), 'data') ++ data_dir = yc.source_path(os.path.join(os.path.dirname(__file__), 'data')) +--- contrib/python/numpy/py3/numpy/tests/test_public_api.py (index) ++++ contrib/python/numpy/py3/numpy/tests/test_public_api.py (working tree) +@@ -34,16 +34,16 @@ def test_numpy_namespace(): + # None of these objects are publicly documented to be part of the main + # NumPy namespace (some are useful though, others need to be cleaned up) + undocumented = { +- '_add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', +- 'add_docstring': 'numpy.core._multiarray_umath.add_docstring', ++ '_add_newdoc_ufunc': '_multiarray_umath._add_newdoc_ufunc', ++ 'add_docstring': '_multiarray_umath.add_docstring', + 'add_newdoc': 'numpy.core.function_base.add_newdoc', +- 'add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc', ++ 'add_newdoc_ufunc': '_multiarray_umath._add_newdoc_ufunc', + 'byte_bounds': 'numpy.lib.utils.byte_bounds', +- 'compare_chararrays': 'numpy.core._multiarray_umath.compare_chararrays', ++ 'compare_chararrays': '_multiarray_umath.compare_chararrays', + 'deprecate': 'numpy.lib.utils.deprecate', + 'deprecate_with_doc': 'numpy.lib.utils.deprecate_with_doc', + 'disp': 'numpy.lib.function_base.disp', +- 'fastCopyAndTranspose': 'numpy.core._multiarray_umath.fastCopyAndTranspose', ++ 'fastCopyAndTranspose': '_multiarray_umath.fastCopyAndTranspose', + 'get_array_wrap': 'numpy.lib.shape_base.get_array_wrap', + 'get_include': 'numpy.lib.utils.get_include', + 'recfromcsv': 'numpy.lib.npyio.recfromcsv', +@@ -61,6 +61,7 @@ def test_numpy_namespace(): + assert bad_results == allowlist + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_WASM, reason="can't start subprocess") + @pytest.mark.parametrize('name', ['testing']) + def test_import_lazy_import(name): +@@ -99,6 +100,7 @@ def test_numpy_fft(): + assert bad_results == {} + + ++@pytest.mark.skip + @pytest.mark.skipif(ctypes is None, + reason="ctypes not available in this python") + def test_NPY_NO_EXPORT(): +--- contrib/python/numpy/py3/numpy/core/tests/test_nditer.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_nditer.py (working tree) +@@ -2031,8 +2031,11 @@ def test_buffered_cast_error_paths_unraisable(): + buf[...] = "a" + del buf, it # Flushing only happens during deallocate right now. 
+ """) ++ import os ++ env = os.environ.copy() ++ env['Y_PYTHON_ENTRY_POINT'] = ':main' + res = subprocess.check_output([sys.executable, "-c", code], +- stderr=subprocess.STDOUT, text=True) ++ stderr=subprocess.STDOUT, text=True, env=env) + assert "ValueError" in res + + +--- contrib/python/numpy/py3/numpy/core/tests/test_protocols.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_protocols.py (working tree) +@@ -4,4 +4,5 @@ import warnings + + ++@pytest.mark.skip + @pytest.mark.filterwarnings("error") + def test_getattr_warning(): +--- contrib/python/numpy/py3/numpy/linalg/tests/test_linalg.py (index) ++++ contrib/python/numpy/py3/numpy/linalg/tests/test_linalg.py (working tree) +@@ -1898,6 +1898,7 @@ def test_xerbla_override(): + pytest.skip('Numpy xerbla not linked in.') + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_WASM, reason="Cannot start subprocess") + @pytest.mark.slow + def test_sdot_bug_8577(): +--- contrib/python/numpy/py3/numpy/tests/test_reloading.py (index) ++++ contrib/python/numpy/py3/numpy/tests/test_reloading.py (working tree) +@@ -37,6 +37,7 @@ def test_novalue(): + protocol=proto)) is np._NoValue) + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_WASM, reason="can't start subprocess") + def test_full_reimport(): + """At the time of writing this, it is *not* truly supported, but +--- contrib/python/numpy/py3/numpy/core/tests/test_limited_api.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_limited_api.py (working tree) +@@ -6,6 +6,7 @@ import sysconfig + import pytest + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_WASM, reason="Can't start subprocess") + @pytest.mark.xfail( + sysconfig.get_config_var("Py_DEBUG"), +--- contrib/python/numpy/py3/numpy/linalg/tests/test_linalg.py (index) ++++ contrib/python/numpy/py3/numpy/linalg/tests/test_linalg.py (working tree) +@@ -1754,6 +1754,7 @@ class TestQR: + assert_(isinstance(r2, a_type)) + assert_almost_equal(r2, r1) + ++ @pytest.mark.skip + @pytest.mark.parametrize("size", [ + (3, 4), (4, 3), (4, 4), + (3, 0), (0, 3)]) +--- contrib/python/numpy/py3/numpy/lib/tests/test_loadtxt.py (index) ++++ contrib/python/numpy/py3/numpy/lib/tests/test_loadtxt.py (working tree) +@@ -388,6 +388,7 @@ def test_bool(): + assert_array_equal(res.view(np.uint8), [[1, 0], [1, 1]]) + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") + @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +@@ -406,6 +407,7 @@ def test_integer_signs(dtype): + np.loadtxt([f"{sign}2\n"], dtype=dtype) + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") + @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +--- contrib/python/numpy/py3/numpy/core/tests/test_nep50_promotions.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_nep50_promotions.py (working tree) +@@ -20,6 +20,7 @@ def _weak_promotion_enabled(): + np._set_promotion_state(state) + + ++@pytest.mark.skip + @pytest.mark.skipif(IS_WASM, reason="wasm doesn't have support for fp errors") + def test_nep50_examples(): + with pytest.warns(UserWarning, match="result dtype changed"): +--- contrib/python/numpy/py3/numpy/core/tests/test_cpu_features.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_cpu_features.py (working tree) +@@ -122,6 +122,7 @@ class AbstractTest: + " therefore this test class cannot be properly executed." 
+ ), + ) ++@pytest.mark.xfail + class TestEnvPrivation: + cwd = pathlib.Path(__file__).parent.resolve() + env = os.environ.copy() +--- contrib/python/numpy/py3/numpy/core/tests/test_numeric.py (index) ++++ contrib/python/numpy/py3/numpy/core/tests/test_numeric.py (working tree) +@@ -2352,7 +2352,7 @@ class TestClip: + actual = np.clip(arr, amin, amax) + assert_equal(actual, expected) + +- @pytest.mark.xfail(reason="propagation doesn't match spec") ++ @pytest.mark.skip(reason="propagation doesn't match spec") + @pytest.mark.parametrize("arr, amin, amax", [ + (np.array([1] * 10, dtype='m8'), + np.timedelta64('NaT'), diff --git a/contrib/python/numpy/py3/patches/05-fix-win-build.patch b/contrib/python/numpy/py3/patches/05-fix-win-build.patch new file mode 100644 index 0000000000..c628efec95 --- /dev/null +++ b/contrib/python/numpy/py3/patches/05-fix-win-build.patch @@ -0,0 +1,38 @@ +--- contrib/python/numpy/py3/numpy/core/src/npymath/npy_math_private.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/npymath/npy_math_private.h (working tree) +@@ -34,6 +34,10 @@ + #include "numpy/npy_endian.h" + #include "numpy/npy_common.h" + ++#ifdef _MSC_VER // windows in ucrt\math.h makes #define complex _complex ++#undef complex ++#endif ++ + /* + * The original fdlibm code used statements like: + * n0 = ((*(int*)&one)>>29)^1; * index of high word * +--- contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h (index) ++++ contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.x86_64.h (working tree) +@@ -53,7 +53,11 @@ + #ifdef NPY__CPU_TARGET_POPCNT + /** POPCNT **/ + #define NPY_HAVE_POPCNT 1 ++#ifdef _MSC_VER ++ #include <nmmintrin.h> ++#else + #include <popcntintrin.h> ++#endif + #endif /*NPY__CPU_TARGET_POPCNT*/ + #ifdef NPY__CPU_TARGET_SSE42 + /** SSE42 **/ +--- contrib/python/numpy/py3/numpy/core/src/common/numpyos.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/common/numpyos.c (working tree) +@@ -783,7 +783,7 @@ NumPyOS_strtoull(const char *str, char **endptr, int base) + + #include <stdlib.h> + +-#if _MSC_VER >= 1900 ++#if _MSC_VER >= 1900 && 0 + /* npy3k_compat.h uses this function in the _Py_BEGIN/END_SUPPRESS_IPH + * macros. It does not need to be defined when building using MSVC + * earlier than 14.0 (_MSC_VER == 1900). 
diff --git a/contrib/python/numpy/py3/patches/06-fix-config.h.patch b/contrib/python/numpy/py3/patches/06-fix-config.h.patch new file mode 100644 index 0000000000..baddbda087 --- /dev/null +++ b/contrib/python/numpy/py3/patches/06-fix-config.h.patch @@ -0,0 +1,19 @@ +--- contrib/python/numpy/py3/numpy/core/include/numpy/config-linux-x86_64.h (index) ++++ contrib/python/numpy/py3/numpy/core/include/numpy/config-linux-x86_64.h (working tree) +@@ -31,3 +31,5 @@ + #define HAVE_FALLOCATE 1 ++/* breake test_extension_incref_elide and test_extension_incref_elide_stack + #define HAVE_BACKTRACE 1 ++*/ + #define HAVE_MADVISE 1 +@@ -90 +90 @@ +-#define NPY_CAN_LINK_SVML 1 ++//#define NPY_CAN_LINK_SVML 1 +--- contrib/python/numpy/py3/numpy/core/include/numpy/config-osx-x86_64.h (index) ++++ contrib/python/numpy/py3/numpy/core/include/numpy/config-osx-x86_64.h (working tree) +@@ -5,3 +5,5 @@ + #define MATHLIB ++/* breake test_extension_incref_elide and test_extension_incref_elide_stack + #define HAVE_BACKTRACE 1 ++*/ + #define HAVE_MADVISE 1 diff --git a/contrib/python/numpy/py3/patches/09-hack-for-matplotlib.patch b/contrib/python/numpy/py3/patches/09-hack-for-matplotlib.patch new file mode 100644 index 0000000000..1ef1f33bca --- /dev/null +++ b/contrib/python/numpy/py3/patches/09-hack-for-matplotlib.patch @@ -0,0 +1,12 @@ +--- contrib/python/numpy/py3/numpy/core/include/numpy/__multiarray_api.h (index) ++++ contrib/python/numpy/py3/numpy/core/include/numpy/__multiarray_api.h (working tree) +@@ -1,6 +1,9 @@ + + #if defined(_MULTIARRAYMODULE) || defined(WITH_CPYCHECKER_STEALS_REFERENCE_TO_ARG_ATTRIBUTE) + ++// XXX: dummy import to simplify static build ++static int import_array() { return 0; } ++ + typedef struct { + PyObject_HEAD + npy_bool obval; diff --git a/contrib/python/numpy/py3/patches/12-arrch64.patch b/contrib/python/numpy/py3/patches/12-arrch64.patch new file mode 100644 index 0000000000..4c1cf87244 --- /dev/null +++ b/contrib/python/numpy/py3/patches/12-arrch64.patch @@ -0,0 +1,327 @@ +--- contrib/python/numpy/py3/numpy/core/src/_simd/_simd.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/_simd/_simd.dispatch.h (working tree) +@@ -10,8 +10,13 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)&&CHK(POPCNT)&&CHK(SSE42)), SSE42, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/_umath_tests.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/_umath_tests.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) 
\ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)), SSE41, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ NPY__CPU_DISPATCH_EXPAND_(CB((CHK(ASIMD)&&CHK(ASIMDHP)), ASIMDHP, __VA_ARGS__)) ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_arithm_fp.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_arithm_fp.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.h (working tree) +@@ -10,8 +10,13 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)), SSE41, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.h (working tree) +@@ -10,7 +10,12 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) 
\ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_umath_fp.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_umath_fp.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)), SSE41, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/multiarray/argfunc.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/argfunc.dispatch.h (working tree) +@@ -10,7 +10,12 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)&&CHK(POPCNT)&&CHK(SSE42)), SSE42, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_hyperbolic.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_hyperbolic.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) 
\ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_comparison.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_comparison.dispatch.h (working tree) +@@ -10,8 +10,13 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)&&CHK(POPCNT)&&CHK(SSE42)), SSE42, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/_umath_tests.dispatch.asimdhp.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/_umath_tests.dispatch.asimdhp.c (working tree) +@@ -0,0 +1,12 @@ ++/** ++ * AUTOGENERATED DON'T EDIT ++ * Please make changes to the code generator (distutils/ccompiler_opt.py) ++ */ ++#define NPY__CPU_TARGET_MODE ++#define NPY__CPU_TARGET_CURRENT ASIMDHP ++#define NPY__CPU_TARGET_NEON ++#define NPY__CPU_TARGET_NEON_FP16 ++#define NPY__CPU_TARGET_NEON_VFPV4 ++#define NPY__CPU_TARGET_ASIMD ++#define NPY__CPU_TARGET_ASIMDHP ++#include "_umath_tests.dispatch.c" +--- contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h (index) ++++ contrib/python/numpy/py3/numpy/distutils/include/npy_cpu_dispatch_config.aarch64.h (working tree) +@@ -0,0 +1,42 @@ ++/* ++ * AUTOGENERATED DON'T EDIT ++ * Please make changes to the code generator (distutils/ccompiler_opt.py) ++*/ ++#define NPY_WITH_CPU_BASELINE "NEON NEON_FP16 NEON_VFPV4 ASIMD" ++#define NPY_WITH_CPU_DISPATCH "ASIMDHP ASIMDDP ASIMDFHM" ++#define NPY_WITH_CPU_BASELINE_N 4 ++#define NPY_WITH_CPU_DISPATCH_N 3 ++#define NPY_WITH_CPU_EXPAND_(X) X ++#define NPY_WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(NEON, __VA_ARGS__)) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(NEON_FP16, __VA_ARGS__)) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(NEON_VFPV4, __VA_ARGS__)) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(ASIMD, __VA_ARGS__)) ++#define NPY_WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) 
\ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(ASIMDHP, __VA_ARGS__)) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(ASIMDDP, __VA_ARGS__)) \ ++ NPY_WITH_CPU_EXPAND_(MACRO_TO_CALL(ASIMDFHM, __VA_ARGS__)) ++/******* baseline features *******/ ++ /** NEON **/ ++ #define NPY_HAVE_NEON 1 ++ #include <arm_neon.h> ++ /** NEON_FP16 **/ ++ #define NPY_HAVE_NEON_FP16 1 ++ /** NEON_VFPV4 **/ ++ #define NPY_HAVE_NEON_VFPV4 1 ++ /** ASIMD **/ ++ #define NPY_HAVE_ASIMD 1 ++ ++/******* dispatch features *******/ ++#ifdef NPY__CPU_TARGET_ASIMDHP ++ /** ASIMDHP **/ ++ #define NPY_HAVE_ASIMDHP 1 ++#endif /*NPY__CPU_TARGET_ASIMDHP*/ ++#ifdef NPY__CPU_TARGET_ASIMDDP ++ /** ASIMDDP **/ ++ #define NPY_HAVE_ASIMDDP 1 ++#endif /*NPY__CPU_TARGET_ASIMDDP*/ ++#ifdef NPY__CPU_TARGET_ASIMDFHM ++ /** ASIMDFHM **/ ++ #define NPY_HAVE_ASIMDFHM 1 ++#endif /*NPY__CPU_TARGET_ASIMDFHM*/ +--- contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/npysort/simd_qsort_16bit.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_ICL)), AVX512_ICL, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_autovec.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_autovec.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_logical.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_logical.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) 
\ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512_SKX)), AVX512_SKX, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(AVX2)), AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_complex.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_complex.dispatch.h (working tree) +@@ -10,6 +10,11 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX512F)), AVX512F, __VA_ARGS__)) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(AVX)&&CHK(F16C)&&CHK(FMA3)&&CHK(AVX2)), FMA3__AVX2, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif +--- contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp_le.dispatch.h (index) ++++ contrib/python/numpy/py3/numpy/core/src/umath/loops_unary_fp_le.dispatch.h (working tree) +@@ -10,5 +10,10 @@ + #undef NPY__CPU_DISPATCH_CALL + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB(__VA_ARGS__)) ++#if defined(__x86_64__) || defined(_M_X64) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ + NPY__CPU_DISPATCH_EXPAND_(CB((CHK(SSE)&&CHK(SSE2)&&CHK(SSE3)&&CHK(SSSE3)&&CHK(SSE41)), SSE41, __VA_ARGS__)) ++#elif defined(__aarch64__) ++#define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ ++ ++#endif diff --git a/contrib/python/numpy/py3/patches/14-libunwind.h.patch b/contrib/python/numpy/py3/patches/14-libunwind.h.patch new file mode 100644 index 0000000000..85b0f871ae --- /dev/null +++ b/contrib/python/numpy/py3/patches/14-libunwind.h.patch @@ -0,0 +1,22 @@ +--- contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c (index) ++++ contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c (working tree) +@@ -86,7 +86,7 @@ + #if defined HAVE_EXECINFO_H + #include <execinfo.h> + #elif defined HAVE_LIBUNWIND_H +-#include <libunwind.h> ++#error #include <libunwind.h> + #endif + + /* +--- contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp (index) ++++ contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp (working tree) +@@ -54,7 +54,7 @@ using dispatch_scalar = typename std::conditional<std::is_scalar<typ>::value, sc + #if defined HAVE_EXECINFO_H + #include <execinfo.h> + #elif defined HAVE_LIBUNWIND_H +-#include <libunwind.h> ++#error #include <libunwind.h> + #endif + void + dbg_stack_trace() diff --git a/contrib/python/numpy/py3/patches/15-support-python-3.12.patch b/contrib/python/numpy/py3/patches/15-support-python-3.12.patch new file mode 100644 index 0000000000..5deb065a44 --- /dev/null +++ b/contrib/python/numpy/py3/patches/15-support-python-3.12.patch @@ -0,0 +1,11 @@ +--- contrib/python/numpy/py3/numpy/tests/test_public_api.py (index) ++++ contrib/python/numpy/py3/numpy/tests/test_public_api.py (working tree) +@@ -339,6 +339,8 @@ SKIP_LIST = [ + ] + if sys.version_info < (3, 12): + SKIP_LIST += ["numpy.distutils.msvc9compiler"] ++else: ++ SKIP_LIST += ["numpy.distutils"] + + + # suppressing warnings from deprecated modules -- cgit v1.2.3