author | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-06 15:03:31 +0300 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:18:18 +0300 |
commit | 303fba2f20dfd94603064b607671b787de12624e (patch) | |
tree | 54c22fad0bcd67bf52f78822a3ee7714fd9dbf40 | |
parent | c7854274198c4168e713732ceb13e7075fce89b0 (diff) | |
download | ydb-303fba2f20dfd94603064b607671b787de12624e.tar.gz | |
Intermediate changes
65 files changed, 1516 insertions, 1283 deletions
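Among the vendored NumPy 1.26.3 → 1.26.4 changes in the diff below, one behavioural fix worth calling out is in `numpy/array_api/linalg.py`: `cholesky(x, upper=True)` now conjugates the transposed factor for complex inputs, because for a Hermitian positive-definite matrix A = L·Lᴴ the upper factor is the conjugate transpose of L, not the plain transpose. The following is a minimal sketch of that idea in plain NumPy; the `cholesky_upper` helper is purely illustrative and is not part of the vendored code.

```python
import numpy as np

def cholesky_upper(x):
    # For a Hermitian positive-definite matrix A, np.linalg.cholesky returns
    # the lower-triangular factor L with A = L @ L.conj().T.
    L = np.linalg.cholesky(x)
    # The upper factor is the *conjugate* transpose of L; a plain transpose
    # is only correct for real inputs (this mirrors the fix in the diff below).
    U = np.swapaxes(L, -1, -2)
    if np.issubdtype(x.dtype, np.complexfloating):
        U = np.conj(U)
    return U

# Example: complex Hermitian positive-definite matrix
A = np.array([[2.0, 1.0 + 1.0j], [1.0 - 1.0j, 3.0]])
U = cholesky_upper(A)
assert np.allclose(U.conj().T @ U, A)
```

The actual change in the linalg.py hunk below does the same thing on the array_api `Array` wrapper: it takes `.mT` of the new lower factor and applies `conj` when the dtype is complex64 or complex128.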
diff --git a/contrib/python/numpy/include/numpy/core/feature_detection_misc.h b/contrib/python/numpy/include/numpy/core/feature_detection_misc.h new file mode 100644 index 0000000000..0e6447fbd1 --- /dev/null +++ b/contrib/python/numpy/include/numpy/core/feature_detection_misc.h @@ -0,0 +1,5 @@ +#ifdef USE_PYTHON3 +#include <contrib/python/numpy/py3/numpy/core/feature_detection_misc.h> +#else +#error #include <contrib/python/numpy/py2/numpy/core/feature_detection_misc.h> +#endif diff --git a/contrib/python/numpy/py3/.dist-info/METADATA b/contrib/python/numpy/py3/.dist-info/METADATA index 5e515025ec..8246dc4ed3 100644 --- a/contrib/python/numpy/py3/.dist-info/METADATA +++ b/contrib/python/numpy/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: numpy -Version: 1.26.3 +Version: 1.26.4 Summary: Fundamental package for array computing in Python Home-page: https://numpy.org Author: Travis E. Oliphant et al. @@ -70,11 +70,6 @@ License: Copyright (c) 2005-2023, NumPy Developers. License: Apache 2.0 For license text, see vendored-meson/meson/COPYING - Name: meson-python - Files: vendored-meson/meson-python/* - License: MIT - For license text, see vendored-meson/meson-python/LICENSE - Name: spin Files: .spin/cmds.py License: BSD-3 diff --git a/contrib/python/numpy/py3/LICENSES_bundled.txt b/contrib/python/numpy/py3/LICENSES_bundled.txt index 26faf7ff30..aae0e774fa 100644 --- a/contrib/python/numpy/py3/LICENSES_bundled.txt +++ b/contrib/python/numpy/py3/LICENSES_bundled.txt @@ -30,11 +30,6 @@ Files: vendored-meson/meson/* License: Apache 2.0 For license text, see vendored-meson/meson/COPYING -Name: meson-python -Files: vendored-meson/meson-python/* -License: MIT - For license text, see vendored-meson/meson-python/LICENSE - Name: spin Files: .spin/cmds.py License: BSD-3 diff --git a/contrib/python/numpy/py3/numpy/__config__.py.in b/contrib/python/numpy/py3/numpy/__config__.py.in index 6c6c21cb85..f3b32c28c1 100644 --- a/contrib/python/numpy/py3/numpy/__config__.py.in +++ b/contrib/python/numpy/py3/numpy/__config__.py.in @@ -32,21 +32,27 @@ CONFIG = _cleanup( "Compilers": { "c": { "name": "@C_COMP@", - "linker": "@C_COMP_LINKER_ID@", + "linker": r"@C_COMP_LINKER_ID@", "version": "@C_COMP_VERSION@", - "commands": "@C_COMP_CMD_ARRAY@", + "commands": r"@C_COMP_CMD_ARRAY@", + "args": r"@C_COMP_ARGS@", + "linker args": r"@C_COMP_LINK_ARGS@", }, "cython": { "name": "@CYTHON_COMP@", - "linker": "@CYTHON_COMP_LINKER_ID@", + "linker": r"@CYTHON_COMP_LINKER_ID@", "version": "@CYTHON_COMP_VERSION@", - "commands": "@CYTHON_COMP_CMD_ARRAY@", + "commands": r"@CYTHON_COMP_CMD_ARRAY@", + "args": r"@CYTHON_COMP_ARGS@", + "linker args": r"@CYTHON_COMP_LINK_ARGS@", }, "c++": { "name": "@CPP_COMP@", - "linker": "@CPP_COMP_LINKER_ID@", + "linker": r"@CPP_COMP_LINKER_ID@", "version": "@CPP_COMP_VERSION@", - "commands": "@CPP_COMP_CMD_ARRAY@", + "commands": r"@CPP_COMP_CMD_ARRAY@", + "args": r"@CPP_COMP_ARGS@", + "linker args": r"@CPP_COMP_LINK_ARGS@", }, }, "Machine Information": { @@ -72,7 +78,7 @@ CONFIG = _cleanup( "detection method": "@BLAS_TYPE_NAME@", "include directory": r"@BLAS_INCLUDEDIR@", "lib directory": r"@BLAS_LIBDIR@", - "openblas configuration": "@BLAS_OPENBLAS_CONFIG@", + "openblas configuration": r"@BLAS_OPENBLAS_CONFIG@", "pc file directory": r"@BLAS_PCFILEDIR@", }, "lapack": { @@ -82,7 +88,7 @@ CONFIG = _cleanup( "detection method": "@LAPACK_TYPE_NAME@", "include directory": r"@LAPACK_INCLUDEDIR@", "lib directory": r"@LAPACK_LIBDIR@", - "openblas configuration": 
"@LAPACK_OPENBLAS_CONFIG@", + "openblas configuration": r"@LAPACK_OPENBLAS_CONFIG@", "pc file directory": r"@LAPACK_PCFILEDIR@", }, }, diff --git a/contrib/python/numpy/py3/numpy/array_api/__init__.py b/contrib/python/numpy/py3/numpy/array_api/__init__.py index 77f227882e..edc3205fd5 100644 --- a/contrib/python/numpy/py3/numpy/array_api/__init__.py +++ b/contrib/python/numpy/py3/numpy/array_api/__init__.py @@ -127,7 +127,7 @@ __all__ = ["__array_api_version__"] from ._constants import e, inf, nan, pi, newaxis -__all__ += ["e", "inf", "nan", "pi"] +__all__ += ["e", "inf", "nan", "pi", "newaxis"] from ._creation_functions import ( asarray, diff --git a/contrib/python/numpy/py3/numpy/array_api/linalg.py b/contrib/python/numpy/py3/numpy/array_api/linalg.py index 09af9dfc3a..c18360f6e6 100644 --- a/contrib/python/numpy/py3/numpy/array_api/linalg.py +++ b/contrib/python/numpy/py3/numpy/array_api/linalg.py @@ -9,6 +9,7 @@ from ._dtypes import ( complex128 ) from ._manipulation_functions import reshape +from ._elementwise_functions import conj from ._array_object import Array from ..core.numeric import normalize_axis_tuple @@ -53,7 +54,10 @@ def cholesky(x: Array, /, *, upper: bool = False) -> Array: raise TypeError('Only floating-point dtypes are allowed in cholesky') L = np.linalg.cholesky(x._array) if upper: - return Array._new(L).mT + U = Array._new(L).mT + if U.dtype in [complex64, complex128]: + U = conj(U) + return U return Array._new(L) # Note: cross is the numpy top-level namespace, not np.linalg diff --git a/contrib/python/numpy/py3/numpy/core/code_generators/genapi.py b/contrib/python/numpy/py3/numpy/core/code_generators/genapi.py index 2cdaba52d9..d9d7862b28 100644 --- a/contrib/python/numpy/py3/numpy/core/code_generators/genapi.py +++ b/contrib/python/numpy/py3/numpy/core/code_generators/genapi.py @@ -304,15 +304,6 @@ def find_functions(filename, tag='API'): fo.close() return functions -def should_rebuild(targets, source_files): - from distutils.dep_util import newer_group - for t in targets: - if not os.path.exists(t): - return True - sources = API_FILES + list(source_files) + [__file__] - if newer_group(sources, targets[0], missing='newer'): - return True - return False def write_file(filename, data): """ diff --git a/contrib/python/numpy/py3/numpy/core/code_generators/generate_numpy_api.py b/contrib/python/numpy/py3/numpy/core/code_generators/generate_numpy_api.py index ae38c4efc2..640bae9e5f 100644 --- a/contrib/python/numpy/py3/numpy/core/code_generators/generate_numpy_api.py +++ b/contrib/python/numpy/py3/numpy/core/code_generators/generate_numpy_api.py @@ -148,12 +148,7 @@ def generate_api(output_dir, force=False): targets = (h_file, c_file) sources = numpy_api.multiarray_api - - if (not force and not genapi.should_rebuild(targets, [numpy_api.__file__, __file__])): - return targets - else: - do_generate_api(targets, sources) - + do_generate_api(targets, sources) return targets def do_generate_api(targets, sources): diff --git a/contrib/python/numpy/py3/numpy/core/code_generators/generate_ufunc_api.py b/contrib/python/numpy/py3/numpy/core/code_generators/generate_ufunc_api.py index e03299a52c..3734cbd6a0 100644 --- a/contrib/python/numpy/py3/numpy/core/code_generators/generate_ufunc_api.py +++ b/contrib/python/numpy/py3/numpy/core/code_generators/generate_ufunc_api.py @@ -125,12 +125,7 @@ def generate_api(output_dir, force=False): targets = (h_file, c_file) sources = ['ufunc_api_order.txt'] - - if (not force and not genapi.should_rebuild(targets, sources + [__file__])): - return 
targets - else: - do_generate_api(targets, sources) - + do_generate_api(targets, sources) return targets def do_generate_api(targets, sources): diff --git a/contrib/python/numpy/py3/numpy/core/feature_detection_stdio.h b/contrib/python/numpy/py3/numpy/core/feature_detection_stdio.h index bc14d16d04..d8bbfbd8b2 100644 --- a/contrib/python/numpy/py3/numpy/core/feature_detection_stdio.h +++ b/contrib/python/numpy/py3/numpy/core/feature_detection_stdio.h @@ -1,6 +1,9 @@ +#define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> +#if 0 /* Only for setup_common.py, not the C compiler */ off_t ftello(FILE *stream); int fseeko(FILE *stream, off_t offset, int whence); int fallocate(int, int, off_t, off_t); +#endif diff --git a/contrib/python/numpy/py3/numpy/core/generate_numpy_api.py b/contrib/python/numpy/py3/numpy/core/generate_numpy_api.py new file mode 100644 index 0000000000..640bae9e5f --- /dev/null +++ b/contrib/python/numpy/py3/numpy/core/generate_numpy_api.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +import os +import argparse + +import genapi +from genapi import \ + TypeApi, GlobalVarApi, FunctionApi, BoolValuesApi + +import numpy_api + +# use annotated api when running under cpychecker +h_template = r""" +#if defined(_MULTIARRAYMODULE) || defined(WITH_CPYCHECKER_STEALS_REFERENCE_TO_ARG_ATTRIBUTE) + +typedef struct { + PyObject_HEAD + npy_bool obval; +} PyBoolScalarObject; + +extern NPY_NO_EXPORT PyTypeObject PyArrayMapIter_Type; +extern NPY_NO_EXPORT PyTypeObject PyArrayNeighborhoodIter_Type; +extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2]; + +%s + +#else + +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +#define PyArray_API PY_ARRAY_UNIQUE_SYMBOL +#endif + +#if defined(NO_IMPORT) || defined(NO_IMPORT_ARRAY) +extern void **PyArray_API; +#else +#if defined(PY_ARRAY_UNIQUE_SYMBOL) +void **PyArray_API; +#else +static void **PyArray_API=NULL; +#endif +#endif + +%s + +#if !defined(NO_IMPORT_ARRAY) && !defined(NO_IMPORT) +static int +_import_array(void) +{ + int st; + PyObject *numpy = PyImport_ImportModule("numpy.core._multiarray_umath"); + PyObject *c_api = NULL; + + if (numpy == NULL) { + return -1; + } + c_api = PyObject_GetAttrString(numpy, "_ARRAY_API"); + Py_DECREF(numpy); + if (c_api == NULL) { + return -1; + } + + if (!PyCapsule_CheckExact(c_api)) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is not PyCapsule object"); + Py_DECREF(c_api); + return -1; + } + PyArray_API = (void **)PyCapsule_GetPointer(c_api, NULL); + Py_DECREF(c_api); + if (PyArray_API == NULL) { + PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is NULL pointer"); + return -1; + } + + /* Perform runtime check of C API version */ + if (NPY_VERSION != PyArray_GetNDArrayCVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "ABI version 0x%%x but this version of numpy is 0x%%x", \ + (int) NPY_VERSION, (int) PyArray_GetNDArrayCVersion()); + return -1; + } + if (NPY_FEATURE_VERSION > PyArray_GetNDArrayCFeatureVersion()) { + PyErr_Format(PyExc_RuntimeError, "module compiled against "\ + "API version 0x%%x but this version of numpy is 0x%%x . 
"\ + "Check the section C-API incompatibility at the "\ + "Troubleshooting ImportError section at "\ + "https://numpy.org/devdocs/user/troubleshooting-importerror.html"\ + "#c-api-incompatibility "\ + "for indications on how to solve this problem .", \ + (int) NPY_FEATURE_VERSION, (int) PyArray_GetNDArrayCFeatureVersion()); + return -1; + } + + /* + * Perform runtime check of endianness and check it matches the one set by + * the headers (npy_endian.h) as a safeguard + */ + st = PyArray_GetEndianness(); + if (st == NPY_CPU_UNKNOWN_ENDIAN) { + PyErr_SetString(PyExc_RuntimeError, + "FATAL: module compiled as unknown endian"); + return -1; + } +#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN + if (st != NPY_CPU_BIG) { + PyErr_SetString(PyExc_RuntimeError, + "FATAL: module compiled as big endian, but " + "detected different endianness at runtime"); + return -1; + } +#elif NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN + if (st != NPY_CPU_LITTLE) { + PyErr_SetString(PyExc_RuntimeError, + "FATAL: module compiled as little endian, but " + "detected different endianness at runtime"); + return -1; + } +#endif + + return 0; +} + +#define import_array() {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return NULL; } } + +#define import_array1(ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return ret; } } + +#define import_array2(msg, ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, msg); return ret; } } + +#endif + +#endif +""" + + +c_template = r""" +/* These pointers will be stored in the C-object for use in other + extension modules +*/ + +void *PyArray_API[] = { +%s +}; +""" + +def generate_api(output_dir, force=False): + basename = 'multiarray_api' + + h_file = os.path.join(output_dir, '__%s.h' % basename) + c_file = os.path.join(output_dir, '__%s.c' % basename) + targets = (h_file, c_file) + + sources = numpy_api.multiarray_api + do_generate_api(targets, sources) + return targets + +def do_generate_api(targets, sources): + header_file = targets[0] + c_file = targets[1] + + global_vars = sources[0] + scalar_bool_values = sources[1] + types_api = sources[2] + multiarray_funcs = sources[3] + + multiarray_api = sources[:] + + module_list = [] + extension_list = [] + init_list = [] + + # Check multiarray api indexes + multiarray_api_index = genapi.merge_api_dicts(multiarray_api) + genapi.check_api_dict(multiarray_api_index) + + numpyapi_list = genapi.get_api_functions('NUMPY_API', + multiarray_funcs) + + # Create dict name -> *Api instance + api_name = 'PyArray_API' + multiarray_api_dict = {} + for f in numpyapi_list: + name = f.name + index = multiarray_funcs[name][0] + annotations = multiarray_funcs[name][1:] + multiarray_api_dict[f.name] = FunctionApi(f.name, index, annotations, + f.return_type, + f.args, api_name) + + for name, val in global_vars.items(): + index, type = val + multiarray_api_dict[name] = GlobalVarApi(name, index, type, api_name) + + for name, val in scalar_bool_values.items(): + index = val[0] + multiarray_api_dict[name] = BoolValuesApi(name, index, api_name) + + for name, val in types_api.items(): + index = val[0] + internal_type = None if len(val) == 1 else val[1] + multiarray_api_dict[name] = TypeApi( + name, index, 'PyTypeObject', api_name, internal_type) + + if len(multiarray_api_dict) != len(multiarray_api_index): + keys_dict = set(multiarray_api_dict.keys()) + keys_index = set(multiarray_api_index.keys()) + raise 
AssertionError( + "Multiarray API size mismatch - " + "index has extra keys {}, dict has extra keys {}" + .format(keys_index - keys_dict, keys_dict - keys_index) + ) + + extension_list = [] + for name, index in genapi.order_dict(multiarray_api_index): + api_item = multiarray_api_dict[name] + extension_list.append(api_item.define_from_array_api_string()) + init_list.append(api_item.array_api_define()) + module_list.append(api_item.internal_define()) + + # Write to header + s = h_template % ('\n'.join(module_list), '\n'.join(extension_list)) + genapi.write_file(header_file, s) + + # Write to c-code + s = c_template % ',\n'.join(init_list) + genapi.write_file(c_file, s) + + return targets + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-o", + "--outdir", + type=str, + help="Path to the output directory" + ) + parser.add_argument( + "-i", + "--ignore", + type=str, + help="An ignored input - may be useful to add a " + "dependency between custom targets" + ) + args = parser.parse_args() + + outdir_abs = os.path.join(os.getcwd(), args.outdir) + + generate_api(outdir_abs) + + +if __name__ == "__main__": + main() diff --git a/contrib/python/numpy/py3/numpy/core/src/common/npy_cpu_features.c b/contrib/python/numpy/py3/numpy/core/src/common/npy_cpu_features.c index 64a85f6fb2..bd149f8b43 100644 --- a/contrib/python/numpy/py3/numpy/core/src/common/npy_cpu_features.c +++ b/contrib/python/numpy/py3/numpy/core/src/common/npy_cpu_features.c @@ -656,7 +656,7 @@ npy__cpu_init_features(void) /***************** ARM ******************/ -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64) static inline void npy__cpu_init_features_arm8(void) @@ -781,7 +781,7 @@ npy__cpu_init_features(void) return; #endif // We have nothing else todo -#if defined(NPY_HAVE_ASIMD) || defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8) +#if defined(NPY_HAVE_ASIMD) || defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8) || defined(_M_ARM64) #if defined(NPY_HAVE_FPHP) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) npy__cpu_have[NPY_CPU_FEATURE_FPHP] = 1; #endif diff --git a/contrib/python/numpy/py3/numpy/core/src/multiarray/convert.c b/contrib/python/numpy/py3/numpy/core/src/multiarray/convert.c index 60c1a1b9b0..8ec0aeefb7 100644 --- a/contrib/python/numpy/py3/numpy/core/src/multiarray/convert.c +++ b/contrib/python/numpy/py3/numpy/core/src/multiarray/convert.c @@ -23,8 +23,9 @@ #include "array_coercion.h" #include "refcount.h" -int -fallocate(int fd, int mode, off_t offset, off_t len); +#if defined(HAVE_FALLOCATE) && defined(__linux__) +#include <fcntl.h> +#endif /* * allocate nbytes of diskspace for file fp diff --git a/contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c b/contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c index 15257804bc..a38f90e76c 100644 --- a/contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c +++ b/contrib/python/numpy/py3/numpy/core/src/multiarray/temp_elide.c @@ -59,6 +59,9 @@ */ #if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! 
defined PYPY_VERSION + +#include <feature_detection_misc.h> + /* 1 prints elided operations, 2 prints stacktraces */ #define NPY_ELIDE_DEBUG 0 #define NPY_MAX_STACKSIZE 10 diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c b/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c index 25fae7f711..0d80a96966 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c @@ -46,8 +46,16 @@ * q = TRUNC((n - (-dsign ) + (-nsign))/d) - (-qsign); ********************************************************************************/ +#if (defined(NPY_HAVE_VSX) && !defined(NPY_HAVE_VSX4)) || defined(NPY_HAVE_NEON) + // Due to integer 128-bit multiplication emulation, SIMD 64-bit division + // may not perform well on both neon and up to VSX3 compared to scalar + // division. + #define SIMD_DISABLE_DIV64_OPT +#endif + #if NPY_SIMD -#line 45 +#line 52 +#if 8 < 64 || (8 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_s8(char **args, npy_intp len) { @@ -107,8 +115,10 @@ simd_divide_by_scalar_contig_s8(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 45 +#line 52 +#if 16 < 64 || (16 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_s16(char **args, npy_intp len) { @@ -168,8 +178,10 @@ simd_divide_by_scalar_contig_s16(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 45 +#line 52 +#if 32 < 64 || (32 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_s32(char **args, npy_intp len) { @@ -229,8 +241,10 @@ simd_divide_by_scalar_contig_s32(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 45 +#line 52 +#if 64 < 64 || (64 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_s64(char **args, npy_intp len) { @@ -290,9 +304,11 @@ simd_divide_by_scalar_contig_s64(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 111 +#line 120 +#if 8 < 64 || (8 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_u8(char **args, npy_intp len) { @@ -314,8 +330,10 @@ simd_divide_by_scalar_contig_u8(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 111 +#line 120 +#if 16 < 64 || (16 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_u16(char **args, npy_intp len) { @@ -337,8 +355,10 @@ simd_divide_by_scalar_contig_u16(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 111 +#line 120 +#if 32 < 64 || (32 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_u32(char **args, npy_intp len) { @@ -360,8 +380,10 @@ simd_divide_by_scalar_contig_u32(char **args, npy_intp len) } npyv_cleanup(); } +#endif -#line 111 +#line 120 +#if 64 < 64 || (64 == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_u64(char **args, npy_intp len) { @@ -383,11 +405,12 @@ simd_divide_by_scalar_contig_u64(char **args, npy_intp len) } npyv_cleanup(); } +#endif #if defined(NPY_HAVE_VSX4) -#line 140 +#line 151 /* * Computes division of 2 8-bit signed/unsigned integer vectors * @@ -452,7 +475,7 @@ vsx4_div_u16(npyv_u16 a, npyv_u16 b) #define vsx4_div_u32 vec_div #define vsx4_div_u64 vec_div -#line 140 +#line 151 /* * Computes division of 2 8-bit signed/unsigned integer vectors * @@ -518,7 +541,7 @@ vsx4_div_s16(npyv_s16 a, npyv_s16 b) 
#define vsx4_div_s64 vec_div -#line 210 +#line 221 static inline void vsx4_simd_divide_contig_u8(char **args, npy_intp len) { @@ -552,7 +575,7 @@ vsx4_simd_divide_contig_u8(char **args, npy_intp len) npyv_cleanup(); } -#line 210 +#line 221 static inline void vsx4_simd_divide_contig_u16(char **args, npy_intp len) { @@ -586,7 +609,7 @@ vsx4_simd_divide_contig_u16(char **args, npy_intp len) npyv_cleanup(); } -#line 210 +#line 221 static inline void vsx4_simd_divide_contig_u32(char **args, npy_intp len) { @@ -620,7 +643,7 @@ vsx4_simd_divide_contig_u32(char **args, npy_intp len) npyv_cleanup(); } -#line 210 +#line 221 static inline void vsx4_simd_divide_contig_u64(char **args, npy_intp len) { @@ -655,7 +678,7 @@ vsx4_simd_divide_contig_u64(char **args, npy_intp len) } -#line 249 +#line 260 static inline void vsx4_simd_divide_contig_s8(char **args, npy_intp len) { @@ -724,7 +747,7 @@ vsx4_simd_divide_contig_s8(char **args, npy_intp len) npyv_cleanup(); } -#line 249 +#line 260 static inline void vsx4_simd_divide_contig_s16(char **args, npy_intp len) { @@ -793,7 +816,7 @@ vsx4_simd_divide_contig_s16(char **args, npy_intp len) npyv_cleanup(); } -#line 249 +#line 260 static inline void vsx4_simd_divide_contig_s32(char **args, npy_intp len) { @@ -862,7 +885,7 @@ vsx4_simd_divide_contig_s32(char **args, npy_intp len) npyv_cleanup(); } -#line 249 +#line 260 static inline void vsx4_simd_divide_contig_s64(char **args, npy_intp len) { @@ -938,28 +961,27 @@ vsx4_simd_divide_contig_s64(char **args, npy_intp len) ** Defining ufunc inner functions ********************************************************************************/ -#line 329 +#line 340 #undef TO_SIMD_SFX #if 0 -#line 334 +#line 345 #elif NPY_BITSOF_BYTE == 8 #define TO_SIMD_SFX(X) X##_s8 -#line 334 +#line 345 #elif NPY_BITSOF_BYTE == 16 #define TO_SIMD_SFX(X) X##_s16 -#line 334 +#line 345 #elif NPY_BITSOF_BYTE == 32 #define TO_SIMD_SFX(X) X##_s32 -#line 334 +#line 345 #elif NPY_BITSOF_BYTE == 64 #define TO_SIMD_SFX(X) X##_s64 #endif - -#if NPY_BITSOF_BYTE == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_BYTE == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif @@ -1042,28 +1064,27 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_divide_indexed) } -#line 329 +#line 340 #undef TO_SIMD_SFX #if 0 -#line 334 +#line 345 #elif NPY_BITSOF_SHORT == 8 #define TO_SIMD_SFX(X) X##_s8 -#line 334 +#line 345 #elif NPY_BITSOF_SHORT == 16 #define TO_SIMD_SFX(X) X##_s16 -#line 334 +#line 345 #elif NPY_BITSOF_SHORT == 32 #define TO_SIMD_SFX(X) X##_s32 -#line 334 +#line 345 #elif NPY_BITSOF_SHORT == 64 #define TO_SIMD_SFX(X) X##_s64 #endif - -#if NPY_BITSOF_SHORT == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_SHORT == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif @@ -1146,28 +1167,27 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_divide_indexed) } -#line 329 +#line 340 #undef TO_SIMD_SFX #if 0 -#line 334 +#line 345 #elif NPY_BITSOF_INT == 8 #define TO_SIMD_SFX(X) X##_s8 -#line 334 +#line 345 #elif NPY_BITSOF_INT == 16 #define TO_SIMD_SFX(X) X##_s16 -#line 334 +#line 345 #elif NPY_BITSOF_INT == 32 #define TO_SIMD_SFX(X) X##_s32 -#line 334 +#line 345 #elif NPY_BITSOF_INT == 64 #define TO_SIMD_SFX(X) X##_s64 #endif - -#if NPY_BITSOF_INT == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_INT == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif @@ -1250,28 +1270,27 
@@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_divide_indexed) } -#line 329 +#line 340 #undef TO_SIMD_SFX #if 0 -#line 334 +#line 345 #elif NPY_BITSOF_LONG == 8 #define TO_SIMD_SFX(X) X##_s8 -#line 334 +#line 345 #elif NPY_BITSOF_LONG == 16 #define TO_SIMD_SFX(X) X##_s16 -#line 334 +#line 345 #elif NPY_BITSOF_LONG == 32 #define TO_SIMD_SFX(X) X##_s32 -#line 334 +#line 345 #elif NPY_BITSOF_LONG == 64 #define TO_SIMD_SFX(X) X##_s64 #endif - -#if NPY_BITSOF_LONG == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_LONG == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif @@ -1354,28 +1373,27 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_divide_indexed) } -#line 329 +#line 340 #undef TO_SIMD_SFX #if 0 -#line 334 +#line 345 #elif NPY_BITSOF_LONGLONG == 8 #define TO_SIMD_SFX(X) X##_s8 -#line 334 +#line 345 #elif NPY_BITSOF_LONGLONG == 16 #define TO_SIMD_SFX(X) X##_s16 -#line 334 +#line 345 #elif NPY_BITSOF_LONGLONG == 32 #define TO_SIMD_SFX(X) X##_s32 -#line 334 +#line 345 #elif NPY_BITSOF_LONGLONG == 64 #define TO_SIMD_SFX(X) X##_s64 #endif - -#if NPY_BITSOF_LONGLONG == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_LONGLONG == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif @@ -1459,22 +1477,22 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_divide_indexed) -#line 429 +#line 439 #undef TO_SIMD_SFX #if 0 -#line 434 +#line 444 #elif NPY_BITSOF_BYTE == 8 #define TO_SIMD_SFX(X) X##_u8 -#line 434 +#line 444 #elif NPY_BITSOF_BYTE == 16 #define TO_SIMD_SFX(X) X##_u16 -#line 434 +#line 444 #elif NPY_BITSOF_BYTE == 32 #define TO_SIMD_SFX(X) X##_u32 -#line 434 +#line 444 #elif NPY_BITSOF_BYTE == 64 #define TO_SIMD_SFX(X) X##_u64 @@ -1560,22 +1578,22 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_divide_indexed) } -#line 429 +#line 439 #undef TO_SIMD_SFX #if 0 -#line 434 +#line 444 #elif NPY_BITSOF_SHORT == 8 #define TO_SIMD_SFX(X) X##_u8 -#line 434 +#line 444 #elif NPY_BITSOF_SHORT == 16 #define TO_SIMD_SFX(X) X##_u16 -#line 434 +#line 444 #elif NPY_BITSOF_SHORT == 32 #define TO_SIMD_SFX(X) X##_u32 -#line 434 +#line 444 #elif NPY_BITSOF_SHORT == 64 #define TO_SIMD_SFX(X) X##_u64 @@ -1661,22 +1679,22 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_divide_indexed) } -#line 429 +#line 439 #undef TO_SIMD_SFX #if 0 -#line 434 +#line 444 #elif NPY_BITSOF_INT == 8 #define TO_SIMD_SFX(X) X##_u8 -#line 434 +#line 444 #elif NPY_BITSOF_INT == 16 #define TO_SIMD_SFX(X) X##_u16 -#line 434 +#line 444 #elif NPY_BITSOF_INT == 32 #define TO_SIMD_SFX(X) X##_u32 -#line 434 +#line 444 #elif NPY_BITSOF_INT == 64 #define TO_SIMD_SFX(X) X##_u64 @@ -1762,22 +1780,22 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_divide_indexed) } -#line 429 +#line 439 #undef TO_SIMD_SFX #if 0 -#line 434 +#line 444 #elif NPY_BITSOF_LONG == 8 #define TO_SIMD_SFX(X) X##_u8 -#line 434 +#line 444 #elif NPY_BITSOF_LONG == 16 #define TO_SIMD_SFX(X) X##_u16 -#line 434 +#line 444 #elif NPY_BITSOF_LONG == 32 #define TO_SIMD_SFX(X) X##_u32 -#line 434 +#line 444 #elif NPY_BITSOF_LONG == 64 #define TO_SIMD_SFX(X) X##_u64 @@ -1863,22 +1881,22 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_divide_indexed) } -#line 429 +#line 439 #undef TO_SIMD_SFX #if 0 -#line 434 +#line 444 #elif NPY_BITSOF_LONGLONG == 8 #define TO_SIMD_SFX(X) X##_u8 -#line 434 +#line 444 #elif NPY_BITSOF_LONGLONG == 16 #define TO_SIMD_SFX(X) X##_u16 -#line 434 +#line 444 #elif NPY_BITSOF_LONGLONG == 32 #define TO_SIMD_SFX(X) X##_u32 -#line 
434 +#line 444 #elif NPY_BITSOF_LONGLONG == 64 #define TO_SIMD_SFX(X) X##_u64 diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c.src b/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c.src index e07bb79808..d056046e05 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c.src +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_arithmetic.dispatch.c.src @@ -36,12 +36,20 @@ * q = TRUNC((n - (-dsign ) + (-nsign))/d) - (-qsign); ********************************************************************************/ +#if (defined(NPY_HAVE_VSX) && !defined(NPY_HAVE_VSX4)) || defined(NPY_HAVE_NEON) + // Due to integer 128-bit multiplication emulation, SIMD 64-bit division + // may not perform well on both neon and up to VSX3 compared to scalar + // division. + #define SIMD_DISABLE_DIV64_OPT +#endif + #if NPY_SIMD /**begin repeat * Signed types * #sfx = s8, s16, s32, s64# * #len = 8, 16, 32, 64# */ +#if @len@ < 64 || (@len@ == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len) { @@ -101,6 +109,7 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len) } npyv_cleanup(); } +#endif /**end repeat**/ /**begin repeat @@ -108,6 +117,7 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len) * #sfx = u8, u16, u32, u64# * #len = 8, 16, 32, 64# */ +#if @len@ < 64 || (@len@ == 64 && !defined(SIMD_DISABLE_DIV64_OPT)) static inline void simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len) { @@ -129,6 +139,7 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len) } npyv_cleanup(); } +#endif /**end repeat**/ #if defined(NPY_HAVE_VSX4) @@ -335,8 +346,7 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len) #define TO_SIMD_SFX(X) X##_s@len@ /**end repeat1**/ #endif - -#if NPY_BITSOF_@TYPE@ == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON)) +#if NPY_BITSOF_@TYPE@ == 64 && defined(SIMD_DISABLE_DIV64_OPT) #undef TO_SIMD_SFX #endif diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c b/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c index 5e9827a14c..8f446c3a8d 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c @@ -134,18 +134,6 @@ fma_blend(__m256 x, __m256 y, __m256 ymask) } NPY_FINLINE __m256 -fma_invert_mask_ps(__m256 ymask) -{ - return _mm256_andnot_ps(ymask, _mm256_set1_ps(-1.0)); -} - -NPY_FINLINE __m256i -fma_invert_mask_pd(__m256i ymask) -{ - return _mm256_andnot_si256(ymask, _mm256_set1_epi32(0xFFFFFFFF)); -} - -NPY_FINLINE __m256 fma_get_exponent(__m256 x) { /* @@ -321,18 +309,6 @@ avx512_blend(__m512 x, __m512 y, __mmask16 ymask) return _mm512_mask_mov_ps(x, ymask, y); } -NPY_FINLINE __mmask16 -avx512_invert_mask_ps(__mmask16 ymask) -{ - return _mm512_knot(ymask); -} - -NPY_FINLINE __mmask8 -avx512_invert_mask_pd(__mmask8 ymask) -{ - return _mm512_knot(ymask); -} - NPY_FINLINE __m512 avx512_get_exponent(__m512 x) { @@ -384,7 +360,7 @@ avx512_permute_x8var_pd(__m512d t0, __m512d t1, __m512d t2, __m512d t3, /******************************************************************************** ** Defining the SIMD kernels ********************************************************************************/ -#line 396 +#line 372 #ifdef SIMD_AVX2_FMA3 /* * Vectorized Cody-Waite range reduction technique @@ 
-683,7 +659,7 @@ simd_log_FLOAT(npy_float * op, } #endif // SIMD_AVX2_FMA3 -#line 396 +#line 372 #ifdef SIMD_AVX512F /* * Vectorized Cody-Waite range reduction technique @@ -984,7 +960,7 @@ simd_log_FLOAT(npy_float * op, #if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML) -#line 700 +#line 676 static void simd_exp_f64(const npyv_lanetype_f64 *src, npy_intp ssrc, npyv_lanetype_f64 *dst, npy_intp sdst, npy_intp len) @@ -1015,7 +991,7 @@ simd_exp_f64(const npyv_lanetype_f64 *src, npy_intp ssrc, npyv_cleanup(); } -#line 700 +#line 676 static void simd_log_f64(const npyv_lanetype_f64 *src, npy_intp ssrc, npyv_lanetype_f64 *dst, npy_intp sdst, npy_intp len) @@ -1298,49 +1274,49 @@ AVX512F_log_DOUBLE(npy_double * op, __m256i vindex = _mm256_loadu_si256((__m256i*)&indexarr[0]); /* Load lookup table data */ - #line 985 + #line 961 __m512d mLUT_TOP_0 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*0])); __m512d mLUT_TAIL_0 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*0])); -#line 985 +#line 961 __m512d mLUT_TOP_1 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*1])); __m512d mLUT_TAIL_1 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*1])); -#line 985 +#line 961 __m512d mLUT_TOP_2 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*2])); __m512d mLUT_TAIL_2 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*2])); -#line 985 +#line 961 __m512d mLUT_TOP_3 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*3])); __m512d mLUT_TAIL_3 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*3])); -#line 985 +#line 961 __m512d mLUT_TOP_4 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*4])); __m512d mLUT_TAIL_4 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*4])); -#line 985 +#line 961 __m512d mLUT_TOP_5 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*5])); __m512d mLUT_TAIL_5 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*5])); -#line 985 +#line 961 __m512d mLUT_TOP_6 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*6])); __m512d mLUT_TAIL_6 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*6])); -#line 985 +#line 961 __m512d mLUT_TOP_7 = _mm512_loadu_pd(&(LOG_TABLE_TOP[8*7])); __m512d mLUT_TAIL_7 = _mm512_loadu_pd(&(LOG_TABLE_TAIL[8*7])); @@ -1487,7 +1463,7 @@ AVX512F_log_DOUBLE(npy_double * op, #endif // NPY_CAN_LINK_SVML #ifdef SIMD_AVX512_SKX -#line 1149 +#line 1125 static inline void AVX512_SKX_ldexp_FLOAT(char **args, npy_intp const *dimensions, npy_intp const *steps) { @@ -1634,7 +1610,7 @@ AVX512_SKX_frexp_FLOAT(char **args, npy_intp const *dimensions, npy_intp const * } } -#line 1149 +#line 1125 static inline void AVX512_SKX_ldexp_DOUBLE(char **args, npy_intp const *dimensions, npy_intp const *steps) { @@ -1787,7 +1763,7 @@ AVX512_SKX_frexp_DOUBLE(char **args, npy_intp const *dimensions, npy_intp const /******************************************************************************** ** Defining ufunc inner functions ********************************************************************************/ -#line 1305 +#line 1281 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_exp) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -1816,7 +1792,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_exp) #endif } -#line 1305 +#line 1281 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_log) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -1846,7 +1822,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_log) } -#line 1338 +#line 1314 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_exp) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -1879,7 +1855,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_exp) } -#line 1338 +#line 1314 
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_log) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -1913,7 +1889,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_log) -#line 1378 +#line 1354 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_frexp) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -1945,7 +1921,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_ldexp) } } -#line 1378 +#line 1354 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_frexp) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c.src b/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c.src index 1fac3c150c..85dac9c20d 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c.src +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_exponent_log.dispatch.c.src @@ -124,18 +124,6 @@ fma_blend(__m256 x, __m256 y, __m256 ymask) } NPY_FINLINE __m256 -fma_invert_mask_ps(__m256 ymask) -{ - return _mm256_andnot_ps(ymask, _mm256_set1_ps(-1.0)); -} - -NPY_FINLINE __m256i -fma_invert_mask_pd(__m256i ymask) -{ - return _mm256_andnot_si256(ymask, _mm256_set1_epi32(0xFFFFFFFF)); -} - -NPY_FINLINE __m256 fma_get_exponent(__m256 x) { /* @@ -311,18 +299,6 @@ avx512_blend(__m512 x, __m512 y, __mmask16 ymask) return _mm512_mask_mov_ps(x, ymask, y); } -NPY_FINLINE __mmask16 -avx512_invert_mask_ps(__mmask16 ymask) -{ - return _mm512_knot(ymask); -} - -NPY_FINLINE __mmask8 -avx512_invert_mask_pd(__mmask8 ymask) -{ - return _mm512_knot(ymask); -} - NPY_FINLINE __m512 avx512_get_exponent(__m512 x) { diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c b/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c index ad8c1ef397..97a78b0e12 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c @@ -320,7 +320,8 @@ simd_binary_ccc_max_s8(const npyv_lanetype_s8 *ip1, const npyv_lanetype_s8 *ip2, } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_s8(const npyv_lanetype_s8 *ip1, npy_intp sip1, const npyv_lanetype_s8 *ip2, npy_intp sip2, @@ -483,7 +484,8 @@ simd_binary_ccc_min_s8(const npyv_lanetype_s8 *ip1, const npyv_lanetype_s8 *ip2, } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_s8(const npyv_lanetype_s8 *ip1, npy_intp sip1, const npyv_lanetype_s8 *ip2, npy_intp sip2, @@ -646,7 +648,8 @@ simd_binary_ccc_maxp_s8(const npyv_lanetype_s8 *ip1, const npyv_lanetype_s8 *ip2 } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_s8(const npyv_lanetype_s8 *ip1, npy_intp sip1, const npyv_lanetype_s8 *ip2, npy_intp sip2, @@ -809,7 +812,8 @@ simd_binary_ccc_minp_s8(const npyv_lanetype_s8 *ip1, const npyv_lanetype_s8 *ip2 } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm 
static inline void simd_binary_minp_s8(const npyv_lanetype_s8 *ip1, npy_intp sip1, const npyv_lanetype_s8 *ip2, npy_intp sip2, @@ -974,7 +978,8 @@ simd_binary_ccc_max_u8(const npyv_lanetype_u8 *ip1, const npyv_lanetype_u8 *ip2, } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_u8(const npyv_lanetype_u8 *ip1, npy_intp sip1, const npyv_lanetype_u8 *ip2, npy_intp sip2, @@ -1137,7 +1142,8 @@ simd_binary_ccc_min_u8(const npyv_lanetype_u8 *ip1, const npyv_lanetype_u8 *ip2, } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_u8(const npyv_lanetype_u8 *ip1, npy_intp sip1, const npyv_lanetype_u8 *ip2, npy_intp sip2, @@ -1300,7 +1306,8 @@ simd_binary_ccc_maxp_u8(const npyv_lanetype_u8 *ip1, const npyv_lanetype_u8 *ip2 } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_u8(const npyv_lanetype_u8 *ip1, npy_intp sip1, const npyv_lanetype_u8 *ip2, npy_intp sip2, @@ -1463,7 +1470,8 @@ simd_binary_ccc_minp_u8(const npyv_lanetype_u8 *ip1, const npyv_lanetype_u8 *ip2 } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_u8(const npyv_lanetype_u8 *ip1, npy_intp sip1, const npyv_lanetype_u8 *ip2, npy_intp sip2, @@ -1628,7 +1636,8 @@ simd_binary_ccc_max_s16(const npyv_lanetype_s16 *ip1, const npyv_lanetype_s16 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_s16(const npyv_lanetype_s16 *ip1, npy_intp sip1, const npyv_lanetype_s16 *ip2, npy_intp sip2, @@ -1791,7 +1800,8 @@ simd_binary_ccc_min_s16(const npyv_lanetype_s16 *ip1, const npyv_lanetype_s16 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_s16(const npyv_lanetype_s16 *ip1, npy_intp sip1, const npyv_lanetype_s16 *ip2, npy_intp sip2, @@ -1954,7 +1964,8 @@ simd_binary_ccc_maxp_s16(const npyv_lanetype_s16 *ip1, const npyv_lanetype_s16 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_s16(const npyv_lanetype_s16 *ip1, npy_intp sip1, const npyv_lanetype_s16 *ip2, npy_intp sip2, @@ -2117,7 +2128,8 @@ simd_binary_ccc_minp_s16(const npyv_lanetype_s16 *ip1, const npyv_lanetype_s16 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_s16(const npyv_lanetype_s16 *ip1, npy_intp sip1, const npyv_lanetype_s16 *ip2, npy_intp sip2, @@ -2282,7 +2294,8 @@ simd_binary_ccc_max_u16(const npyv_lanetype_u16 *ip1, const npyv_lanetype_u16 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than 
non-contiguous vector load/store on Arm static inline void simd_binary_max_u16(const npyv_lanetype_u16 *ip1, npy_intp sip1, const npyv_lanetype_u16 *ip2, npy_intp sip2, @@ -2445,7 +2458,8 @@ simd_binary_ccc_min_u16(const npyv_lanetype_u16 *ip1, const npyv_lanetype_u16 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_u16(const npyv_lanetype_u16 *ip1, npy_intp sip1, const npyv_lanetype_u16 *ip2, npy_intp sip2, @@ -2608,7 +2622,8 @@ simd_binary_ccc_maxp_u16(const npyv_lanetype_u16 *ip1, const npyv_lanetype_u16 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_u16(const npyv_lanetype_u16 *ip1, npy_intp sip1, const npyv_lanetype_u16 *ip2, npy_intp sip2, @@ -2771,7 +2786,8 @@ simd_binary_ccc_minp_u16(const npyv_lanetype_u16 *ip1, const npyv_lanetype_u16 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_u16(const npyv_lanetype_u16 *ip1, npy_intp sip1, const npyv_lanetype_u16 *ip2, npy_intp sip2, @@ -2936,7 +2952,8 @@ simd_binary_ccc_max_s32(const npyv_lanetype_s32 *ip1, const npyv_lanetype_s32 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_s32(const npyv_lanetype_s32 *ip1, npy_intp sip1, const npyv_lanetype_s32 *ip2, npy_intp sip2, @@ -3099,7 +3116,8 @@ simd_binary_ccc_min_s32(const npyv_lanetype_s32 *ip1, const npyv_lanetype_s32 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_s32(const npyv_lanetype_s32 *ip1, npy_intp sip1, const npyv_lanetype_s32 *ip2, npy_intp sip2, @@ -3262,7 +3280,8 @@ simd_binary_ccc_maxp_s32(const npyv_lanetype_s32 *ip1, const npyv_lanetype_s32 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_s32(const npyv_lanetype_s32 *ip1, npy_intp sip1, const npyv_lanetype_s32 *ip2, npy_intp sip2, @@ -3425,7 +3444,8 @@ simd_binary_ccc_minp_s32(const npyv_lanetype_s32 *ip1, const npyv_lanetype_s32 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_s32(const npyv_lanetype_s32 *ip1, npy_intp sip1, const npyv_lanetype_s32 *ip2, npy_intp sip2, @@ -3590,7 +3610,8 @@ simd_binary_ccc_max_u32(const npyv_lanetype_u32 *ip1, const npyv_lanetype_u32 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_u32(const npyv_lanetype_u32 *ip1, npy_intp sip1, const npyv_lanetype_u32 *ip2, npy_intp sip2, @@ -3753,7 +3774,8 @@ simd_binary_ccc_min_u32(const npyv_lanetype_u32 *ip1, const npyv_lanetype_u32 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && 
!defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_u32(const npyv_lanetype_u32 *ip1, npy_intp sip1, const npyv_lanetype_u32 *ip2, npy_intp sip2, @@ -3916,7 +3938,8 @@ simd_binary_ccc_maxp_u32(const npyv_lanetype_u32 *ip1, const npyv_lanetype_u32 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_u32(const npyv_lanetype_u32 *ip1, npy_intp sip1, const npyv_lanetype_u32 *ip2, npy_intp sip2, @@ -4079,7 +4102,8 @@ simd_binary_ccc_minp_u32(const npyv_lanetype_u32 *ip1, const npyv_lanetype_u32 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_u32(const npyv_lanetype_u32 *ip1, npy_intp sip1, const npyv_lanetype_u32 *ip2, npy_intp sip2, @@ -4244,7 +4268,8 @@ simd_binary_ccc_max_s64(const npyv_lanetype_s64 *ip1, const npyv_lanetype_s64 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_s64(const npyv_lanetype_s64 *ip1, npy_intp sip1, const npyv_lanetype_s64 *ip2, npy_intp sip2, @@ -4407,7 +4432,8 @@ simd_binary_ccc_min_s64(const npyv_lanetype_s64 *ip1, const npyv_lanetype_s64 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_s64(const npyv_lanetype_s64 *ip1, npy_intp sip1, const npyv_lanetype_s64 *ip2, npy_intp sip2, @@ -4570,7 +4596,8 @@ simd_binary_ccc_maxp_s64(const npyv_lanetype_s64 *ip1, const npyv_lanetype_s64 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_s64(const npyv_lanetype_s64 *ip1, npy_intp sip1, const npyv_lanetype_s64 *ip2, npy_intp sip2, @@ -4733,7 +4760,8 @@ simd_binary_ccc_minp_s64(const npyv_lanetype_s64 *ip1, const npyv_lanetype_s64 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_s64(const npyv_lanetype_s64 *ip1, npy_intp sip1, const npyv_lanetype_s64 *ip2, npy_intp sip2, @@ -4898,7 +4926,8 @@ simd_binary_ccc_max_u64(const npyv_lanetype_u64 *ip1, const npyv_lanetype_u64 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_u64(const npyv_lanetype_u64 *ip1, npy_intp sip1, const npyv_lanetype_u64 *ip2, npy_intp sip2, @@ -5061,7 +5090,8 @@ simd_binary_ccc_min_u64(const npyv_lanetype_u64 *ip1, const npyv_lanetype_u64 *i } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_u64(const npyv_lanetype_u64 *ip1, npy_intp sip1, const npyv_lanetype_u64 *ip2, npy_intp sip2, @@ -5224,7 +5254,8 @@ simd_binary_ccc_maxp_u64(const npyv_lanetype_u64 *ip1, const npyv_lanetype_u64 * } } // non-contiguous 
for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_u64(const npyv_lanetype_u64 *ip1, npy_intp sip1, const npyv_lanetype_u64 *ip2, npy_intp sip2, @@ -5387,7 +5418,8 @@ simd_binary_ccc_minp_u64(const npyv_lanetype_u64 *ip1, const npyv_lanetype_u64 * } } // non-contiguous for float 32/64-bit memory access -#if 0 +#if 0 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_u64(const npyv_lanetype_u64 *ip1, npy_intp sip1, const npyv_lanetype_u64 *ip2, npy_intp sip2, @@ -5552,7 +5584,8 @@ simd_binary_ccc_max_f32(const npyv_lanetype_f32 *ip1, const npyv_lanetype_f32 *i } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_f32(const npyv_lanetype_f32 *ip1, npy_intp sip1, const npyv_lanetype_f32 *ip2, npy_intp sip2, @@ -5715,7 +5748,8 @@ simd_binary_ccc_min_f32(const npyv_lanetype_f32 *ip1, const npyv_lanetype_f32 *i } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_f32(const npyv_lanetype_f32 *ip1, npy_intp sip1, const npyv_lanetype_f32 *ip2, npy_intp sip2, @@ -5878,7 +5912,8 @@ simd_binary_ccc_maxp_f32(const npyv_lanetype_f32 *ip1, const npyv_lanetype_f32 * } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_f32(const npyv_lanetype_f32 *ip1, npy_intp sip1, const npyv_lanetype_f32 *ip2, npy_intp sip2, @@ -6041,7 +6076,8 @@ simd_binary_ccc_minp_f32(const npyv_lanetype_f32 *ip1, const npyv_lanetype_f32 * } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_f32(const npyv_lanetype_f32 *ip1, npy_intp sip1, const npyv_lanetype_f32 *ip2, npy_intp sip2, @@ -6206,7 +6242,8 @@ simd_binary_ccc_max_f64(const npyv_lanetype_f64 *ip1, const npyv_lanetype_f64 *i } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_max_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, const npyv_lanetype_f64 *ip2, npy_intp sip2, @@ -6369,7 +6406,8 @@ simd_binary_ccc_min_f64(const npyv_lanetype_f64 *ip1, const npyv_lanetype_f64 *i } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_min_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, const npyv_lanetype_f64 *ip2, npy_intp sip2, @@ -6532,7 +6570,8 @@ simd_binary_ccc_maxp_f64(const npyv_lanetype_f64 *ip1, const npyv_lanetype_f64 * } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_maxp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, const npyv_lanetype_f64 *ip2, npy_intp sip2, @@ -6695,7 +6734,8 @@ simd_binary_ccc_minp_f64(const npyv_lanetype_f64 
*ip1, const npyv_lanetype_f64 * } } // non-contiguous for float 32/64-bit memory access -#if 1 +#if 1 && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, const npyv_lanetype_f64 *ip2, npy_intp sip2, @@ -6744,10 +6784,10 @@ simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, /******************************************************************************* ** Defining ufunc inner functions ******************************************************************************/ -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -6763,7 +6803,7 @@ simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -6779,7 +6819,7 @@ simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -6795,7 +6835,7 @@ simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -6813,7 +6853,7 @@ simd_binary_minp_f64(const npyv_lanetype_f64 *ip1, npy_intp sip1, #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -6921,22 +6961,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_ubyte v0 = *((npy_ubyte *)(ip1 + (i + 0) * is1)); npy_ubyte u0 = *((npy_ubyte *)(ip2 + (i + 0) * is2)); *((npy_ubyte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ubyte v1 = *((npy_ubyte *)(ip1 + (i + 1) * is1)); npy_ubyte u1 = *((npy_ubyte *)(ip2 + (i + 1) * is2)); *((npy_ubyte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ubyte v2 = *((npy_ubyte *)(ip1 + (i + 2) * is1)); npy_ubyte u2 = *((npy_ubyte *)(ip2 + (i + 2) * is2)); *((npy_ubyte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ubyte v3 = *((npy_ubyte *)(ip1 + (i + 3) * is1)); npy_ubyte u3 = *((npy_ubyte *)(ip2 + (i + 3) * is2)); *((npy_ubyte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -6988,7 +7028,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -7096,22 +7136,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ubyte v0 = *((npy_ubyte *)(ip1 + (i + 0) * is1)); npy_ubyte u0 = *((npy_ubyte *)(ip2 + (i + 0) * is2)); *((npy_ubyte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ubyte v1 = *((npy_ubyte *)(ip1 + (i + 1) * is1)); npy_ubyte u1 = *((npy_ubyte *)(ip2 + (i + 1) * is2)); *((npy_ubyte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ubyte v2 = *((npy_ubyte *)(ip1 + (i + 2) * is1)); npy_ubyte u2 = *((npy_ubyte *)(ip2 + (i + 2) * is2)); *((npy_ubyte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ubyte v3 = *((npy_ubyte *)(ip1 + (i + 3) * is1)); npy_ubyte u3 = *((npy_ubyte *)(ip2 + (i + 3) * is2)); *((npy_ubyte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -7163,7 +7203,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -7271,22 +7311,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_ubyte v0 = *((npy_ubyte *)(ip1 + (i + 0) * is1)); npy_ubyte u0 = *((npy_ubyte *)(ip2 + (i + 0) * is2)); *((npy_ubyte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ubyte v1 = *((npy_ubyte *)(ip1 + (i + 1) * is1)); npy_ubyte u1 = *((npy_ubyte *)(ip2 + (i + 1) * is2)); *((npy_ubyte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ubyte v2 = *((npy_ubyte *)(ip1 + (i + 2) * is1)); npy_ubyte u2 = *((npy_ubyte *)(ip2 + (i + 2) * is2)); *((npy_ubyte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ubyte v3 = *((npy_ubyte *)(ip1 + (i + 3) * is1)); npy_ubyte u3 = *((npy_ubyte *)(ip2 + (i + 3) * is2)); *((npy_ubyte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -7338,7 +7378,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -7446,22 +7486,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ubyte v0 = *((npy_ubyte *)(ip1 + (i + 0) * is1)); npy_ubyte u0 = *((npy_ubyte *)(ip2 + (i + 0) * is2)); *((npy_ubyte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ubyte v1 = *((npy_ubyte *)(ip1 + (i + 1) * is1)); npy_ubyte u1 = *((npy_ubyte *)(ip2 + (i + 1) * is2)); *((npy_ubyte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ubyte v2 = *((npy_ubyte *)(ip1 + (i + 2) * is1)); npy_ubyte u2 = *((npy_ubyte *)(ip2 + (i + 2) * is2)); *((npy_ubyte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ubyte v3 = *((npy_ubyte *)(ip1 + (i + 3) * is1)); npy_ubyte u3 = *((npy_ubyte *)(ip2 + (i + 3) * is2)); *((npy_ubyte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -7514,10 +7554,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -7533,7 +7573,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -7549,7 +7589,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -7565,7 +7605,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -7583,7 +7623,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UBYTE_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -7691,22 +7731,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_ushort v0 = *((npy_ushort *)(ip1 + (i + 0) * is1)); npy_ushort u0 = *((npy_ushort *)(ip2 + (i + 0) * is2)); *((npy_ushort *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ushort v1 = *((npy_ushort *)(ip1 + (i + 1) * is1)); npy_ushort u1 = *((npy_ushort *)(ip2 + (i + 1) * is2)); *((npy_ushort *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ushort v2 = *((npy_ushort *)(ip1 + (i + 2) * is1)); npy_ushort u2 = *((npy_ushort *)(ip2 + (i + 2) * is2)); *((npy_ushort *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ushort v3 = *((npy_ushort *)(ip1 + (i + 3) * is1)); npy_ushort u3 = *((npy_ushort *)(ip2 + (i + 3) * is2)); *((npy_ushort *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -7758,7 +7798,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -7866,22 +7906,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ushort v0 = *((npy_ushort *)(ip1 + (i + 0) * is1)); npy_ushort u0 = *((npy_ushort *)(ip2 + (i + 0) * is2)); *((npy_ushort *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ushort v1 = *((npy_ushort *)(ip1 + (i + 1) * is1)); npy_ushort u1 = *((npy_ushort *)(ip2 + (i + 1) * is2)); *((npy_ushort *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ushort v2 = *((npy_ushort *)(ip1 + (i + 2) * is1)); npy_ushort u2 = *((npy_ushort *)(ip2 + (i + 2) * is2)); *((npy_ushort *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ushort v3 = *((npy_ushort *)(ip1 + (i + 3) * is1)); npy_ushort u3 = *((npy_ushort *)(ip2 + (i + 3) * is2)); *((npy_ushort *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -7933,7 +7973,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -8041,22 +8081,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_ushort v0 = *((npy_ushort *)(ip1 + (i + 0) * is1)); npy_ushort u0 = *((npy_ushort *)(ip2 + (i + 0) * is2)); *((npy_ushort *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ushort v1 = *((npy_ushort *)(ip1 + (i + 1) * is1)); npy_ushort u1 = *((npy_ushort *)(ip2 + (i + 1) * is2)); *((npy_ushort *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ushort v2 = *((npy_ushort *)(ip1 + (i + 2) * is1)); npy_ushort u2 = *((npy_ushort *)(ip2 + (i + 2) * is2)); *((npy_ushort *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ushort v3 = *((npy_ushort *)(ip1 + (i + 3) * is1)); npy_ushort u3 = *((npy_ushort *)(ip2 + (i + 3) * is2)); *((npy_ushort *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -8108,7 +8148,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -8216,22 +8256,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ushort v0 = *((npy_ushort *)(ip1 + (i + 0) * is1)); npy_ushort u0 = *((npy_ushort *)(ip2 + (i + 0) * is2)); *((npy_ushort *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ushort v1 = *((npy_ushort *)(ip1 + (i + 1) * is1)); npy_ushort u1 = *((npy_ushort *)(ip2 + (i + 1) * is2)); *((npy_ushort *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ushort v2 = *((npy_ushort *)(ip1 + (i + 2) * is1)); npy_ushort u2 = *((npy_ushort *)(ip2 + (i + 2) * is2)); *((npy_ushort *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ushort v3 = *((npy_ushort *)(ip1 + (i + 3) * is1)); npy_ushort u3 = *((npy_ushort *)(ip2 + (i + 3) * is2)); *((npy_ushort *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -8284,10 +8324,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -8303,7 +8343,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -8319,7 +8359,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -8335,7 +8375,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -8353,7 +8393,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(USHORT_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -8461,22 +8501,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_uint v0 = *((npy_uint *)(ip1 + (i + 0) * is1)); npy_uint u0 = *((npy_uint *)(ip2 + (i + 0) * is2)); *((npy_uint *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_uint v1 = *((npy_uint *)(ip1 + (i + 1) * is1)); npy_uint u1 = *((npy_uint *)(ip2 + (i + 1) * is2)); *((npy_uint *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_uint v2 = *((npy_uint *)(ip1 + (i + 2) * is1)); npy_uint u2 = *((npy_uint *)(ip2 + (i + 2) * is2)); *((npy_uint *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_uint v3 = *((npy_uint *)(ip1 + (i + 3) * is1)); npy_uint u3 = *((npy_uint *)(ip2 + (i + 3) * is2)); *((npy_uint *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -8528,7 +8568,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -8636,22 +8676,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_uint v0 = *((npy_uint *)(ip1 + (i + 0) * is1)); npy_uint u0 = *((npy_uint *)(ip2 + (i + 0) * is2)); *((npy_uint *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_uint v1 = *((npy_uint *)(ip1 + (i + 1) * is1)); npy_uint u1 = *((npy_uint *)(ip2 + (i + 1) * is2)); *((npy_uint *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_uint v2 = *((npy_uint *)(ip1 + (i + 2) * is1)); npy_uint u2 = *((npy_uint *)(ip2 + (i + 2) * is2)); *((npy_uint *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_uint v3 = *((npy_uint *)(ip1 + (i + 3) * is1)); npy_uint u3 = *((npy_uint *)(ip2 + (i + 3) * is2)); *((npy_uint *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -8703,7 +8743,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -8811,22 +8851,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_uint v0 = *((npy_uint *)(ip1 + (i + 0) * is1)); npy_uint u0 = *((npy_uint *)(ip2 + (i + 0) * is2)); *((npy_uint *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_uint v1 = *((npy_uint *)(ip1 + (i + 1) * is1)); npy_uint u1 = *((npy_uint *)(ip2 + (i + 1) * is2)); *((npy_uint *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_uint v2 = *((npy_uint *)(ip1 + (i + 2) * is1)); npy_uint u2 = *((npy_uint *)(ip2 + (i + 2) * is2)); *((npy_uint *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_uint v3 = *((npy_uint *)(ip1 + (i + 3) * is1)); npy_uint u3 = *((npy_uint *)(ip2 + (i + 3) * is2)); *((npy_uint *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -8878,7 +8918,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -8986,22 +9026,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_uint v0 = *((npy_uint *)(ip1 + (i + 0) * is1)); npy_uint u0 = *((npy_uint *)(ip2 + (i + 0) * is2)); *((npy_uint *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_uint v1 = *((npy_uint *)(ip1 + (i + 1) * is1)); npy_uint u1 = *((npy_uint *)(ip2 + (i + 1) * is2)); *((npy_uint *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_uint v2 = *((npy_uint *)(ip1 + (i + 2) * is1)); npy_uint u2 = *((npy_uint *)(ip2 + (i + 2) * is2)); *((npy_uint *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_uint v3 = *((npy_uint *)(ip1 + (i + 3) * is1)); npy_uint u3 = *((npy_uint *)(ip2 + (i + 3) * is2)); *((npy_uint *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -9054,10 +9094,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -9073,7 +9113,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -9089,7 +9129,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -9105,7 +9145,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -9123,7 +9163,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(UINT_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -9231,22 +9271,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_ulong v0 = *((npy_ulong *)(ip1 + (i + 0) * is1)); npy_ulong u0 = *((npy_ulong *)(ip2 + (i + 0) * is2)); *((npy_ulong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulong v1 = *((npy_ulong *)(ip1 + (i + 1) * is1)); npy_ulong u1 = *((npy_ulong *)(ip2 + (i + 1) * is2)); *((npy_ulong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulong v2 = *((npy_ulong *)(ip1 + (i + 2) * is1)); npy_ulong u2 = *((npy_ulong *)(ip2 + (i + 2) * is2)); *((npy_ulong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulong v3 = *((npy_ulong *)(ip1 + (i + 3) * is1)); npy_ulong u3 = *((npy_ulong *)(ip2 + (i + 3) * is2)); *((npy_ulong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -9298,7 +9338,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -9406,22 +9446,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ulong v0 = *((npy_ulong *)(ip1 + (i + 0) * is1)); npy_ulong u0 = *((npy_ulong *)(ip2 + (i + 0) * is2)); *((npy_ulong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulong v1 = *((npy_ulong *)(ip1 + (i + 1) * is1)); npy_ulong u1 = *((npy_ulong *)(ip2 + (i + 1) * is2)); *((npy_ulong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulong v2 = *((npy_ulong *)(ip1 + (i + 2) * is1)); npy_ulong u2 = *((npy_ulong *)(ip2 + (i + 2) * is2)); *((npy_ulong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulong v3 = *((npy_ulong *)(ip1 + (i + 3) * is1)); npy_ulong u3 = *((npy_ulong *)(ip2 + (i + 3) * is2)); *((npy_ulong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -9473,7 +9513,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -9581,22 +9621,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_ulong v0 = *((npy_ulong *)(ip1 + (i + 0) * is1)); npy_ulong u0 = *((npy_ulong *)(ip2 + (i + 0) * is2)); *((npy_ulong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulong v1 = *((npy_ulong *)(ip1 + (i + 1) * is1)); npy_ulong u1 = *((npy_ulong *)(ip2 + (i + 1) * is2)); *((npy_ulong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulong v2 = *((npy_ulong *)(ip1 + (i + 2) * is1)); npy_ulong u2 = *((npy_ulong *)(ip2 + (i + 2) * is2)); *((npy_ulong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulong v3 = *((npy_ulong *)(ip1 + (i + 3) * is1)); npy_ulong u3 = *((npy_ulong *)(ip2 + (i + 3) * is2)); *((npy_ulong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -9648,7 +9688,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -9756,22 +9796,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ulong v0 = *((npy_ulong *)(ip1 + (i + 0) * is1)); npy_ulong u0 = *((npy_ulong *)(ip2 + (i + 0) * is2)); *((npy_ulong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulong v1 = *((npy_ulong *)(ip1 + (i + 1) * is1)); npy_ulong u1 = *((npy_ulong *)(ip2 + (i + 1) * is2)); *((npy_ulong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulong v2 = *((npy_ulong *)(ip1 + (i + 2) * is1)); npy_ulong u2 = *((npy_ulong *)(ip2 + (i + 2) * is2)); *((npy_ulong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulong v3 = *((npy_ulong *)(ip1 + (i + 3) * is1)); npy_ulong u3 = *((npy_ulong *)(ip2 + (i + 3) * is2)); *((npy_ulong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -9824,10 +9864,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -9843,7 +9883,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -9859,7 +9899,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -9875,7 +9915,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -9893,7 +9933,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONG_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -10001,22 +10041,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_ulonglong v0 = *((npy_ulonglong *)(ip1 + (i + 0) * is1)); npy_ulonglong u0 = *((npy_ulonglong *)(ip2 + (i + 0) * is2)); *((npy_ulonglong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulonglong v1 = *((npy_ulonglong *)(ip1 + (i + 1) * is1)); npy_ulonglong u1 = *((npy_ulonglong *)(ip2 + (i + 1) * is2)); *((npy_ulonglong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulonglong v2 = *((npy_ulonglong *)(ip1 + (i + 2) * is1)); npy_ulonglong u2 = *((npy_ulonglong *)(ip2 + (i + 2) * is2)); *((npy_ulonglong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulonglong v3 = *((npy_ulonglong *)(ip1 + (i + 3) * is1)); npy_ulonglong u3 = *((npy_ulonglong *)(ip2 + (i + 3) * is2)); *((npy_ulonglong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -10068,7 +10108,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -10176,22 +10216,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ulonglong v0 = *((npy_ulonglong *)(ip1 + (i + 0) * is1)); npy_ulonglong u0 = *((npy_ulonglong *)(ip2 + (i + 0) * is2)); *((npy_ulonglong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulonglong v1 = *((npy_ulonglong *)(ip1 + (i + 1) * is1)); npy_ulonglong u1 = *((npy_ulonglong *)(ip2 + (i + 1) * is2)); *((npy_ulonglong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulonglong v2 = *((npy_ulonglong *)(ip1 + (i + 2) * is1)); npy_ulonglong u2 = *((npy_ulonglong *)(ip2 + (i + 2) * is2)); *((npy_ulonglong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulonglong v3 = *((npy_ulonglong *)(ip1 + (i + 3) * is1)); npy_ulonglong u3 = *((npy_ulonglong *)(ip2 + (i + 3) * is2)); *((npy_ulonglong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -10243,7 +10283,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -10351,22 +10391,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_ulonglong v0 = *((npy_ulonglong *)(ip1 + (i + 0) * is1)); npy_ulonglong u0 = *((npy_ulonglong *)(ip2 + (i + 0) * is2)); *((npy_ulonglong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulonglong v1 = *((npy_ulonglong *)(ip1 + (i + 1) * is1)); npy_ulonglong u1 = *((npy_ulonglong *)(ip2 + (i + 1) * is2)); *((npy_ulonglong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulonglong v2 = *((npy_ulonglong *)(ip1 + (i + 2) * is1)); npy_ulonglong u2 = *((npy_ulonglong *)(ip2 + (i + 2) * is2)); *((npy_ulonglong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulonglong v3 = *((npy_ulonglong *)(ip1 + (i + 3) * is1)); npy_ulonglong u3 = *((npy_ulonglong *)(ip2 + (i + 3) * is2)); *((npy_ulonglong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -10418,7 +10458,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -10526,22 +10566,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_ulonglong v0 = *((npy_ulonglong *)(ip1 + (i + 0) * is1)); npy_ulonglong u0 = *((npy_ulonglong *)(ip2 + (i + 0) * is2)); *((npy_ulonglong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_ulonglong v1 = *((npy_ulonglong *)(ip1 + (i + 1) * is1)); npy_ulonglong u1 = *((npy_ulonglong *)(ip2 + (i + 1) * is2)); *((npy_ulonglong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_ulonglong v2 = *((npy_ulonglong *)(ip1 + (i + 2) * is1)); npy_ulonglong u2 = *((npy_ulonglong *)(ip2 + (i + 2) * is2)); *((npy_ulonglong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_ulonglong v3 = *((npy_ulonglong *)(ip1 + (i + 3) * is1)); npy_ulonglong u3 = *((npy_ulonglong *)(ip2 + (i + 3) * is2)); *((npy_ulonglong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -10594,10 +10634,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -10613,7 +10653,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -10629,7 +10669,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -10645,7 +10685,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_BYTE == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -10663,7 +10703,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(ULONGLONG_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -10771,22 +10811,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_byte v0 = *((npy_byte *)(ip1 + (i + 0) * is1)); npy_byte u0 = *((npy_byte *)(ip2 + (i + 0) * is2)); *((npy_byte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_byte v1 = *((npy_byte *)(ip1 + (i + 1) * is1)); npy_byte u1 = *((npy_byte *)(ip2 + (i + 1) * is2)); *((npy_byte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_byte v2 = *((npy_byte *)(ip1 + (i + 2) * is1)); npy_byte u2 = *((npy_byte *)(ip2 + (i + 2) * is2)); *((npy_byte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_byte v3 = *((npy_byte *)(ip1 + (i + 3) * is1)); npy_byte u3 = *((npy_byte *)(ip2 + (i + 3) * is2)); *((npy_byte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -10838,7 +10878,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -10946,22 +10986,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_byte v0 = *((npy_byte *)(ip1 + (i + 0) * is1)); npy_byte u0 = *((npy_byte *)(ip2 + (i + 0) * is2)); *((npy_byte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_byte v1 = *((npy_byte *)(ip1 + (i + 1) * is1)); npy_byte u1 = *((npy_byte *)(ip2 + (i + 1) * is2)); *((npy_byte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_byte v2 = *((npy_byte *)(ip1 + (i + 2) * is1)); npy_byte u2 = *((npy_byte *)(ip2 + (i + 2) * is2)); *((npy_byte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_byte v3 = *((npy_byte *)(ip1 + (i + 3) * is1)); npy_byte u3 = *((npy_byte *)(ip2 + (i + 3) * is2)); *((npy_byte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11013,7 +11053,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -11121,22 +11161,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_byte v0 = *((npy_byte *)(ip1 + (i + 0) * is1)); npy_byte u0 = *((npy_byte *)(ip2 + (i + 0) * is2)); *((npy_byte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_byte v1 = *((npy_byte *)(ip1 + (i + 1) * is1)); npy_byte u1 = *((npy_byte *)(ip2 + (i + 1) * is2)); *((npy_byte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_byte v2 = *((npy_byte *)(ip1 + (i + 2) * is1)); npy_byte u2 = *((npy_byte *)(ip2 + (i + 2) * is2)); *((npy_byte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_byte v3 = *((npy_byte *)(ip1 + (i + 3) * is1)); npy_byte u3 = *((npy_byte *)(ip2 + (i + 3) * is2)); *((npy_byte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11188,7 +11228,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -11296,22 +11336,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_byte v0 = *((npy_byte *)(ip1 + (i + 0) * is1)); npy_byte u0 = *((npy_byte *)(ip2 + (i + 0) * is2)); *((npy_byte *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_byte v1 = *((npy_byte *)(ip1 + (i + 1) * is1)); npy_byte u1 = *((npy_byte *)(ip2 + (i + 1) * is2)); *((npy_byte *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_byte v2 = *((npy_byte *)(ip1 + (i + 2) * is1)); npy_byte u2 = *((npy_byte *)(ip2 + (i + 2) * is2)); *((npy_byte *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_byte v3 = *((npy_byte *)(ip1 + (i + 3) * is1)); npy_byte u3 = *((npy_byte *)(ip2 + (i + 3) * is2)); *((npy_byte *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11364,10 +11404,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -11383,7 +11423,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -11399,7 +11439,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -11415,7 +11455,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_SHORT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -11433,7 +11473,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(BYTE_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -11541,22 +11581,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_short v0 = *((npy_short *)(ip1 + (i + 0) * is1)); npy_short u0 = *((npy_short *)(ip2 + (i + 0) * is2)); *((npy_short *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_short v1 = *((npy_short *)(ip1 + (i + 1) * is1)); npy_short u1 = *((npy_short *)(ip2 + (i + 1) * is2)); *((npy_short *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_short v2 = *((npy_short *)(ip1 + (i + 2) * is1)); npy_short u2 = *((npy_short *)(ip2 + (i + 2) * is2)); *((npy_short *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_short v3 = *((npy_short *)(ip1 + (i + 3) * is1)); npy_short u3 = *((npy_short *)(ip2 + (i + 3) * is2)); *((npy_short *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11608,7 +11648,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -11716,22 +11756,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_short v0 = *((npy_short *)(ip1 + (i + 0) * is1)); npy_short u0 = *((npy_short *)(ip2 + (i + 0) * is2)); *((npy_short *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_short v1 = *((npy_short *)(ip1 + (i + 1) * is1)); npy_short u1 = *((npy_short *)(ip2 + (i + 1) * is2)); *((npy_short *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_short v2 = *((npy_short *)(ip1 + (i + 2) * is1)); npy_short u2 = *((npy_short *)(ip2 + (i + 2) * is2)); *((npy_short *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_short v3 = *((npy_short *)(ip1 + (i + 3) * is1)); npy_short u3 = *((npy_short *)(ip2 + (i + 3) * is2)); *((npy_short *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11783,7 +11823,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -11891,22 +11931,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_short v0 = *((npy_short *)(ip1 + (i + 0) * is1)); npy_short u0 = *((npy_short *)(ip2 + (i + 0) * is2)); *((npy_short *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_short v1 = *((npy_short *)(ip1 + (i + 1) * is1)); npy_short u1 = *((npy_short *)(ip2 + (i + 1) * is2)); *((npy_short *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_short v2 = *((npy_short *)(ip1 + (i + 2) * is1)); npy_short u2 = *((npy_short *)(ip2 + (i + 2) * is2)); *((npy_short *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_short v3 = *((npy_short *)(ip1 + (i + 3) * is1)); npy_short u3 = *((npy_short *)(ip2 + (i + 3) * is2)); *((npy_short *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -11958,7 +11998,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -12066,22 +12106,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_short v0 = *((npy_short *)(ip1 + (i + 0) * is1)); npy_short u0 = *((npy_short *)(ip2 + (i + 0) * is2)); *((npy_short *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_short v1 = *((npy_short *)(ip1 + (i + 1) * is1)); npy_short u1 = *((npy_short *)(ip2 + (i + 1) * is2)); *((npy_short *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_short v2 = *((npy_short *)(ip1 + (i + 2) * is1)); npy_short u2 = *((npy_short *)(ip2 + (i + 2) * is2)); *((npy_short *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_short v3 = *((npy_short *)(ip1 + (i + 3) * is1)); npy_short u3 = *((npy_short *)(ip2 + (i + 3) * is2)); *((npy_short *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -12134,10 +12174,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -12153,7 +12193,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -12169,7 +12209,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -12185,7 +12225,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_INT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -12203,7 +12243,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(SHORT_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -12311,22 +12351,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_int v0 = *((npy_int *)(ip1 + (i + 0) * is1)); npy_int u0 = *((npy_int *)(ip2 + (i + 0) * is2)); *((npy_int *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_int v1 = *((npy_int *)(ip1 + (i + 1) * is1)); npy_int u1 = *((npy_int *)(ip2 + (i + 1) * is2)); *((npy_int *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_int v2 = *((npy_int *)(ip1 + (i + 2) * is1)); npy_int u2 = *((npy_int *)(ip2 + (i + 2) * is2)); *((npy_int *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_int v3 = *((npy_int *)(ip1 + (i + 3) * is1)); npy_int u3 = *((npy_int *)(ip2 + (i + 3) * is2)); *((npy_int *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -12378,7 +12418,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -12486,22 +12526,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_int v0 = *((npy_int *)(ip1 + (i + 0) * is1)); npy_int u0 = *((npy_int *)(ip2 + (i + 0) * is2)); *((npy_int *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_int v1 = *((npy_int *)(ip1 + (i + 1) * is1)); npy_int u1 = *((npy_int *)(ip2 + (i + 1) * is2)); *((npy_int *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_int v2 = *((npy_int *)(ip1 + (i + 2) * is1)); npy_int u2 = *((npy_int *)(ip2 + (i + 2) * is2)); *((npy_int *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_int v3 = *((npy_int *)(ip1 + (i + 3) * is1)); npy_int u3 = *((npy_int *)(ip2 + (i + 3) * is2)); *((npy_int *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -12553,7 +12593,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -12661,22 +12701,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_int v0 = *((npy_int *)(ip1 + (i + 0) * is1)); npy_int u0 = *((npy_int *)(ip2 + (i + 0) * is2)); *((npy_int *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_int v1 = *((npy_int *)(ip1 + (i + 1) * is1)); npy_int u1 = *((npy_int *)(ip2 + (i + 1) * is2)); *((npy_int *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_int v2 = *((npy_int *)(ip1 + (i + 2) * is1)); npy_int u2 = *((npy_int *)(ip2 + (i + 2) * is2)); *((npy_int *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_int v3 = *((npy_int *)(ip1 + (i + 3) * is1)); npy_int u3 = *((npy_int *)(ip2 + (i + 3) * is2)); *((npy_int *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -12728,7 +12768,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -12836,22 +12876,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_int v0 = *((npy_int *)(ip1 + (i + 0) * is1)); npy_int u0 = *((npy_int *)(ip2 + (i + 0) * is2)); *((npy_int *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_int v1 = *((npy_int *)(ip1 + (i + 1) * is1)); npy_int u1 = *((npy_int *)(ip2 + (i + 1) * is2)); *((npy_int *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_int v2 = *((npy_int *)(ip1 + (i + 2) * is1)); npy_int u2 = *((npy_int *)(ip2 + (i + 2) * is2)); *((npy_int *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_int v3 = *((npy_int *)(ip1 + (i + 3) * is1)); npy_int u3 = *((npy_int *)(ip2 + (i + 3) * is2)); *((npy_int *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -12904,10 +12944,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -12923,7 +12963,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -12939,7 +12979,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -12955,7 +12995,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -12973,7 +13013,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(INT_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -13081,22 +13121,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_long v0 = *((npy_long *)(ip1 + (i + 0) * is1)); npy_long u0 = *((npy_long *)(ip2 + (i + 0) * is2)); *((npy_long *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_long v1 = *((npy_long *)(ip1 + (i + 1) * is1)); npy_long u1 = *((npy_long *)(ip2 + (i + 1) * is2)); *((npy_long *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_long v2 = *((npy_long *)(ip1 + (i + 2) * is1)); npy_long u2 = *((npy_long *)(ip2 + (i + 2) * is2)); *((npy_long *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_long v3 = *((npy_long *)(ip1 + (i + 3) * is1)); npy_long u3 = *((npy_long *)(ip2 + (i + 3) * is2)); *((npy_long *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -13148,7 +13188,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -13256,22 +13296,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_long v0 = *((npy_long *)(ip1 + (i + 0) * is1)); npy_long u0 = *((npy_long *)(ip2 + (i + 0) * is2)); *((npy_long *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_long v1 = *((npy_long *)(ip1 + (i + 1) * is1)); npy_long u1 = *((npy_long *)(ip2 + (i + 1) * is2)); *((npy_long *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_long v2 = *((npy_long *)(ip1 + (i + 2) * is1)); npy_long u2 = *((npy_long *)(ip2 + (i + 2) * is2)); *((npy_long *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_long v3 = *((npy_long *)(ip1 + (i + 3) * is1)); npy_long u3 = *((npy_long *)(ip2 + (i + 3) * is2)); *((npy_long *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -13323,7 +13363,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -13431,22 +13471,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_long v0 = *((npy_long *)(ip1 + (i + 0) * is1)); npy_long u0 = *((npy_long *)(ip2 + (i + 0) * is2)); *((npy_long *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_long v1 = *((npy_long *)(ip1 + (i + 1) * is1)); npy_long u1 = *((npy_long *)(ip2 + (i + 1) * is2)); *((npy_long *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_long v2 = *((npy_long *)(ip1 + (i + 2) * is1)); npy_long u2 = *((npy_long *)(ip2 + (i + 2) * is2)); *((npy_long *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_long v3 = *((npy_long *)(ip1 + (i + 3) * is1)); npy_long u3 = *((npy_long *)(ip2 + (i + 3) * is2)); *((npy_long *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -13498,7 +13538,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -13606,22 +13646,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_long v0 = *((npy_long *)(ip1 + (i + 0) * is1)); npy_long u0 = *((npy_long *)(ip2 + (i + 0) * is2)); *((npy_long *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_long v1 = *((npy_long *)(ip1 + (i + 1) * is1)); npy_long u1 = *((npy_long *)(ip2 + (i + 1) * is2)); *((npy_long *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_long v2 = *((npy_long *)(ip1 + (i + 2) * is1)); npy_long u2 = *((npy_long *)(ip2 + (i + 2) * is2)); *((npy_long *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_long v3 = *((npy_long *)(ip1 + (i + 3) * is1)); npy_long u3 = *((npy_long *)(ip2 + (i + 3) * is2)); *((npy_long *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -13674,10 +13714,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -13693,7 +13733,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -13709,7 +13749,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -13725,7 +13765,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -13743,7 +13783,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONG_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_max_i @@ -13851,22 +13891,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_longlong v0 = *((npy_longlong *)(ip1 + (i + 0) * is1)); npy_longlong u0 = *((npy_longlong *)(ip2 + (i + 0) * is2)); *((npy_longlong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longlong v1 = *((npy_longlong *)(ip1 + (i + 1) * is1)); npy_longlong u1 = *((npy_longlong *)(ip2 + (i + 1) * is2)); *((npy_longlong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longlong v2 = *((npy_longlong *)(ip1 + (i + 2) * is1)); npy_longlong u2 = *((npy_longlong *)(ip2 + (i + 2) * is2)); *((npy_longlong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longlong v3 = *((npy_longlong *)(ip1 + (i + 3) * is1)); npy_longlong u3 = *((npy_longlong *)(ip2 + (i + 3) * is2)); *((npy_longlong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -13918,7 +13958,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (0 && 0) #define SCALAR_OP scalar_min_i @@ -14026,22 +14066,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_longlong v0 = *((npy_longlong *)(ip1 + (i + 0) * is1)); npy_longlong u0 = *((npy_longlong *)(ip2 + (i + 0) * is2)); *((npy_longlong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longlong v1 = *((npy_longlong *)(ip1 + (i + 1) * is1)); npy_longlong u1 = *((npy_longlong *)(ip2 + (i + 1) * is2)); *((npy_longlong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longlong v2 = *((npy_longlong *)(ip1 + (i + 2) * is1)); npy_longlong u2 = *((npy_longlong *)(ip2 + (i + 2) * is2)); *((npy_longlong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longlong v3 = *((npy_longlong *)(ip1 + (i + 3) * is1)); npy_longlong u3 = *((npy_longlong *)(ip2 + (i + 3) * is2)); *((npy_longlong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -14093,7 +14133,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_maxp_i @@ -14201,22 +14241,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_longlong v0 = *((npy_longlong *)(ip1 + (i + 0) * is1)); npy_longlong u0 = *((npy_longlong *)(ip2 + (i + 0) * is2)); *((npy_longlong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longlong v1 = *((npy_longlong *)(ip1 + (i + 1) * is1)); npy_longlong u1 = *((npy_longlong *)(ip2 + (i + 1) * is2)); *((npy_longlong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longlong v2 = *((npy_longlong *)(ip1 + (i + 2) * is1)); npy_longlong u2 = *((npy_longlong *)(ip2 + (i + 2) * is2)); *((npy_longlong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longlong v3 = *((npy_longlong *)(ip1 + (i + 3) * is1)); npy_longlong u3 = *((npy_longlong *)(ip2 + (i + 3) * is2)); *((npy_longlong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -14268,7 +14308,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (0 && 1) #define SCALAR_OP scalar_minp_i @@ -14376,22 +14416,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_longlong v0 = *((npy_longlong *)(ip1 + (i + 0) * is1)); npy_longlong u0 = *((npy_longlong *)(ip2 + (i + 0) * is2)); *((npy_longlong *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longlong v1 = *((npy_longlong *)(ip1 + (i + 1) * is1)); npy_longlong u1 = *((npy_longlong *)(ip2 + (i + 1) * is2)); *((npy_longlong *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longlong v2 = *((npy_longlong *)(ip1 + (i + 2) * is1)); npy_longlong u2 = *((npy_longlong *)(ip2 + (i + 2) * is2)); *((npy_longlong *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longlong v3 = *((npy_longlong *)(ip1 + (i + 3) * is1)); npy_longlong u3 = *((npy_longlong *)(ip2 + (i + 3) * is2)); *((npy_longlong *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -14444,10 +14484,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -14463,7 +14503,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -14479,7 +14519,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -14495,7 +14535,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -14513,7 +14553,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGLONG_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_max_f @@ -14621,22 +14661,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_float v0 = *((npy_float *)(ip1 + (i + 0) * is1)); npy_float u0 = *((npy_float *)(ip2 + (i + 0) * is2)); *((npy_float *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_float v1 = *((npy_float *)(ip1 + (i + 1) * is1)); npy_float u1 = *((npy_float *)(ip2 + (i + 1) * is2)); *((npy_float *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_float v2 = *((npy_float *)(ip1 + (i + 2) * is1)); npy_float u2 = *((npy_float *)(ip2 + (i + 2) * is2)); *((npy_float *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_float v3 = *((npy_float *)(ip1 + (i + 3) * is1)); npy_float u3 = *((npy_float *)(ip2 + (i + 3) * is2)); *((npy_float *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -14688,7 +14728,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_min_f @@ -14796,22 +14836,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_float v0 = *((npy_float *)(ip1 + (i + 0) * is1)); npy_float u0 = *((npy_float *)(ip2 + (i + 0) * is2)); *((npy_float *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_float v1 = *((npy_float *)(ip1 + (i + 1) * is1)); npy_float u1 = *((npy_float *)(ip2 + (i + 1) * is2)); *((npy_float *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_float v2 = *((npy_float *)(ip1 + (i + 2) * is1)); npy_float u2 = *((npy_float *)(ip2 + (i + 2) * is2)); *((npy_float *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_float v3 = *((npy_float *)(ip1 + (i + 3) * is1)); npy_float u3 = *((npy_float *)(ip2 + (i + 3) * is2)); *((npy_float *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -14863,7 +14903,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_maxp_f @@ -14971,22 +15011,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_float v0 = *((npy_float *)(ip1 + (i + 0) * is1)); npy_float u0 = *((npy_float *)(ip2 + (i + 0) * is2)); *((npy_float *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_float v1 = *((npy_float *)(ip1 + (i + 1) * is1)); npy_float u1 = *((npy_float *)(ip2 + (i + 1) * is2)); *((npy_float *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_float v2 = *((npy_float *)(ip1 + (i + 2) * is1)); npy_float u2 = *((npy_float *)(ip2 + (i + 2) * is2)); *((npy_float *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_float v3 = *((npy_float *)(ip1 + (i + 3) * is1)); npy_float u3 = *((npy_float *)(ip2 + (i + 3) * is2)); *((npy_float *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15038,7 +15078,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_minp_f @@ -15146,22 +15186,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_float v0 = *((npy_float *)(ip1 + (i + 0) * is1)); npy_float u0 = *((npy_float *)(ip2 + (i + 0) * is2)); *((npy_float *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_float v1 = *((npy_float *)(ip1 + (i + 1) * is1)); npy_float u1 = *((npy_float *)(ip2 + (i + 1) * is2)); *((npy_float *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_float v2 = *((npy_float *)(ip1 + (i + 2) * is1)); npy_float u2 = *((npy_float *)(ip2 + (i + 2) * is2)); *((npy_float *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_float v3 = *((npy_float *)(ip1 + (i + 3) * is1)); npy_float u3 = *((npy_float *)(ip2 + (i + 3) * is2)); *((npy_float *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15214,10 +15254,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -15233,7 +15273,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -15249,7 +15289,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -15265,7 +15305,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -15283,7 +15323,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(FLOAT_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_max_d @@ -15391,22 +15431,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_double v0 = *((npy_double *)(ip1 + (i + 0) * is1)); npy_double u0 = *((npy_double *)(ip2 + (i + 0) * is2)); *((npy_double *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_double v1 = *((npy_double *)(ip1 + (i + 1) * is1)); npy_double u1 = *((npy_double *)(ip2 + (i + 1) * is2)); *((npy_double *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_double v2 = *((npy_double *)(ip1 + (i + 2) * is1)); npy_double u2 = *((npy_double *)(ip2 + (i + 2) * is2)); *((npy_double *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_double v3 = *((npy_double *)(ip1 + (i + 3) * is1)); npy_double u3 = *((npy_double *)(ip2 + (i + 3) * is2)); *((npy_double *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15458,7 +15498,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_min_d @@ -15566,22 +15606,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_double v0 = *((npy_double *)(ip1 + (i + 0) * is1)); npy_double u0 = *((npy_double *)(ip2 + (i + 0) * is2)); *((npy_double *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_double v1 = *((npy_double *)(ip1 + (i + 1) * is1)); npy_double u1 = *((npy_double *)(ip2 + (i + 1) * is2)); *((npy_double *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_double v2 = *((npy_double *)(ip1 + (i + 2) * is1)); npy_double u2 = *((npy_double *)(ip2 + (i + 2) * is2)); *((npy_double *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_double v3 = *((npy_double *)(ip1 + (i + 3) * is1)); npy_double u3 = *((npy_double *)(ip2 + (i + 3) * is2)); *((npy_double *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15633,7 +15673,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_maxp_d @@ -15741,22 +15781,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_double v0 = *((npy_double *)(ip1 + (i + 0) * is1)); npy_double u0 = *((npy_double *)(ip2 + (i + 0) * is2)); *((npy_double *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_double v1 = *((npy_double *)(ip1 + (i + 1) * is1)); npy_double u1 = *((npy_double *)(ip2 + (i + 1) * is2)); *((npy_double *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_double v2 = *((npy_double *)(ip1 + (i + 2) * is1)); npy_double u2 = *((npy_double *)(ip2 + (i + 2) * is2)); *((npy_double *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_double v3 = *((npy_double *)(ip1 + (i + 3) * is1)); npy_double u3 = *((npy_double *)(ip2 + (i + 3) * is2)); *((npy_double *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15808,7 +15848,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_minp_d @@ -15916,22 +15956,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_double v0 = *((npy_double *)(ip1 + (i + 0) * is1)); npy_double u0 = *((npy_double *)(ip2 + (i + 0) * is2)); *((npy_double *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_double v1 = *((npy_double *)(ip1 + (i + 1) * is1)); npy_double u1 = *((npy_double *)(ip2 + (i + 1) * is2)); *((npy_double *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_double v2 = *((npy_double *)(ip1 + (i + 2) * is1)); npy_double u2 = *((npy_double *)(ip2 + (i + 2) * is2)); *((npy_double *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_double v3 = *((npy_double *)(ip1 + (i + 3) * is1)); npy_double u3 = *((npy_double *)(ip2 + (i + 3) * is2)); *((npy_double *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -15984,10 +16024,10 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 293 +#line 294 #undef TO_SIMD_SFX #if 0 -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -16003,7 +16043,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s8 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -16019,7 +16059,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s16 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -16035,7 +16075,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin_indexed) #define TO_SIMD_SFX(X) X##_s32 #endif -#line 298 +#line 299 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -16053,7 +16093,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(DOUBLE_fmin_indexed) #endif -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_max_l @@ -16161,22 +16201,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_maximum) * result of iteration 1. */ - #line 430 + #line 431 npy_longdouble v0 = *((npy_longdouble *)(ip1 + (i + 0) * is1)); npy_longdouble u0 = *((npy_longdouble *)(ip2 + (i + 0) * is2)); *((npy_longdouble *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longdouble v1 = *((npy_longdouble *)(ip1 + (i + 1) * is1)); npy_longdouble u1 = *((npy_longdouble *)(ip2 + (i + 1) * is2)); *((npy_longdouble *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longdouble v2 = *((npy_longdouble *)(ip1 + (i + 2) * is1)); npy_longdouble u2 = *((npy_longdouble *)(ip2 + (i + 2) * is2)); *((npy_longdouble *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longdouble v3 = *((npy_longdouble *)(ip1 + (i + 3) * is1)); npy_longdouble u3 = *((npy_longdouble *)(ip2 + (i + 3) * is2)); *((npy_longdouble *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -16228,7 +16268,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_maximum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !0 || (1 && 0) #define SCALAR_OP scalar_min_l @@ -16336,22 +16376,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_minimum) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_longdouble v0 = *((npy_longdouble *)(ip1 + (i + 0) * is1)); npy_longdouble u0 = *((npy_longdouble *)(ip2 + (i + 0) * is2)); *((npy_longdouble *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longdouble v1 = *((npy_longdouble *)(ip1 + (i + 1) * is1)); npy_longdouble u1 = *((npy_longdouble *)(ip2 + (i + 1) * is2)); *((npy_longdouble *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longdouble v2 = *((npy_longdouble *)(ip1 + (i + 2) * is1)); npy_longdouble u2 = *((npy_longdouble *)(ip2 + (i + 2) * is2)); *((npy_longdouble *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longdouble v3 = *((npy_longdouble *)(ip1 + (i + 3) * is1)); npy_longdouble u3 = *((npy_longdouble *)(ip2 + (i + 3) * is2)); *((npy_longdouble *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -16403,7 +16443,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_minimum_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_maxp_l @@ -16511,22 +16551,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_fmax) * result of iteration 1. */ - #line 430 + #line 431 npy_longdouble v0 = *((npy_longdouble *)(ip1 + (i + 0) * is1)); npy_longdouble u0 = *((npy_longdouble *)(ip2 + (i + 0) * is2)); *((npy_longdouble *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longdouble v1 = *((npy_longdouble *)(ip1 + (i + 1) * is1)); npy_longdouble u1 = *((npy_longdouble *)(ip2 + (i + 1) * is2)); *((npy_longdouble *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longdouble v2 = *((npy_longdouble *)(ip1 + (i + 2) * is1)); npy_longdouble u2 = *((npy_longdouble *)(ip2 + (i + 2) * is2)); *((npy_longdouble *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longdouble v3 = *((npy_longdouble *)(ip1 + (i + 3) * is1)); npy_longdouble u3 = *((npy_longdouble *)(ip2 + (i + 3) * is2)); *((npy_longdouble *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); @@ -16578,7 +16618,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_fmax_indexed) #endif // !fp_only || (is_fp && fp_only) -#line 320 +#line 321 #if !1 || (1 && 1) #define SCALAR_OP scalar_minp_l @@ -16686,22 +16726,22 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_fmin) * result of iteration 1. 
*/ - #line 430 + #line 431 npy_longdouble v0 = *((npy_longdouble *)(ip1 + (i + 0) * is1)); npy_longdouble u0 = *((npy_longdouble *)(ip2 + (i + 0) * is2)); *((npy_longdouble *)(op1 + (i + 0) * os1)) = SCALAR_OP(v0, u0); -#line 430 +#line 431 npy_longdouble v1 = *((npy_longdouble *)(ip1 + (i + 1) * is1)); npy_longdouble u1 = *((npy_longdouble *)(ip2 + (i + 1) * is2)); *((npy_longdouble *)(op1 + (i + 1) * os1)) = SCALAR_OP(v1, u1); -#line 430 +#line 431 npy_longdouble v2 = *((npy_longdouble *)(ip1 + (i + 2) * is1)); npy_longdouble u2 = *((npy_longdouble *)(ip2 + (i + 2) * is2)); *((npy_longdouble *)(op1 + (i + 2) * os1)) = SCALAR_OP(v2, u2); -#line 430 +#line 431 npy_longdouble v3 = *((npy_longdouble *)(ip1 + (i + 3) * is1)); npy_longdouble u3 = *((npy_longdouble *)(ip2 + (i + 3) * is2)); *((npy_longdouble *)(op1 + (i + 3) * os1)) = SCALAR_OP(v3, u3); diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c.src b/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c.src index 236e2e2eb7..319072c01f 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c.src +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_minmax.dispatch.c.src @@ -225,7 +225,8 @@ simd_binary_ccc_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, const npyv_lanety } } // non-contiguous for float 32/64-bit memory access -#if @is_fp@ +#if @is_fp@ && !defined(NPY_HAVE_NEON) +// unroll scalars faster than non-contiguous vector load/store on Arm static inline void simd_binary_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, npy_intp sip1, const npyv_lanetype_@sfx@ *ip2, npy_intp sip2, diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c b/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c index 9d9bc64a16..30ce938d66 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c @@ -26,8 +26,8 @@ * when there's no native FUSED support instead of fallback to libc */ #if NPY_SIMD_FMA3 // native support -#line 23 -#if NPY_SIMD_F64 +#line 24 +#if NPY_SIMD_F64 && 0 /* * Vectorized Cody-Waite range reduction technique * Performs the reduction step x* = x - y*C in three steps: @@ -46,8 +46,8 @@ simd_range_reduction_f64(npyv_f64 x, npyv_f64 y, npyv_f64 c1, npyv_f64 c2, npyv_ } #endif -#line 23 -#if NPY_SIMD_F32 +#line 24 +#if NPY_SIMD_F32 && 1 /* * Vectorized Cody-Waite range reduction technique * Performs the reduction step x* = x - y*C in three steps: @@ -66,9 +66,11 @@ simd_range_reduction_f32(npyv_f32 x, npyv_f32 y, npyv_f32 c1, npyv_f32 c2, npyv_ } #endif - -#if NPY_SIMD_F64 -#line 47 +/* Disable SIMD code and revert to libm: see + * https://mail.python.org/archives/list/numpy-discussion@python.org/thread/C6EYZZSR4EWGVKHAZXLE7IBILRMNVK7L/ + * for detailed discussion on this*/ +#if 0 // NPY_SIMD_F64 +#line 50 #if defined(NPY_OS_WIN32) || defined(NPY_OS_CYGWIN) NPY_FINLINE npyv_f64 #else @@ -90,7 +92,7 @@ simd_cos_scalar_f64(npyv_f64 out, npy_uint64 cmp_bits) return npyv_loada_f64(out_copy); } -#line 47 +#line 50 #if defined(NPY_OS_WIN32) || defined(NPY_OS_CYGWIN) NPY_FINLINE npyv_f64 #else @@ -208,7 +210,7 @@ simd_sin_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign) return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), sign), odd)); } -#line 167 +#line 170 NPY_FINLINE void simd_cos_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_intp len) { 
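Aside on the hunks above (an editorial illustration, not part of the commit): the loops_minmax template now skips its non-contiguous SIMD kernels when NPY_HAVE_NEON is defined, and the trigonometric dispatch source disables the vectorized float64 sin/cos path entirely, reverting to libm per the linked mailing-list thread. The simd_range_reduction_{f32,f64} helpers touched here implement Cody-Waite argument reduction, x* = x - y*C evaluated in three steps with C split into c1 + c2 + c3. A minimal scalar sketch of that step follows; the function name is invented for illustration and the splitting constants are placeholders, not NumPy's actual values.

    /* Scalar sketch of the three-step Cody-Waite reduction performed by
     * simd_range_reduction_f64 above. Splitting C into c1 + c2 + c3 keeps
     * each product y*ci exactly representable enough that the subtractions
     * cancel the large part of x without destroying its low-order bits. */
    static double cody_waite_reduce(double x, double y,
                                    double c1, double c2, double c3)
    {
        double r = x - y * c1;   /* remove the bulk of y*C           */
        r = r - y * c2;          /* correct with the next split term */
        r = r - y * c3;          /* final residual correction        */
        return r;
    }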
@@ -254,7 +256,7 @@ simd_cos_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_i npyv_cleanup(); } -#line 167 +#line 170 NPY_FINLINE void simd_sin_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_intp len) { @@ -473,7 +475,7 @@ simd_sincos_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst, #endif // NPY_SIMD_FP32 #endif // NYP_SIMD_FMA3 -#line 388 +#line 391 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_cos) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -507,7 +509,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_cos) #endif } -#line 388 +#line 391 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_sin) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -542,7 +544,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_sin) } -#line 426 +#line 429 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_sin) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { @@ -572,7 +574,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_sin) #endif } -#line 426 +#line 429 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_cos) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) { diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c.src index f07cb70f39..31de906098 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c.src +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_trigonometric.dispatch.c.src @@ -19,8 +19,9 @@ /**begin repeat * #check = F64, F32# * #sfx = f64, f32# + * #enable = 0, 1# */ -#if NPY_SIMD_@check@ +#if NPY_SIMD_@check@ && @enable@ /* * Vectorized Cody-Waite range reduction technique * Performs the reduction step x* = x - y*C in three steps: @@ -39,8 +40,10 @@ simd_range_reduction_@sfx@(npyv_@sfx@ x, npyv_@sfx@ y, npyv_@sfx@ c1, npyv_@sfx@ } #endif /**end repeat**/ - -#if NPY_SIMD_F64 +/* Disable SIMD code and revert to libm: see + * https://mail.python.org/archives/list/numpy-discussion@python.org/thread/C6EYZZSR4EWGVKHAZXLE7IBILRMNVK7L/ + * for detailed discussion on this*/ +#if 0 // NPY_SIMD_F64 /**begin repeat * #op = cos, sin# */ diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c b/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c index 3ea2747d9e..b2d3b0976a 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c @@ -604,6 +604,8 @@ simd_unary_nc_negative_s8(const npyv_lanetype_s8 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_s8(const npyv_lanetype_s8 *ip, npy_intp istride, npyv_lanetype_s8 *op, npy_intp ostride, @@ -614,112 +616,112 @@ simd_unary_nn_negative_s8(const npyv_lanetype_s8 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_s8 v_0 = npyv_loadn_s8(ip + 0 * vstep * istride, istride); npyv_s8 r_0 = npyv_negative_s8(v_0); npyv_storen_s8(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_s8 v_1 = npyv_loadn_s8(ip + 1 * vstep * istride, istride); 
npyv_s8 r_1 = npyv_negative_s8(v_1); npyv_storen_s8(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_s8 v_2 = npyv_loadn_s8(ip + 2 * vstep * istride, istride); npyv_s8 r_2 = npyv_negative_s8(v_2); npyv_storen_s8(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_s8 v_3 = npyv_loadn_s8(ip + 3 * vstep * istride, istride); npyv_s8 r_3 = npyv_negative_s8(v_3); npyv_storen_s8(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_s8 v_4 = npyv_loadn_s8(ip + 4 * vstep * istride, istride); npyv_s8 r_4 = npyv_negative_s8(v_4); npyv_storen_s8(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_s8 v_5 = npyv_loadn_s8(ip + 5 * vstep * istride, istride); npyv_s8 r_5 = npyv_negative_s8(v_5); npyv_storen_s8(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_s8 v_6 = npyv_loadn_s8(ip + 6 * vstep * istride, istride); npyv_s8 r_6 = npyv_negative_s8(v_6); npyv_storen_s8(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_s8 v_7 = npyv_loadn_s8(ip + 7 * vstep * istride, istride); npyv_s8 r_7 = npyv_negative_s8(v_7); npyv_storen_s8(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_s8 v_8 = npyv_loadn_s8(ip + 8 * vstep * istride, istride); npyv_s8 r_8 = npyv_negative_s8(v_8); npyv_storen_s8(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_s8 v_9 = npyv_loadn_s8(ip + 9 * vstep * istride, istride); npyv_s8 r_9 = npyv_negative_s8(v_9); npyv_storen_s8(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_s8 v_10 = npyv_loadn_s8(ip + 10 * vstep * istride, istride); npyv_s8 r_10 = npyv_negative_s8(v_10); npyv_storen_s8(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_s8 v_11 = npyv_loadn_s8(ip + 11 * vstep * istride, istride); npyv_s8 r_11 = npyv_negative_s8(v_11); npyv_storen_s8(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_s8 v_12 = npyv_loadn_s8(ip + 12 * vstep * istride, istride); npyv_s8 r_12 = npyv_negative_s8(v_12); npyv_storen_s8(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_s8 v_13 = npyv_loadn_s8(ip + 13 * vstep * istride, istride); npyv_s8 r_13 = npyv_negative_s8(v_13); npyv_storen_s8(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_s8 v_14 = npyv_loadn_s8(ip + 14 * vstep * istride, istride); npyv_s8 r_14 = npyv_negative_s8(v_14); npyv_storen_s8(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_s8 v_15 = npyv_loadn_s8(ip + 15 * vstep * istride, istride); npyv_s8 r_15 = npyv_negative_s8(v_15); @@ -738,6 +740,7 @@ simd_unary_nn_negative_s8(const npyv_lanetype_s8 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 0 #undef UNROLL #endif // NPY_SIMD @@ -1167,6 +1170,8 @@ simd_unary_nc_negative_u8(const npyv_lanetype_u8 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_u8(const npyv_lanetype_u8 *ip, npy_intp istride, npyv_lanetype_u8 *op, npy_intp ostride, @@ -1177,112 +1182,112 @@ simd_unary_nn_negative_u8(const npyv_lanetype_u8 *ip, npy_intp istride, // unrolled 
vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_u8 v_0 = npyv_loadn_u8(ip + 0 * vstep * istride, istride); npyv_u8 r_0 = npyv_negative_u8(v_0); npyv_storen_u8(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_u8 v_1 = npyv_loadn_u8(ip + 1 * vstep * istride, istride); npyv_u8 r_1 = npyv_negative_u8(v_1); npyv_storen_u8(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_u8 v_2 = npyv_loadn_u8(ip + 2 * vstep * istride, istride); npyv_u8 r_2 = npyv_negative_u8(v_2); npyv_storen_u8(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_u8 v_3 = npyv_loadn_u8(ip + 3 * vstep * istride, istride); npyv_u8 r_3 = npyv_negative_u8(v_3); npyv_storen_u8(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_u8 v_4 = npyv_loadn_u8(ip + 4 * vstep * istride, istride); npyv_u8 r_4 = npyv_negative_u8(v_4); npyv_storen_u8(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_u8 v_5 = npyv_loadn_u8(ip + 5 * vstep * istride, istride); npyv_u8 r_5 = npyv_negative_u8(v_5); npyv_storen_u8(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_u8 v_6 = npyv_loadn_u8(ip + 6 * vstep * istride, istride); npyv_u8 r_6 = npyv_negative_u8(v_6); npyv_storen_u8(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_u8 v_7 = npyv_loadn_u8(ip + 7 * vstep * istride, istride); npyv_u8 r_7 = npyv_negative_u8(v_7); npyv_storen_u8(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_u8 v_8 = npyv_loadn_u8(ip + 8 * vstep * istride, istride); npyv_u8 r_8 = npyv_negative_u8(v_8); npyv_storen_u8(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_u8 v_9 = npyv_loadn_u8(ip + 9 * vstep * istride, istride); npyv_u8 r_9 = npyv_negative_u8(v_9); npyv_storen_u8(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_u8 v_10 = npyv_loadn_u8(ip + 10 * vstep * istride, istride); npyv_u8 r_10 = npyv_negative_u8(v_10); npyv_storen_u8(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_u8 v_11 = npyv_loadn_u8(ip + 11 * vstep * istride, istride); npyv_u8 r_11 = npyv_negative_u8(v_11); npyv_storen_u8(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_u8 v_12 = npyv_loadn_u8(ip + 12 * vstep * istride, istride); npyv_u8 r_12 = npyv_negative_u8(v_12); npyv_storen_u8(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_u8 v_13 = npyv_loadn_u8(ip + 13 * vstep * istride, istride); npyv_u8 r_13 = npyv_negative_u8(v_13); npyv_storen_u8(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_u8 v_14 = npyv_loadn_u8(ip + 14 * vstep * istride, istride); npyv_u8 r_14 = npyv_negative_u8(v_14); npyv_storen_u8(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_u8 v_15 = npyv_loadn_u8(ip + 15 * vstep * istride, istride); npyv_u8 r_15 = npyv_negative_u8(v_15); @@ -1301,6 +1306,7 @@ simd_unary_nn_negative_u8(const npyv_lanetype_u8 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 0 #undef UNROLL #endif // NPY_SIMD @@ -1730,6 +1736,8 @@ simd_unary_nc_negative_s16(const npyv_lanetype_s16 
*ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_s16(const npyv_lanetype_s16 *ip, npy_intp istride, npyv_lanetype_s16 *op, npy_intp ostride, @@ -1740,112 +1748,112 @@ simd_unary_nn_negative_s16(const npyv_lanetype_s16 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_s16 v_0 = npyv_loadn_s16(ip + 0 * vstep * istride, istride); npyv_s16 r_0 = npyv_negative_s16(v_0); npyv_storen_s16(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_s16 v_1 = npyv_loadn_s16(ip + 1 * vstep * istride, istride); npyv_s16 r_1 = npyv_negative_s16(v_1); npyv_storen_s16(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_s16 v_2 = npyv_loadn_s16(ip + 2 * vstep * istride, istride); npyv_s16 r_2 = npyv_negative_s16(v_2); npyv_storen_s16(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_s16 v_3 = npyv_loadn_s16(ip + 3 * vstep * istride, istride); npyv_s16 r_3 = npyv_negative_s16(v_3); npyv_storen_s16(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_s16 v_4 = npyv_loadn_s16(ip + 4 * vstep * istride, istride); npyv_s16 r_4 = npyv_negative_s16(v_4); npyv_storen_s16(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_s16 v_5 = npyv_loadn_s16(ip + 5 * vstep * istride, istride); npyv_s16 r_5 = npyv_negative_s16(v_5); npyv_storen_s16(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_s16 v_6 = npyv_loadn_s16(ip + 6 * vstep * istride, istride); npyv_s16 r_6 = npyv_negative_s16(v_6); npyv_storen_s16(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_s16 v_7 = npyv_loadn_s16(ip + 7 * vstep * istride, istride); npyv_s16 r_7 = npyv_negative_s16(v_7); npyv_storen_s16(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_s16 v_8 = npyv_loadn_s16(ip + 8 * vstep * istride, istride); npyv_s16 r_8 = npyv_negative_s16(v_8); npyv_storen_s16(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_s16 v_9 = npyv_loadn_s16(ip + 9 * vstep * istride, istride); npyv_s16 r_9 = npyv_negative_s16(v_9); npyv_storen_s16(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_s16 v_10 = npyv_loadn_s16(ip + 10 * vstep * istride, istride); npyv_s16 r_10 = npyv_negative_s16(v_10); npyv_storen_s16(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_s16 v_11 = npyv_loadn_s16(ip + 11 * vstep * istride, istride); npyv_s16 r_11 = npyv_negative_s16(v_11); npyv_storen_s16(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_s16 v_12 = npyv_loadn_s16(ip + 12 * vstep * istride, istride); npyv_s16 r_12 = npyv_negative_s16(v_12); npyv_storen_s16(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_s16 v_13 = npyv_loadn_s16(ip + 13 * vstep * istride, istride); npyv_s16 r_13 = npyv_negative_s16(v_13); npyv_storen_s16(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_s16 v_14 = npyv_loadn_s16(ip + 14 * vstep * istride, istride); npyv_s16 r_14 = npyv_negative_s16(v_14); 
npyv_storen_s16(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_s16 v_15 = npyv_loadn_s16(ip + 15 * vstep * istride, istride); npyv_s16 r_15 = npyv_negative_s16(v_15); @@ -1864,6 +1872,7 @@ simd_unary_nn_negative_s16(const npyv_lanetype_s16 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 0 #undef UNROLL #endif // NPY_SIMD @@ -2293,6 +2302,8 @@ simd_unary_nc_negative_u16(const npyv_lanetype_u16 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_u16(const npyv_lanetype_u16 *ip, npy_intp istride, npyv_lanetype_u16 *op, npy_intp ostride, @@ -2303,112 +2314,112 @@ simd_unary_nn_negative_u16(const npyv_lanetype_u16 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_u16 v_0 = npyv_loadn_u16(ip + 0 * vstep * istride, istride); npyv_u16 r_0 = npyv_negative_u16(v_0); npyv_storen_u16(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_u16 v_1 = npyv_loadn_u16(ip + 1 * vstep * istride, istride); npyv_u16 r_1 = npyv_negative_u16(v_1); npyv_storen_u16(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_u16 v_2 = npyv_loadn_u16(ip + 2 * vstep * istride, istride); npyv_u16 r_2 = npyv_negative_u16(v_2); npyv_storen_u16(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_u16 v_3 = npyv_loadn_u16(ip + 3 * vstep * istride, istride); npyv_u16 r_3 = npyv_negative_u16(v_3); npyv_storen_u16(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_u16 v_4 = npyv_loadn_u16(ip + 4 * vstep * istride, istride); npyv_u16 r_4 = npyv_negative_u16(v_4); npyv_storen_u16(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_u16 v_5 = npyv_loadn_u16(ip + 5 * vstep * istride, istride); npyv_u16 r_5 = npyv_negative_u16(v_5); npyv_storen_u16(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_u16 v_6 = npyv_loadn_u16(ip + 6 * vstep * istride, istride); npyv_u16 r_6 = npyv_negative_u16(v_6); npyv_storen_u16(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_u16 v_7 = npyv_loadn_u16(ip + 7 * vstep * istride, istride); npyv_u16 r_7 = npyv_negative_u16(v_7); npyv_storen_u16(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_u16 v_8 = npyv_loadn_u16(ip + 8 * vstep * istride, istride); npyv_u16 r_8 = npyv_negative_u16(v_8); npyv_storen_u16(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_u16 v_9 = npyv_loadn_u16(ip + 9 * vstep * istride, istride); npyv_u16 r_9 = npyv_negative_u16(v_9); npyv_storen_u16(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_u16 v_10 = npyv_loadn_u16(ip + 10 * vstep * istride, istride); npyv_u16 r_10 = npyv_negative_u16(v_10); npyv_storen_u16(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_u16 v_11 = npyv_loadn_u16(ip + 11 * vstep * istride, istride); npyv_u16 r_11 = npyv_negative_u16(v_11); npyv_storen_u16(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_u16 v_12 = npyv_loadn_u16(ip + 12 * vstep * 
istride, istride); npyv_u16 r_12 = npyv_negative_u16(v_12); npyv_storen_u16(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_u16 v_13 = npyv_loadn_u16(ip + 13 * vstep * istride, istride); npyv_u16 r_13 = npyv_negative_u16(v_13); npyv_storen_u16(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_u16 v_14 = npyv_loadn_u16(ip + 14 * vstep * istride, istride); npyv_u16 r_14 = npyv_negative_u16(v_14); npyv_storen_u16(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_u16 v_15 = npyv_loadn_u16(ip + 15 * vstep * istride, istride); npyv_u16 r_15 = npyv_negative_u16(v_15); @@ -2427,6 +2438,7 @@ simd_unary_nn_negative_u16(const npyv_lanetype_u16 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 0 #undef UNROLL #endif // NPY_SIMD @@ -2856,6 +2868,8 @@ simd_unary_nc_negative_s32(const npyv_lanetype_s32 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_s32(const npyv_lanetype_s32 *ip, npy_intp istride, npyv_lanetype_s32 *op, npy_intp ostride, @@ -2866,112 +2880,112 @@ simd_unary_nn_negative_s32(const npyv_lanetype_s32 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_s32 v_0 = npyv_loadn_s32(ip + 0 * vstep * istride, istride); npyv_s32 r_0 = npyv_negative_s32(v_0); npyv_storen_s32(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_s32 v_1 = npyv_loadn_s32(ip + 1 * vstep * istride, istride); npyv_s32 r_1 = npyv_negative_s32(v_1); npyv_storen_s32(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_s32 v_2 = npyv_loadn_s32(ip + 2 * vstep * istride, istride); npyv_s32 r_2 = npyv_negative_s32(v_2); npyv_storen_s32(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_s32 v_3 = npyv_loadn_s32(ip + 3 * vstep * istride, istride); npyv_s32 r_3 = npyv_negative_s32(v_3); npyv_storen_s32(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_s32 v_4 = npyv_loadn_s32(ip + 4 * vstep * istride, istride); npyv_s32 r_4 = npyv_negative_s32(v_4); npyv_storen_s32(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_s32 v_5 = npyv_loadn_s32(ip + 5 * vstep * istride, istride); npyv_s32 r_5 = npyv_negative_s32(v_5); npyv_storen_s32(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_s32 v_6 = npyv_loadn_s32(ip + 6 * vstep * istride, istride); npyv_s32 r_6 = npyv_negative_s32(v_6); npyv_storen_s32(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_s32 v_7 = npyv_loadn_s32(ip + 7 * vstep * istride, istride); npyv_s32 r_7 = npyv_negative_s32(v_7); npyv_storen_s32(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_s32 v_8 = npyv_loadn_s32(ip + 8 * vstep * istride, istride); npyv_s32 r_8 = npyv_negative_s32(v_8); npyv_storen_s32(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_s32 v_9 = npyv_loadn_s32(ip + 9 * vstep * istride, istride); npyv_s32 r_9 = npyv_negative_s32(v_9); npyv_storen_s32(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if 
UNROLL > 10 npyv_s32 v_10 = npyv_loadn_s32(ip + 10 * vstep * istride, istride); npyv_s32 r_10 = npyv_negative_s32(v_10); npyv_storen_s32(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_s32 v_11 = npyv_loadn_s32(ip + 11 * vstep * istride, istride); npyv_s32 r_11 = npyv_negative_s32(v_11); npyv_storen_s32(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_s32 v_12 = npyv_loadn_s32(ip + 12 * vstep * istride, istride); npyv_s32 r_12 = npyv_negative_s32(v_12); npyv_storen_s32(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_s32 v_13 = npyv_loadn_s32(ip + 13 * vstep * istride, istride); npyv_s32 r_13 = npyv_negative_s32(v_13); npyv_storen_s32(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_s32 v_14 = npyv_loadn_s32(ip + 14 * vstep * istride, istride); npyv_s32 r_14 = npyv_negative_s32(v_14); npyv_storen_s32(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_s32 v_15 = npyv_loadn_s32(ip + 15 * vstep * istride, istride); npyv_s32 r_15 = npyv_negative_s32(v_15); @@ -2990,6 +3004,7 @@ simd_unary_nn_negative_s32(const npyv_lanetype_s32 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD @@ -3419,6 +3434,8 @@ simd_unary_nc_negative_u32(const npyv_lanetype_u32 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_u32(const npyv_lanetype_u32 *ip, npy_intp istride, npyv_lanetype_u32 *op, npy_intp ostride, @@ -3429,112 +3446,112 @@ simd_unary_nn_negative_u32(const npyv_lanetype_u32 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_u32 v_0 = npyv_loadn_u32(ip + 0 * vstep * istride, istride); npyv_u32 r_0 = npyv_negative_u32(v_0); npyv_storen_u32(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_u32 v_1 = npyv_loadn_u32(ip + 1 * vstep * istride, istride); npyv_u32 r_1 = npyv_negative_u32(v_1); npyv_storen_u32(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_u32 v_2 = npyv_loadn_u32(ip + 2 * vstep * istride, istride); npyv_u32 r_2 = npyv_negative_u32(v_2); npyv_storen_u32(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_u32 v_3 = npyv_loadn_u32(ip + 3 * vstep * istride, istride); npyv_u32 r_3 = npyv_negative_u32(v_3); npyv_storen_u32(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_u32 v_4 = npyv_loadn_u32(ip + 4 * vstep * istride, istride); npyv_u32 r_4 = npyv_negative_u32(v_4); npyv_storen_u32(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_u32 v_5 = npyv_loadn_u32(ip + 5 * vstep * istride, istride); npyv_u32 r_5 = npyv_negative_u32(v_5); npyv_storen_u32(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_u32 v_6 = npyv_loadn_u32(ip + 6 * vstep * istride, istride); npyv_u32 r_6 = npyv_negative_u32(v_6); npyv_storen_u32(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_u32 v_7 = npyv_loadn_u32(ip + 7 * vstep * istride, istride); npyv_u32 r_7 = npyv_negative_u32(v_7); npyv_storen_u32(op + 
7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_u32 v_8 = npyv_loadn_u32(ip + 8 * vstep * istride, istride); npyv_u32 r_8 = npyv_negative_u32(v_8); npyv_storen_u32(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_u32 v_9 = npyv_loadn_u32(ip + 9 * vstep * istride, istride); npyv_u32 r_9 = npyv_negative_u32(v_9); npyv_storen_u32(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_u32 v_10 = npyv_loadn_u32(ip + 10 * vstep * istride, istride); npyv_u32 r_10 = npyv_negative_u32(v_10); npyv_storen_u32(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_u32 v_11 = npyv_loadn_u32(ip + 11 * vstep * istride, istride); npyv_u32 r_11 = npyv_negative_u32(v_11); npyv_storen_u32(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_u32 v_12 = npyv_loadn_u32(ip + 12 * vstep * istride, istride); npyv_u32 r_12 = npyv_negative_u32(v_12); npyv_storen_u32(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_u32 v_13 = npyv_loadn_u32(ip + 13 * vstep * istride, istride); npyv_u32 r_13 = npyv_negative_u32(v_13); npyv_storen_u32(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_u32 v_14 = npyv_loadn_u32(ip + 14 * vstep * istride, istride); npyv_u32 r_14 = npyv_negative_u32(v_14); npyv_storen_u32(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_u32 v_15 = npyv_loadn_u32(ip + 15 * vstep * istride, istride); npyv_u32 r_15 = npyv_negative_u32(v_15); @@ -3553,6 +3570,7 @@ simd_unary_nn_negative_u32(const npyv_lanetype_u32 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD @@ -3982,6 +4000,8 @@ simd_unary_nc_negative_s64(const npyv_lanetype_s64 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_s64(const npyv_lanetype_s64 *ip, npy_intp istride, npyv_lanetype_s64 *op, npy_intp ostride, @@ -3992,112 +4012,112 @@ simd_unary_nn_negative_s64(const npyv_lanetype_s64 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_s64 v_0 = npyv_loadn_s64(ip + 0 * vstep * istride, istride); npyv_s64 r_0 = npyv_negative_s64(v_0); npyv_storen_s64(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_s64 v_1 = npyv_loadn_s64(ip + 1 * vstep * istride, istride); npyv_s64 r_1 = npyv_negative_s64(v_1); npyv_storen_s64(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_s64 v_2 = npyv_loadn_s64(ip + 2 * vstep * istride, istride); npyv_s64 r_2 = npyv_negative_s64(v_2); npyv_storen_s64(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_s64 v_3 = npyv_loadn_s64(ip + 3 * vstep * istride, istride); npyv_s64 r_3 = npyv_negative_s64(v_3); npyv_storen_s64(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_s64 v_4 = npyv_loadn_s64(ip + 4 * vstep * istride, istride); npyv_s64 r_4 = npyv_negative_s64(v_4); npyv_storen_s64(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_s64 v_5 = npyv_loadn_s64(ip + 5 * vstep * istride, 
istride); npyv_s64 r_5 = npyv_negative_s64(v_5); npyv_storen_s64(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_s64 v_6 = npyv_loadn_s64(ip + 6 * vstep * istride, istride); npyv_s64 r_6 = npyv_negative_s64(v_6); npyv_storen_s64(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_s64 v_7 = npyv_loadn_s64(ip + 7 * vstep * istride, istride); npyv_s64 r_7 = npyv_negative_s64(v_7); npyv_storen_s64(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_s64 v_8 = npyv_loadn_s64(ip + 8 * vstep * istride, istride); npyv_s64 r_8 = npyv_negative_s64(v_8); npyv_storen_s64(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_s64 v_9 = npyv_loadn_s64(ip + 9 * vstep * istride, istride); npyv_s64 r_9 = npyv_negative_s64(v_9); npyv_storen_s64(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_s64 v_10 = npyv_loadn_s64(ip + 10 * vstep * istride, istride); npyv_s64 r_10 = npyv_negative_s64(v_10); npyv_storen_s64(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_s64 v_11 = npyv_loadn_s64(ip + 11 * vstep * istride, istride); npyv_s64 r_11 = npyv_negative_s64(v_11); npyv_storen_s64(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_s64 v_12 = npyv_loadn_s64(ip + 12 * vstep * istride, istride); npyv_s64 r_12 = npyv_negative_s64(v_12); npyv_storen_s64(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_s64 v_13 = npyv_loadn_s64(ip + 13 * vstep * istride, istride); npyv_s64 r_13 = npyv_negative_s64(v_13); npyv_storen_s64(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_s64 v_14 = npyv_loadn_s64(ip + 14 * vstep * istride, istride); npyv_s64 r_14 = npyv_negative_s64(v_14); npyv_storen_s64(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_s64 v_15 = npyv_loadn_s64(ip + 15 * vstep * istride, istride); npyv_s64 r_15 = npyv_negative_s64(v_15); @@ -4116,6 +4136,7 @@ simd_unary_nn_negative_s64(const npyv_lanetype_s64 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD @@ -4545,6 +4566,8 @@ simd_unary_nc_negative_u64(const npyv_lanetype_u64 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_u64(const npyv_lanetype_u64 *ip, npy_intp istride, npyv_lanetype_u64 *op, npy_intp ostride, @@ -4555,112 +4578,112 @@ simd_unary_nn_negative_u64(const npyv_lanetype_u64 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_u64 v_0 = npyv_loadn_u64(ip + 0 * vstep * istride, istride); npyv_u64 r_0 = npyv_negative_u64(v_0); npyv_storen_u64(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_u64 v_1 = npyv_loadn_u64(ip + 1 * vstep * istride, istride); npyv_u64 r_1 = npyv_negative_u64(v_1); npyv_storen_u64(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_u64 v_2 = npyv_loadn_u64(ip + 2 * vstep * istride, istride); npyv_u64 r_2 = npyv_negative_u64(v_2); npyv_storen_u64(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 
#if UNROLL > 3 npyv_u64 v_3 = npyv_loadn_u64(ip + 3 * vstep * istride, istride); npyv_u64 r_3 = npyv_negative_u64(v_3); npyv_storen_u64(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_u64 v_4 = npyv_loadn_u64(ip + 4 * vstep * istride, istride); npyv_u64 r_4 = npyv_negative_u64(v_4); npyv_storen_u64(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_u64 v_5 = npyv_loadn_u64(ip + 5 * vstep * istride, istride); npyv_u64 r_5 = npyv_negative_u64(v_5); npyv_storen_u64(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_u64 v_6 = npyv_loadn_u64(ip + 6 * vstep * istride, istride); npyv_u64 r_6 = npyv_negative_u64(v_6); npyv_storen_u64(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_u64 v_7 = npyv_loadn_u64(ip + 7 * vstep * istride, istride); npyv_u64 r_7 = npyv_negative_u64(v_7); npyv_storen_u64(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_u64 v_8 = npyv_loadn_u64(ip + 8 * vstep * istride, istride); npyv_u64 r_8 = npyv_negative_u64(v_8); npyv_storen_u64(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_u64 v_9 = npyv_loadn_u64(ip + 9 * vstep * istride, istride); npyv_u64 r_9 = npyv_negative_u64(v_9); npyv_storen_u64(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_u64 v_10 = npyv_loadn_u64(ip + 10 * vstep * istride, istride); npyv_u64 r_10 = npyv_negative_u64(v_10); npyv_storen_u64(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_u64 v_11 = npyv_loadn_u64(ip + 11 * vstep * istride, istride); npyv_u64 r_11 = npyv_negative_u64(v_11); npyv_storen_u64(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_u64 v_12 = npyv_loadn_u64(ip + 12 * vstep * istride, istride); npyv_u64 r_12 = npyv_negative_u64(v_12); npyv_storen_u64(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_u64 v_13 = npyv_loadn_u64(ip + 13 * vstep * istride, istride); npyv_u64 r_13 = npyv_negative_u64(v_13); npyv_storen_u64(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_u64 v_14 = npyv_loadn_u64(ip + 14 * vstep * istride, istride); npyv_u64 r_14 = npyv_negative_u64(v_14); npyv_storen_u64(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_u64 v_15 = npyv_loadn_u64(ip + 15 * vstep * istride, istride); npyv_u64 r_15 = npyv_negative_u64(v_15); @@ -4679,6 +4702,7 @@ simd_unary_nn_negative_u64(const npyv_lanetype_u64 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD @@ -5108,6 +5132,8 @@ simd_unary_nc_negative_f32(const npyv_lanetype_f32 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_negative_f32(const npyv_lanetype_f32 *ip, npy_intp istride, npyv_lanetype_f32 *op, npy_intp ostride, @@ -5118,112 +5144,112 @@ simd_unary_nn_negative_f32(const npyv_lanetype_f32 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_f32 v_0 = npyv_loadn_f32(ip + 0 * vstep * istride, istride); npyv_f32 r_0 = npyv_negative_f32(v_0); 
npyv_storen_f32(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_f32 v_1 = npyv_loadn_f32(ip + 1 * vstep * istride, istride); npyv_f32 r_1 = npyv_negative_f32(v_1); npyv_storen_f32(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_f32 v_2 = npyv_loadn_f32(ip + 2 * vstep * istride, istride); npyv_f32 r_2 = npyv_negative_f32(v_2); npyv_storen_f32(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_f32 v_3 = npyv_loadn_f32(ip + 3 * vstep * istride, istride); npyv_f32 r_3 = npyv_negative_f32(v_3); npyv_storen_f32(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_f32 v_4 = npyv_loadn_f32(ip + 4 * vstep * istride, istride); npyv_f32 r_4 = npyv_negative_f32(v_4); npyv_storen_f32(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_f32 v_5 = npyv_loadn_f32(ip + 5 * vstep * istride, istride); npyv_f32 r_5 = npyv_negative_f32(v_5); npyv_storen_f32(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_f32 v_6 = npyv_loadn_f32(ip + 6 * vstep * istride, istride); npyv_f32 r_6 = npyv_negative_f32(v_6); npyv_storen_f32(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_f32 v_7 = npyv_loadn_f32(ip + 7 * vstep * istride, istride); npyv_f32 r_7 = npyv_negative_f32(v_7); npyv_storen_f32(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_f32 v_8 = npyv_loadn_f32(ip + 8 * vstep * istride, istride); npyv_f32 r_8 = npyv_negative_f32(v_8); npyv_storen_f32(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_f32 v_9 = npyv_loadn_f32(ip + 9 * vstep * istride, istride); npyv_f32 r_9 = npyv_negative_f32(v_9); npyv_storen_f32(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_f32 v_10 = npyv_loadn_f32(ip + 10 * vstep * istride, istride); npyv_f32 r_10 = npyv_negative_f32(v_10); npyv_storen_f32(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_f32 v_11 = npyv_loadn_f32(ip + 11 * vstep * istride, istride); npyv_f32 r_11 = npyv_negative_f32(v_11); npyv_storen_f32(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_f32 v_12 = npyv_loadn_f32(ip + 12 * vstep * istride, istride); npyv_f32 r_12 = npyv_negative_f32(v_12); npyv_storen_f32(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_f32 v_13 = npyv_loadn_f32(ip + 13 * vstep * istride, istride); npyv_f32 r_13 = npyv_negative_f32(v_13); npyv_storen_f32(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_f32 v_14 = npyv_loadn_f32(ip + 14 * vstep * istride, istride); npyv_f32 r_14 = npyv_negative_f32(v_14); npyv_storen_f32(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_f32 v_15 = npyv_loadn_f32(ip + 15 * vstep * istride, istride); npyv_f32 r_15 = npyv_negative_f32(v_15); @@ -5242,6 +5268,7 @@ simd_unary_nn_negative_f32(const npyv_lanetype_f32 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD_F32 @@ -5671,6 +5698,8 @@ simd_unary_nc_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 
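/*
 * Aside (editorial, not part of the commit): the "#ifndef NPY_HAVE_SSE2"
 * guard added above compiles this strided-SIMD body out on baseline x86.
 * SSE2 has no gather/scatter instructions, so a strided load such as
 * npyv_loadn_f64(ip, istride) has to be assembled lane by lane, roughly
 *
 *     npy_double lane0 = ip[0 * istride];
 *     npy_double lane1 = ip[1 * istride];
 *     // ...insert lanes into one vector register...
 *
 * which is no cheaper than the plain unrolled scalar loop the dispatcher
 * falls back to. That is the reasoning suggested by the added comment
 * ("X86 does better with unrolled scalar for heavy non-contiguous");
 * the commit itself does not spell it out further.
 */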
static NPY_INLINE void simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, npyv_lanetype_f64 *op, npy_intp ostride, @@ -5681,112 +5710,112 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, // unrolled vector loop for (; len >= wstep; len -= wstep, ip += istride*wstep, op += ostride*wstep) { - #line 211 + #line 213 #if UNROLL > 0 npyv_f64 v_0 = npyv_loadn_f64(ip + 0 * vstep * istride, istride); npyv_f64 r_0 = npyv_negative_f64(v_0); npyv_storen_f64(op + 0 * vstep * ostride, ostride, r_0); #endif -#line 211 +#line 213 #if UNROLL > 1 npyv_f64 v_1 = npyv_loadn_f64(ip + 1 * vstep * istride, istride); npyv_f64 r_1 = npyv_negative_f64(v_1); npyv_storen_f64(op + 1 * vstep * ostride, ostride, r_1); #endif -#line 211 +#line 213 #if UNROLL > 2 npyv_f64 v_2 = npyv_loadn_f64(ip + 2 * vstep * istride, istride); npyv_f64 r_2 = npyv_negative_f64(v_2); npyv_storen_f64(op + 2 * vstep * ostride, ostride, r_2); #endif -#line 211 +#line 213 #if UNROLL > 3 npyv_f64 v_3 = npyv_loadn_f64(ip + 3 * vstep * istride, istride); npyv_f64 r_3 = npyv_negative_f64(v_3); npyv_storen_f64(op + 3 * vstep * ostride, ostride, r_3); #endif -#line 211 +#line 213 #if UNROLL > 4 npyv_f64 v_4 = npyv_loadn_f64(ip + 4 * vstep * istride, istride); npyv_f64 r_4 = npyv_negative_f64(v_4); npyv_storen_f64(op + 4 * vstep * ostride, ostride, r_4); #endif -#line 211 +#line 213 #if UNROLL > 5 npyv_f64 v_5 = npyv_loadn_f64(ip + 5 * vstep * istride, istride); npyv_f64 r_5 = npyv_negative_f64(v_5); npyv_storen_f64(op + 5 * vstep * ostride, ostride, r_5); #endif -#line 211 +#line 213 #if UNROLL > 6 npyv_f64 v_6 = npyv_loadn_f64(ip + 6 * vstep * istride, istride); npyv_f64 r_6 = npyv_negative_f64(v_6); npyv_storen_f64(op + 6 * vstep * ostride, ostride, r_6); #endif -#line 211 +#line 213 #if UNROLL > 7 npyv_f64 v_7 = npyv_loadn_f64(ip + 7 * vstep * istride, istride); npyv_f64 r_7 = npyv_negative_f64(v_7); npyv_storen_f64(op + 7 * vstep * ostride, ostride, r_7); #endif -#line 211 +#line 213 #if UNROLL > 8 npyv_f64 v_8 = npyv_loadn_f64(ip + 8 * vstep * istride, istride); npyv_f64 r_8 = npyv_negative_f64(v_8); npyv_storen_f64(op + 8 * vstep * ostride, ostride, r_8); #endif -#line 211 +#line 213 #if UNROLL > 9 npyv_f64 v_9 = npyv_loadn_f64(ip + 9 * vstep * istride, istride); npyv_f64 r_9 = npyv_negative_f64(v_9); npyv_storen_f64(op + 9 * vstep * ostride, ostride, r_9); #endif -#line 211 +#line 213 #if UNROLL > 10 npyv_f64 v_10 = npyv_loadn_f64(ip + 10 * vstep * istride, istride); npyv_f64 r_10 = npyv_negative_f64(v_10); npyv_storen_f64(op + 10 * vstep * ostride, ostride, r_10); #endif -#line 211 +#line 213 #if UNROLL > 11 npyv_f64 v_11 = npyv_loadn_f64(ip + 11 * vstep * istride, istride); npyv_f64 r_11 = npyv_negative_f64(v_11); npyv_storen_f64(op + 11 * vstep * ostride, ostride, r_11); #endif -#line 211 +#line 213 #if UNROLL > 12 npyv_f64 v_12 = npyv_loadn_f64(ip + 12 * vstep * istride, istride); npyv_f64 r_12 = npyv_negative_f64(v_12); npyv_storen_f64(op + 12 * vstep * ostride, ostride, r_12); #endif -#line 211 +#line 213 #if UNROLL > 13 npyv_f64 v_13 = npyv_loadn_f64(ip + 13 * vstep * istride, istride); npyv_f64 r_13 = npyv_negative_f64(v_13); npyv_storen_f64(op + 13 * vstep * ostride, ostride, r_13); #endif -#line 211 +#line 213 #if UNROLL > 14 npyv_f64 v_14 = npyv_loadn_f64(ip + 14 * vstep * istride, istride); npyv_f64 r_14 = npyv_negative_f64(v_14); npyv_storen_f64(op + 14 * vstep * ostride, ostride, r_14); #endif -#line 211 +#line 213 #if UNROLL > 15 npyv_f64 v_15 = npyv_loadn_f64(ip + 15 * vstep * 
istride, istride); npyv_f64 r_15 = npyv_negative_f64(v_15); @@ -5805,6 +5834,7 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, *op = scalar_negative(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // 1 #undef UNROLL #endif // NPY_SIMD_F64 @@ -5814,10 +5844,10 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, /******************************************************************************** ** Defining ufunc inner functions ********************************************************************************/ -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -5833,7 +5863,7 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -5849,7 +5879,7 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -5865,7 +5895,7 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -5883,7 +5913,7 @@ simd_unary_nn_negative_f64(const npyv_lanetype_f64 *ip, npy_intp istride, #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -5921,8 +5951,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -5945,97 +5975,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UBYTE_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_ubyte in_0 = *((const npy_ubyte *)(ip + 0 * istep)); *((npy_ubyte *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_ubyte in_1 = *((const npy_ubyte *)(ip + 1 * istep)); *((npy_ubyte *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_ubyte in_2 = *((const npy_ubyte *)(ip + 2 * istep)); *((npy_ubyte *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_ubyte in_3 = *((const npy_ubyte *)(ip + 3 * istep)); *((npy_ubyte *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_ubyte in_4 = *((const npy_ubyte *)(ip + 4 * istep)); *((npy_ubyte *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_ubyte in_5 = *((const npy_ubyte *)(ip + 5 * istep)); *((npy_ubyte *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_ubyte in_6 = *((const npy_ubyte *)(ip + 6 * istep)); *((npy_ubyte *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_ubyte in_7 = *((const npy_ubyte *)(ip + 7 * istep)); *((npy_ubyte *)(op + 7 * ostep)) = 
scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_ubyte in_8 = *((const npy_ubyte *)(ip + 8 * istep)); *((npy_ubyte *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_ubyte in_9 = *((const npy_ubyte *)(ip + 9 * istep)); *((npy_ubyte *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_ubyte in_10 = *((const npy_ubyte *)(ip + 10 * istep)); *((npy_ubyte *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_ubyte in_11 = *((const npy_ubyte *)(ip + 11 * istep)); *((npy_ubyte *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_ubyte in_12 = *((const npy_ubyte *)(ip + 12 * istep)); *((npy_ubyte *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_ubyte in_13 = *((const npy_ubyte *)(ip + 13 * istep)); *((npy_ubyte *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_ubyte in_14 = *((const npy_ubyte *)(ip + 14 * istep)); *((npy_ubyte *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_ubyte in_15 = *((const npy_ubyte *)(ip + 15 * istep)); *((npy_ubyte *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -6055,10 +6085,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -6074,7 +6104,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -6090,7 +6120,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -6106,7 +6136,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -6124,7 +6154,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -6162,8 +6192,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -6186,97 +6216,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(USHORT_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_ushort in_0 = *((const npy_ushort *)(ip + 0 * istep)); *((npy_ushort *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_ushort in_1 = *((const npy_ushort *)(ip + 1 * istep)); *((npy_ushort *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_ushort in_2 = *((const npy_ushort *)(ip + 2 * istep)); *((npy_ushort *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_ushort in_3 = *((const npy_ushort *)(ip + 3 * istep)); *((npy_ushort *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL 
> 4 const npy_ushort in_4 = *((const npy_ushort *)(ip + 4 * istep)); *((npy_ushort *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_ushort in_5 = *((const npy_ushort *)(ip + 5 * istep)); *((npy_ushort *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_ushort in_6 = *((const npy_ushort *)(ip + 6 * istep)); *((npy_ushort *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_ushort in_7 = *((const npy_ushort *)(ip + 7 * istep)); *((npy_ushort *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_ushort in_8 = *((const npy_ushort *)(ip + 8 * istep)); *((npy_ushort *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_ushort in_9 = *((const npy_ushort *)(ip + 9 * istep)); *((npy_ushort *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_ushort in_10 = *((const npy_ushort *)(ip + 10 * istep)); *((npy_ushort *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_ushort in_11 = *((const npy_ushort *)(ip + 11 * istep)); *((npy_ushort *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_ushort in_12 = *((const npy_ushort *)(ip + 12 * istep)); *((npy_ushort *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_ushort in_13 = *((const npy_ushort *)(ip + 13 * istep)); *((npy_ushort *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_ushort in_14 = *((const npy_ushort *)(ip + 14 * istep)); *((npy_ushort *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_ushort in_15 = *((const npy_ushort *)(ip + 15 * istep)); *((npy_ushort *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -6296,10 +6326,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -6315,7 +6345,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -6331,7 +6361,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -6347,7 +6377,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -6365,7 +6395,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -6403,8 +6433,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -6427,97 +6457,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(UINT_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_uint in_0 = *((const npy_uint *)(ip + 
0 * istep)); *((npy_uint *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_uint in_1 = *((const npy_uint *)(ip + 1 * istep)); *((npy_uint *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_uint in_2 = *((const npy_uint *)(ip + 2 * istep)); *((npy_uint *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_uint in_3 = *((const npy_uint *)(ip + 3 * istep)); *((npy_uint *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_uint in_4 = *((const npy_uint *)(ip + 4 * istep)); *((npy_uint *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_uint in_5 = *((const npy_uint *)(ip + 5 * istep)); *((npy_uint *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_uint in_6 = *((const npy_uint *)(ip + 6 * istep)); *((npy_uint *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_uint in_7 = *((const npy_uint *)(ip + 7 * istep)); *((npy_uint *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_uint in_8 = *((const npy_uint *)(ip + 8 * istep)); *((npy_uint *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_uint in_9 = *((const npy_uint *)(ip + 9 * istep)); *((npy_uint *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_uint in_10 = *((const npy_uint *)(ip + 10 * istep)); *((npy_uint *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_uint in_11 = *((const npy_uint *)(ip + 11 * istep)); *((npy_uint *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_uint in_12 = *((const npy_uint *)(ip + 12 * istep)); *((npy_uint *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_uint in_13 = *((const npy_uint *)(ip + 13 * istep)); *((npy_uint *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_uint in_14 = *((const npy_uint *)(ip + 14 * istep)); *((npy_uint *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_uint in_15 = *((const npy_uint *)(ip + 15 * istep)); *((npy_uint *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -6537,10 +6567,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -6556,7 +6586,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -6572,7 +6602,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -6588,7 +6618,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -6606,7 +6636,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -6644,8 +6674,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_negative) ); goto clear; } - // SSE2 does better with unrolled 
scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -6668,97 +6698,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONG_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_ulong in_0 = *((const npy_ulong *)(ip + 0 * istep)); *((npy_ulong *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_ulong in_1 = *((const npy_ulong *)(ip + 1 * istep)); *((npy_ulong *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_ulong in_2 = *((const npy_ulong *)(ip + 2 * istep)); *((npy_ulong *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_ulong in_3 = *((const npy_ulong *)(ip + 3 * istep)); *((npy_ulong *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_ulong in_4 = *((const npy_ulong *)(ip + 4 * istep)); *((npy_ulong *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_ulong in_5 = *((const npy_ulong *)(ip + 5 * istep)); *((npy_ulong *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_ulong in_6 = *((const npy_ulong *)(ip + 6 * istep)); *((npy_ulong *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_ulong in_7 = *((const npy_ulong *)(ip + 7 * istep)); *((npy_ulong *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_ulong in_8 = *((const npy_ulong *)(ip + 8 * istep)); *((npy_ulong *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_ulong in_9 = *((const npy_ulong *)(ip + 9 * istep)); *((npy_ulong *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_ulong in_10 = *((const npy_ulong *)(ip + 10 * istep)); *((npy_ulong *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_ulong in_11 = *((const npy_ulong *)(ip + 11 * istep)); *((npy_ulong *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_ulong in_12 = *((const npy_ulong *)(ip + 12 * istep)); *((npy_ulong *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_ulong in_13 = *((const npy_ulong *)(ip + 13 * istep)); *((npy_ulong *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_ulong in_14 = *((const npy_ulong *)(ip + 14 * istep)); *((npy_ulong *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_ulong in_15 = *((const npy_ulong *)(ip + 15 * istep)); *((npy_ulong *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -6778,10 +6808,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -6797,7 +6827,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -6813,7 +6843,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 
262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -6829,7 +6859,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -6847,7 +6877,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -6885,8 +6915,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -6909,97 +6939,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(ULONGLONG_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_ulonglong in_0 = *((const npy_ulonglong *)(ip + 0 * istep)); *((npy_ulonglong *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_ulonglong in_1 = *((const npy_ulonglong *)(ip + 1 * istep)); *((npy_ulonglong *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_ulonglong in_2 = *((const npy_ulonglong *)(ip + 2 * istep)); *((npy_ulonglong *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_ulonglong in_3 = *((const npy_ulonglong *)(ip + 3 * istep)); *((npy_ulonglong *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_ulonglong in_4 = *((const npy_ulonglong *)(ip + 4 * istep)); *((npy_ulonglong *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_ulonglong in_5 = *((const npy_ulonglong *)(ip + 5 * istep)); *((npy_ulonglong *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_ulonglong in_6 = *((const npy_ulonglong *)(ip + 6 * istep)); *((npy_ulonglong *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_ulonglong in_7 = *((const npy_ulonglong *)(ip + 7 * istep)); *((npy_ulonglong *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_ulonglong in_8 = *((const npy_ulonglong *)(ip + 8 * istep)); *((npy_ulonglong *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_ulonglong in_9 = *((const npy_ulonglong *)(ip + 9 * istep)); *((npy_ulonglong *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_ulonglong in_10 = *((const npy_ulonglong *)(ip + 10 * istep)); *((npy_ulonglong *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_ulonglong in_11 = *((const npy_ulonglong *)(ip + 11 * istep)); *((npy_ulonglong *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_ulonglong in_12 = *((const npy_ulonglong *)(ip + 12 * istep)); *((npy_ulonglong *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_ulonglong in_13 = *((const npy_ulonglong *)(ip + 13 * istep)); *((npy_ulonglong *)(op + 13 * ostep)) = scalar_negative(in_13); #endif 
-#line 344 +#line 347 #if UNROLL > 14 const npy_ulonglong in_14 = *((const npy_ulonglong *)(ip + 14 * istep)); *((npy_ulonglong *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_ulonglong in_15 = *((const npy_ulonglong *)(ip + 15 * istep)); *((npy_ulonglong *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -7019,10 +7049,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -7038,7 +7068,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -7054,7 +7084,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -7070,7 +7100,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_BYTE == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -7088,7 +7118,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -7126,8 +7156,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -7150,97 +7180,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(BYTE_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_byte in_0 = *((const npy_byte *)(ip + 0 * istep)); *((npy_byte *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_byte in_1 = *((const npy_byte *)(ip + 1 * istep)); *((npy_byte *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_byte in_2 = *((const npy_byte *)(ip + 2 * istep)); *((npy_byte *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_byte in_3 = *((const npy_byte *)(ip + 3 * istep)); *((npy_byte *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_byte in_4 = *((const npy_byte *)(ip + 4 * istep)); *((npy_byte *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_byte in_5 = *((const npy_byte *)(ip + 5 * istep)); *((npy_byte *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_byte in_6 = *((const npy_byte *)(ip + 6 * istep)); *((npy_byte *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_byte in_7 = *((const npy_byte *)(ip + 7 * istep)); *((npy_byte *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_byte in_8 = *((const npy_byte *)(ip + 8 * istep)); *((npy_byte *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_byte in_9 = *((const npy_byte *)(ip + 9 * istep)); *((npy_byte *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_byte in_10 = *((const npy_byte *)(ip + 10 * istep)); 
*((npy_byte *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_byte in_11 = *((const npy_byte *)(ip + 11 * istep)); *((npy_byte *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_byte in_12 = *((const npy_byte *)(ip + 12 * istep)); *((npy_byte *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_byte in_13 = *((const npy_byte *)(ip + 13 * istep)); *((npy_byte *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_byte in_14 = *((const npy_byte *)(ip + 14 * istep)); *((npy_byte *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_byte in_15 = *((const npy_byte *)(ip + 15 * istep)); *((npy_byte *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -7260,10 +7290,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -7279,7 +7309,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -7295,7 +7325,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -7311,7 +7341,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_SHORT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -7329,7 +7359,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -7367,8 +7397,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -7391,97 +7421,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(SHORT_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_short in_0 = *((const npy_short *)(ip + 0 * istep)); *((npy_short *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_short in_1 = *((const npy_short *)(ip + 1 * istep)); *((npy_short *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_short in_2 = *((const npy_short *)(ip + 2 * istep)); *((npy_short *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_short in_3 = *((const npy_short *)(ip + 3 * istep)); *((npy_short *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_short in_4 = *((const npy_short *)(ip + 4 * istep)); *((npy_short *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_short in_5 = *((const npy_short *)(ip + 5 * istep)); *((npy_short *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_short in_6 = *((const npy_short *)(ip + 6 * istep)); *((npy_short *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if 
UNROLL > 7 const npy_short in_7 = *((const npy_short *)(ip + 7 * istep)); *((npy_short *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_short in_8 = *((const npy_short *)(ip + 8 * istep)); *((npy_short *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_short in_9 = *((const npy_short *)(ip + 9 * istep)); *((npy_short *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_short in_10 = *((const npy_short *)(ip + 10 * istep)); *((npy_short *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_short in_11 = *((const npy_short *)(ip + 11 * istep)); *((npy_short *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_short in_12 = *((const npy_short *)(ip + 12 * istep)); *((npy_short *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_short in_13 = *((const npy_short *)(ip + 13 * istep)); *((npy_short *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_short in_14 = *((const npy_short *)(ip + 14 * istep)); *((npy_short *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_short in_15 = *((const npy_short *)(ip + 15 * istep)); *((npy_short *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -7501,10 +7531,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -7520,7 +7550,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -7536,7 +7566,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -7552,7 +7582,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_INT == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -7570,7 +7600,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -7608,8 +7638,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -7632,97 +7662,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(INT_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_int in_0 = *((const npy_int *)(ip + 0 * istep)); *((npy_int *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_int in_1 = *((const npy_int *)(ip + 1 * istep)); *((npy_int *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_int in_2 = *((const npy_int *)(ip + 2 * istep)); *((npy_int *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_int in_3 = *((const npy_int *)(ip + 3 * istep)); *((npy_int *)(op + 3 * ostep)) = 
scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_int in_4 = *((const npy_int *)(ip + 4 * istep)); *((npy_int *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_int in_5 = *((const npy_int *)(ip + 5 * istep)); *((npy_int *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_int in_6 = *((const npy_int *)(ip + 6 * istep)); *((npy_int *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_int in_7 = *((const npy_int *)(ip + 7 * istep)); *((npy_int *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_int in_8 = *((const npy_int *)(ip + 8 * istep)); *((npy_int *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_int in_9 = *((const npy_int *)(ip + 9 * istep)); *((npy_int *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_int in_10 = *((const npy_int *)(ip + 10 * istep)); *((npy_int *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_int in_11 = *((const npy_int *)(ip + 11 * istep)); *((npy_int *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_int in_12 = *((const npy_int *)(ip + 12 * istep)); *((npy_int *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_int in_13 = *((const npy_int *)(ip + 13 * istep)); *((npy_int *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_int in_14 = *((const npy_int *)(ip + 14 * istep)); *((npy_int *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_int in_15 = *((const npy_int *)(ip + 15 * istep)); *((npy_int *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -7742,10 +7772,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -7761,7 +7791,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -7777,7 +7807,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -7793,7 +7823,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -7811,7 +7841,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -7849,8 +7879,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -7873,97 +7903,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONG_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_long in_0 = *((const npy_long *)(ip + 0 * istep)); *((npy_long *)(op + 0 * 
ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_long in_1 = *((const npy_long *)(ip + 1 * istep)); *((npy_long *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_long in_2 = *((const npy_long *)(ip + 2 * istep)); *((npy_long *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_long in_3 = *((const npy_long *)(ip + 3 * istep)); *((npy_long *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_long in_4 = *((const npy_long *)(ip + 4 * istep)); *((npy_long *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_long in_5 = *((const npy_long *)(ip + 5 * istep)); *((npy_long *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_long in_6 = *((const npy_long *)(ip + 6 * istep)); *((npy_long *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_long in_7 = *((const npy_long *)(ip + 7 * istep)); *((npy_long *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_long in_8 = *((const npy_long *)(ip + 8 * istep)); *((npy_long *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_long in_9 = *((const npy_long *)(ip + 9 * istep)); *((npy_long *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_long in_10 = *((const npy_long *)(ip + 10 * istep)); *((npy_long *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_long in_11 = *((const npy_long *)(ip + 11 * istep)); *((npy_long *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_long in_12 = *((const npy_long *)(ip + 12 * istep)); *((npy_long *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_long in_13 = *((const npy_long *)(ip + 13 * istep)); *((npy_long *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_long in_14 = *((const npy_long *)(ip + 14 * istep)); *((npy_long *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_long in_15 = *((const npy_long *)(ip + 15 * istep)); *((npy_long *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -7983,10 +8013,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 8 #if 0 #define TO_SIMD_SFX(X) X##_f8 @@ -8002,7 +8032,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 16 #if 0 #define TO_SIMD_SFX(X) X##_f16 @@ -8018,7 +8048,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 32 #if 0 #define TO_SIMD_SFX(X) X##_f32 @@ -8034,7 +8064,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGLONG == 64 #if 0 #define TO_SIMD_SFX(X) X##_f64 @@ -8052,7 +8082,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -8090,8 +8120,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy 
non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -8114,97 +8144,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGLONG_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_longlong in_0 = *((const npy_longlong *)(ip + 0 * istep)); *((npy_longlong *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_longlong in_1 = *((const npy_longlong *)(ip + 1 * istep)); *((npy_longlong *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_longlong in_2 = *((const npy_longlong *)(ip + 2 * istep)); *((npy_longlong *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_longlong in_3 = *((const npy_longlong *)(ip + 3 * istep)); *((npy_longlong *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_longlong in_4 = *((const npy_longlong *)(ip + 4 * istep)); *((npy_longlong *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_longlong in_5 = *((const npy_longlong *)(ip + 5 * istep)); *((npy_longlong *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_longlong in_6 = *((const npy_longlong *)(ip + 6 * istep)); *((npy_longlong *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_longlong in_7 = *((const npy_longlong *)(ip + 7 * istep)); *((npy_longlong *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_longlong in_8 = *((const npy_longlong *)(ip + 8 * istep)); *((npy_longlong *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_longlong in_9 = *((const npy_longlong *)(ip + 9 * istep)); *((npy_longlong *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_longlong in_10 = *((const npy_longlong *)(ip + 10 * istep)); *((npy_longlong *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_longlong in_11 = *((const npy_longlong *)(ip + 11 * istep)); *((npy_longlong *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_longlong in_12 = *((const npy_longlong *)(ip + 12 * istep)); *((npy_longlong *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_longlong in_13 = *((const npy_longlong *)(ip + 13 * istep)); *((npy_longlong *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_longlong in_14 = *((const npy_longlong *)(ip + 14 * istep)); *((npy_longlong *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_longlong in_15 = *((const npy_longlong *)(ip + 15 * istep)); *((npy_longlong *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -8224,10 +8254,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -8243,7 +8273,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 
16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -8259,7 +8289,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -8275,7 +8305,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_FLOAT == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -8293,7 +8323,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -8331,8 +8361,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -8355,97 +8385,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_float in_0 = *((const npy_float *)(ip + 0 * istep)); *((npy_float *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_float in_1 = *((const npy_float *)(ip + 1 * istep)); *((npy_float *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_float in_2 = *((const npy_float *)(ip + 2 * istep)); *((npy_float *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_float in_3 = *((const npy_float *)(ip + 3 * istep)); *((npy_float *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_float in_4 = *((const npy_float *)(ip + 4 * istep)); *((npy_float *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_float in_5 = *((const npy_float *)(ip + 5 * istep)); *((npy_float *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_float in_6 = *((const npy_float *)(ip + 6 * istep)); *((npy_float *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_float in_7 = *((const npy_float *)(ip + 7 * istep)); *((npy_float *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_float in_8 = *((const npy_float *)(ip + 8 * istep)); *((npy_float *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_float in_9 = *((const npy_float *)(ip + 9 * istep)); *((npy_float *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_float in_10 = *((const npy_float *)(ip + 10 * istep)); *((npy_float *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_float in_11 = *((const npy_float *)(ip + 11 * istep)); *((npy_float *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_float in_12 = *((const npy_float *)(ip + 12 * istep)); *((npy_float *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_float in_13 = *((const npy_float *)(ip + 13 * istep)); *((npy_float *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_float in_14 = 
*((const npy_float *)(ip + 14 * istep)); *((npy_float *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_float in_15 = *((const npy_float *)(ip + 15 * istep)); *((npy_float *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -8465,10 +8495,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -8484,7 +8514,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -8500,7 +8530,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -8516,7 +8546,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_DOUBLE == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -8534,7 +8564,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -8572,8 +8602,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -8596,97 +8626,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_double in_0 = *((const npy_double *)(ip + 0 * istep)); *((npy_double *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_double in_1 = *((const npy_double *)(ip + 1 * istep)); *((npy_double *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_double in_2 = *((const npy_double *)(ip + 2 * istep)); *((npy_double *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_double in_3 = *((const npy_double *)(ip + 3 * istep)); *((npy_double *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_double in_4 = *((const npy_double *)(ip + 4 * istep)); *((npy_double *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_double in_5 = *((const npy_double *)(ip + 5 * istep)); *((npy_double *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const npy_double in_6 = *((const npy_double *)(ip + 6 * istep)); *((npy_double *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_double in_7 = *((const npy_double *)(ip + 7 * istep)); *((npy_double *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_double in_8 = *((const npy_double *)(ip + 8 * istep)); *((npy_double *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_double in_9 = *((const npy_double *)(ip + 9 * istep)); *((npy_double *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_double in_10 = *((const npy_double *)(ip + 10 * istep)); *((npy_double 
*)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_double in_11 = *((const npy_double *)(ip + 11 * istep)); *((npy_double *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_double in_12 = *((const npy_double *)(ip + 12 * istep)); *((npy_double *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_double in_13 = *((const npy_double *)(ip + 13 * istep)); *((npy_double *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_double in_14 = *((const npy_double *)(ip + 14 * istep)); *((npy_double *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_double in_15 = *((const npy_double *)(ip + 15 * istep)); *((npy_double *)(op + 15 * ostep)) = scalar_negative(in_15); @@ -8706,10 +8736,10 @@ clear: #endif } -#line 254 +#line 257 #undef TO_SIMD_SFX #if 0 -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 8 #if 1 #define TO_SIMD_SFX(X) X##_f8 @@ -8725,7 +8755,7 @@ clear: #define TO_SIMD_SFX(X) X##_s8 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 16 #if 1 #define TO_SIMD_SFX(X) X##_f16 @@ -8741,7 +8771,7 @@ clear: #define TO_SIMD_SFX(X) X##_s16 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 32 #if 1 #define TO_SIMD_SFX(X) X##_f32 @@ -8757,7 +8787,7 @@ clear: #define TO_SIMD_SFX(X) X##_s32 #endif -#line 259 +#line 262 #elif NPY_SIMD && NPY_BITSOF_LONGDOUBLE == 64 #if 1 #define TO_SIMD_SFX(X) X##_f64 @@ -8775,7 +8805,7 @@ clear: #endif -#line 280 +#line 283 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_negative) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -8813,8 +8843,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_negative) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_negative)( @@ -8837,97 +8867,97 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(LONGDOUBLE_negative) */ #define UNROLL 8 for (; len >= UNROLL; len -= UNROLL, ip += istep*UNROLL, op += ostep*UNROLL) { - #line 344 + #line 347 #if UNROLL > 0 const npy_longdouble in_0 = *((const npy_longdouble *)(ip + 0 * istep)); *((npy_longdouble *)(op + 0 * ostep)) = scalar_negative(in_0); #endif -#line 344 +#line 347 #if UNROLL > 1 const npy_longdouble in_1 = *((const npy_longdouble *)(ip + 1 * istep)); *((npy_longdouble *)(op + 1 * ostep)) = scalar_negative(in_1); #endif -#line 344 +#line 347 #if UNROLL > 2 const npy_longdouble in_2 = *((const npy_longdouble *)(ip + 2 * istep)); *((npy_longdouble *)(op + 2 * ostep)) = scalar_negative(in_2); #endif -#line 344 +#line 347 #if UNROLL > 3 const npy_longdouble in_3 = *((const npy_longdouble *)(ip + 3 * istep)); *((npy_longdouble *)(op + 3 * ostep)) = scalar_negative(in_3); #endif -#line 344 +#line 347 #if UNROLL > 4 const npy_longdouble in_4 = *((const npy_longdouble *)(ip + 4 * istep)); *((npy_longdouble *)(op + 4 * ostep)) = scalar_negative(in_4); #endif -#line 344 +#line 347 #if UNROLL > 5 const npy_longdouble in_5 = *((const npy_longdouble *)(ip + 5 * istep)); *((npy_longdouble *)(op + 5 * ostep)) = scalar_negative(in_5); #endif -#line 344 +#line 347 #if UNROLL > 6 const 
npy_longdouble in_6 = *((const npy_longdouble *)(ip + 6 * istep)); *((npy_longdouble *)(op + 6 * ostep)) = scalar_negative(in_6); #endif -#line 344 +#line 347 #if UNROLL > 7 const npy_longdouble in_7 = *((const npy_longdouble *)(ip + 7 * istep)); *((npy_longdouble *)(op + 7 * ostep)) = scalar_negative(in_7); #endif -#line 344 +#line 347 #if UNROLL > 8 const npy_longdouble in_8 = *((const npy_longdouble *)(ip + 8 * istep)); *((npy_longdouble *)(op + 8 * ostep)) = scalar_negative(in_8); #endif -#line 344 +#line 347 #if UNROLL > 9 const npy_longdouble in_9 = *((const npy_longdouble *)(ip + 9 * istep)); *((npy_longdouble *)(op + 9 * ostep)) = scalar_negative(in_9); #endif -#line 344 +#line 347 #if UNROLL > 10 const npy_longdouble in_10 = *((const npy_longdouble *)(ip + 10 * istep)); *((npy_longdouble *)(op + 10 * ostep)) = scalar_negative(in_10); #endif -#line 344 +#line 347 #if UNROLL > 11 const npy_longdouble in_11 = *((const npy_longdouble *)(ip + 11 * istep)); *((npy_longdouble *)(op + 11 * ostep)) = scalar_negative(in_11); #endif -#line 344 +#line 347 #if UNROLL > 12 const npy_longdouble in_12 = *((const npy_longdouble *)(ip + 12 * istep)); *((npy_longdouble *)(op + 12 * ostep)) = scalar_negative(in_12); #endif -#line 344 +#line 347 #if UNROLL > 13 const npy_longdouble in_13 = *((const npy_longdouble *)(ip + 13 * istep)); *((npy_longdouble *)(op + 13 * ostep)) = scalar_negative(in_13); #endif -#line 344 +#line 347 #if UNROLL > 14 const npy_longdouble in_14 = *((const npy_longdouble *)(ip + 14 * istep)); *((npy_longdouble *)(op + 14 * ostep)) = scalar_negative(in_14); #endif -#line 344 +#line 347 #if UNROLL > 15 const npy_longdouble in_15 = *((const npy_longdouble *)(ip + 15 * istep)); *((npy_longdouble *)(op + 15 * ostep)) = scalar_negative(in_15); diff --git a/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c.src b/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c.src index 1e2a81d20b..bfe4d892d0 100644 --- a/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c.src +++ b/contrib/python/numpy/py3/numpy/core/src/umath/loops_unary.dispatch.c.src @@ -195,6 +195,8 @@ simd_unary_nc_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip, npy_intp istride, #undef UNROLL #define UNROLL 2 #endif +// X86 does better with unrolled scalar for heavy non-contiguous +#ifndef NPY_HAVE_SSE2 static NPY_INLINE void simd_unary_nn_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip, npy_intp istride, npyv_lanetype_@sfx@ *op, npy_intp ostride, @@ -226,6 +228,7 @@ simd_unary_nn_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip, npy_intp istride, *op = scalar_@intrin@(*ip); } } +#endif // NPY_HAVE_SSE2 #endif // @supports_ncontig@ #undef UNROLL #endif // @simd_chk@ @@ -314,8 +317,8 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@) ); goto clear; } - // SSE2 does better with unrolled scalar for heavy non-contiguous - #if !defined(NPY_HAVE_SSE2) + // X86 does better with unrolled scalar for heavy non-contiguous + #ifndef NPY_HAVE_SSE2 else if (istride != 1 && ostride != 1) { // non-contiguous input and output TO_SIMD_SFX(simd_unary_nn_@intrin@)( diff --git a/contrib/python/numpy/py3/numpy/core/tests/test_numeric.py b/contrib/python/numpy/py3/numpy/core/tests/test_numeric.py index 1bbdde1317..a88189e03e 100644 --- a/contrib/python/numpy/py3/numpy/core/tests/test_numeric.py +++ b/contrib/python/numpy/py3/numpy/core/tests/test_numeric.py @@ -477,7 +477,14 @@ class TestBoolCmp: self.signd[self.ed] *= -1. 
self.signf[1::6][self.ef[1::6]] = -np.inf self.signd[1::6][self.ed[1::6]] = -np.inf - self.signf[3::6][self.ef[3::6]] = -np.nan + # On RISC-V, many operations that produce NaNs, such as converting + # a -NaN from f64 to f32, return a canonical NaN. The canonical + # NaNs are always positive. See section 11.3 NaN Generation and + # Propagation of the RISC-V Unprivileged ISA for more details. + # We disable the float32 sign test on riscv64 for -np.nan as the sign + # of the NaN will be lost when it's converted to a float32. + if platform.processor() != 'riscv64': + self.signf[3::6][self.ef[3::6]] = -np.nan self.signd[3::6][self.ed[3::6]] = -np.nan self.signf[4::6][self.ef[4::6]] = -0. self.signd[4::6][self.ed[4::6]] = -0. diff --git a/contrib/python/numpy/py3/numpy/f2py/crackfortran.py b/contrib/python/numpy/py3/numpy/f2py/crackfortran.py index 8d3fc27608..8d3fc27608 100755..100644 --- a/contrib/python/numpy/py3/numpy/f2py/crackfortran.py +++ b/contrib/python/numpy/py3/numpy/f2py/crackfortran.py diff --git a/contrib/python/numpy/py3/numpy/f2py/f2py2e.py b/contrib/python/numpy/py3/numpy/f2py/f2py2e.py index ce22b2d8a9..ce22b2d8a9 100755..100644 --- a/contrib/python/numpy/py3/numpy/f2py/f2py2e.py +++ b/contrib/python/numpy/py3/numpy/f2py/f2py2e.py diff --git a/contrib/python/numpy/py3/numpy/f2py/rules.py b/contrib/python/numpy/py3/numpy/f2py/rules.py index 009365e047..009365e047 100755..100644 --- a/contrib/python/numpy/py3/numpy/f2py/rules.py +++ b/contrib/python/numpy/py3/numpy/f2py/rules.py diff --git a/contrib/python/numpy/py3/numpy/f2py/tests/util.py b/contrib/python/numpy/py3/numpy/f2py/tests/util.py index 75b257cdb8..6ed6c0855f 100644 --- a/contrib/python/numpy/py3/numpy/f2py/tests/util.py +++ b/contrib/python/numpy/py3/numpy/f2py/tests/util.py @@ -20,6 +20,7 @@ import contextlib import numpy from pathlib import Path +from numpy.compat import asstr from numpy._utils import asunicode from numpy.testing import temppath, IS_WASM from importlib import import_module diff --git a/contrib/python/numpy/py3/numpy/lib/function_base.py b/contrib/python/numpy/py3/numpy/lib/function_base.py index e75aca1e58..a3dab04d33 100644 --- a/contrib/python/numpy/py3/numpy/lib/function_base.py +++ b/contrib/python/numpy/py3/numpy/lib/function_base.py @@ -4655,7 +4655,8 @@ def _lerp(a, b, t, out=None): diff_b_a = subtract(b, a) # asanyarray is a stop-gap until gh-13105 lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out)) - subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5) + subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5, + casting='unsafe', dtype=type(lerp_interpolation.dtype)) if lerp_interpolation.ndim == 0 and out is None: lerp_interpolation = lerp_interpolation[()] # unpack 0d arrays return lerp_interpolation diff --git a/contrib/python/numpy/py3/numpy/lib/tests/test_function_base.py b/contrib/python/numpy/py3/numpy/lib/tests/test_function_base.py index 11e44630e7..2bb73b6003 100644 --- a/contrib/python/numpy/py3/numpy/lib/tests/test_function_base.py +++ b/contrib/python/numpy/py3/numpy/lib/tests/test_function_base.py @@ -3606,6 +3606,10 @@ class TestQuantile: assert_equal(q, Fraction(7, 2)) assert_equal(type(q), Fraction) + q = np.quantile(x, .5) + assert_equal(q, 1.75) + assert_equal(type(q), np.float64) + q = np.quantile(x, Fraction(1, 2)) assert_equal(q, Fraction(7, 4)) assert_equal(type(q), Fraction) diff --git a/contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp b/contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp index 
0c0b35e9c0..3b5effe14a 100644 --- a/contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp +++ b/contrib/python/numpy/py3/numpy/linalg/umath_linalg.cpp @@ -2259,7 +2259,7 @@ process_geev_results(GEEV_PARAMS_t<typ> *params, scalar_trait) } } - +#if 0 static inline fortran_int call_geev(GEEV_PARAMS_t<fortran_complex>* params) { @@ -2275,6 +2275,8 @@ call_geev(GEEV_PARAMS_t<fortran_complex>* params) &rv); return rv; } +#endif + static inline fortran_int call_geev(GEEV_PARAMS_t<fortran_doublecomplex>* params) { diff --git a/contrib/python/numpy/py3/numpy/testing/print_coercion_tables.py b/contrib/python/numpy/py3/numpy/testing/print_coercion_tables.py index c1d4cdff8f..c1d4cdff8f 100755..100644 --- a/contrib/python/numpy/py3/numpy/testing/print_coercion_tables.py +++ b/contrib/python/numpy/py3/numpy/testing/print_coercion_tables.py diff --git a/contrib/python/numpy/py3/numpy/testing/setup.py b/contrib/python/numpy/py3/numpy/testing/setup.py index 6f203e8727..6f203e8727 100755..100644 --- a/contrib/python/numpy/py3/numpy/testing/setup.py +++ b/contrib/python/numpy/py3/numpy/testing/setup.py diff --git a/contrib/python/numpy/py3/numpy/tests/test_warnings.py b/contrib/python/numpy/py3/numpy/tests/test_warnings.py index ee5124c5d5..df90fcef8c 100644 --- a/contrib/python/numpy/py3/numpy/tests/test_warnings.py +++ b/contrib/python/numpy/py3/numpy/tests/test_warnings.py @@ -5,7 +5,6 @@ all of these occurrences but should catch almost all. import pytest from pathlib import Path -import sys import ast import tokenize import numpy @@ -33,7 +32,7 @@ class FindFuncs(ast.NodeVisitor): ast.NodeVisitor.generic_visit(self, node) if p.ls[-1] == 'simplefilter' or p.ls[-1] == 'filterwarnings': - if node.args[0].s == "ignore": + if node.args[0].value == "ignore": raise AssertionError( "warnings should have an appropriate stacklevel; found in " "{} on line {}".format(self.__filename, node.lineno)) @@ -57,8 +56,6 @@ class FindFuncs(ast.NodeVisitor): @pytest.mark.slow -@pytest.mark.skipif(sys.version_info >= (3, 12), - reason="Deprecation warning in ast") def test_warning_calls(): # combined "ignore" and stacklevel error base = Path(numpy.__file__).parent diff --git a/contrib/python/numpy/py3/numpy/typing/tests/test_typing.py b/contrib/python/numpy/py3/numpy/typing/tests/test_typing.py index 68c6f5d03f..6f778e5515 100644 --- a/contrib/python/numpy/py3/numpy/typing/tests/test_typing.py +++ b/contrib/python/numpy/py3/numpy/typing/tests/test_typing.py @@ -86,8 +86,6 @@ def strip_func(match: re.Match[str]) -> str: return match.groups()[1] -@pytest.mark.slow -@pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") @pytest.fixture(scope="module", autouse=True) def run_mypy() -> None: """Clears the cache and run mypy before running any of the typing tests. 
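Editor's note on the numpy/tests/test_warnings.py hunk above: the AST scanner now reads node.args[0].value instead of node.args[0].s, because string literals have parsed as ast.Constant since Python 3.8 and the legacy .s alias begins emitting a DeprecationWarning under Python 3.12; that is also why the version-based skip on test_warning_calls could be dropped. A minimal, self-contained sketch of the modern accessor (illustrative only, not part of the patch):

    import ast

    # Parse the kind of call the FindFuncs visitor scans numpy's sources for.
    tree = ast.parse('warnings.filterwarnings("ignore", category=DeprecationWarning)')
    call = tree.body[0].value          # the ast.Call node
    first_arg = call.args[0]           # an ast.Constant on Python 3.8+

    # Modern accessor; the deprecated spelling would be first_arg.s.
    assert first_arg.value == "ignore"
    print(type(first_arg).__name__, first_arg.value)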
diff --git a/contrib/python/numpy/py3/numpy/version.py b/contrib/python/numpy/py3/numpy/version.py index 692240a486..e96055ea6d 100644 --- a/contrib/python/numpy/py3/numpy/version.py +++ b/contrib/python/numpy/py3/numpy/version.py @@ -1,5 +1,5 @@ -version = "1.26.3" +version = "1.26.4" __version__ = version full_version = version diff --git a/contrib/python/numpy/py3/ya.make b/contrib/python/numpy/py3/ya.make index 92042220c3..0eb98bef02 100644 --- a/contrib/python/numpy/py3/ya.make +++ b/contrib/python/numpy/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() PROVIDES(numpy) -VERSION(1.26.3) +VERSION(1.26.4) LICENSE(BSD-3-Clause) diff --git a/yt/yt/client/api/rpc_proxy/client_base.cpp b/yt/yt/client/api/rpc_proxy/client_base.cpp index e3c1283542..3cc5cdf8ea 100644 --- a/yt/yt/client/api/rpc_proxy/client_base.cpp +++ b/yt/yt/client/api/rpc_proxy/client_base.cpp @@ -80,7 +80,6 @@ TApiServiceProxy TClientBase::CreateApiServiceProxy(NRpc::IChannelPtr channel) proxy.SetDefaultTimeout(config->RpcTimeout); proxy.SetDefaultRequestCodec(config->RequestCodec); proxy.SetDefaultResponseCodec(config->ResponseCodec); - proxy.SetDefaultEnableLegacyRpcCodecs(config->EnableLegacyRpcCodecs); NRpc::TStreamingParameters streamingParameters; streamingParameters.ReadTimeout = config->DefaultStreamingStallTimeout; diff --git a/yt/yt/client/api/rpc_proxy/config.cpp b/yt/yt/client/api/rpc_proxy/config.cpp index 5a278c00bb..ce9c2f9262 100644 --- a/yt/yt/client/api/rpc_proxy/config.cpp +++ b/yt/yt/client/api/rpc_proxy/config.cpp @@ -100,9 +100,6 @@ void TConnectionConfig::Register(TRegistrar registrar) .Default(NCompression::ECodec::None); registrar.Parameter("response_codec", &TThis::ResponseCodec) .Default(NCompression::ECodec::None); - // COMPAT(kiselyovp): legacy RPC codecs - registrar.Parameter("enable_legacy_rpc_codecs", &TThis::EnableLegacyRpcCodecs) - .Default(true); registrar.Parameter("enable_retries", &TThis::EnableRetries) .Default(false); diff --git a/yt/yt/client/api/rpc_proxy/config.h b/yt/yt/client/api/rpc_proxy/config.h index 4e6a49be06..70bc9b00a3 100644 --- a/yt/yt/client/api/rpc_proxy/config.h +++ b/yt/yt/client/api/rpc_proxy/config.h @@ -66,8 +66,6 @@ public: NCompression::ECodec RequestCodec; NCompression::ECodec ResponseCodec; - bool EnableLegacyRpcCodecs; - bool EnableRetries; NRpc::TRetryingChannelConfigPtr RetryingChannel; diff --git a/yt/yt/client/api/rpc_proxy/transaction_impl.cpp b/yt/yt/client/api/rpc_proxy/transaction_impl.cpp index 7089f413e1..5bacf131a0 100644 --- a/yt/yt/client/api/rpc_proxy/transaction_impl.cpp +++ b/yt/yt/client/api/rpc_proxy/transaction_impl.cpp @@ -62,7 +62,6 @@ TTransaction::TTransaction( Proxy_.SetDefaultTimeout(config->RpcTimeout); Proxy_.SetDefaultRequestCodec(config->RequestCodec); Proxy_.SetDefaultResponseCodec(config->ResponseCodec); - Proxy_.SetDefaultEnableLegacyRpcCodecs(config->EnableLegacyRpcCodecs); YT_LOG_DEBUG("%v (Type: %v, StartTimestamp: %v, Atomicity: %v, " "Durability: %v, Timeout: %v, PingAncestors: %v, PingPeriod: %v, Sticky: %v, StickyProxyAddress: %v)", diff --git a/yt/yt/client/cache/rpc.cpp b/yt/yt/client/cache/rpc.cpp index 0512c3403b..3ae8626969 100644 --- a/yt/yt/client/cache/rpc.cpp +++ b/yt/yt/client/cache/rpc.cpp @@ -72,9 +72,6 @@ NApi::NRpcProxy::TConnectionConfigPtr GetConnectionConfig(const TConfig& config) connectionConfig->ResponseCodec = GetCompressionCodecFromProto(config.GetResponseCodec()); connectionConfig->EnableRetries = config.GetEnableRetries(); - if (config.HasEnableLegacyRpcCodecs()) { - 
connectionConfig->EnableLegacyRpcCodecs = config.GetEnableLegacyRpcCodecs(); - } if (config.HasEnableSelectQueryTracingTag()) { connectionConfig->EnableSelectQueryTracingTag = config.GetEnableSelectQueryTracingTag(); } diff --git a/yt/yt/core/rpc/client-inl.h b/yt/yt/core/rpc/client-inl.h index 894f21ff68..955014042d 100644 --- a/yt/yt/core/rpc/client-inl.h +++ b/yt/yt/core/rpc/client-inl.h @@ -85,15 +85,9 @@ TSharedRefArray TTypedClientRequest<TRequestMessage, TResponse>::SerializeHeader { TSharedRefArrayBuilder builder(Attachments().size() + 1); - // COMPAT(kiselyovp): legacy RPC codecs - builder.Add(EnableLegacyRpcCodecs_ - ? SerializeProtoToRefWithEnvelope(*this, RequestCodec_, false) - : SerializeProtoToRefWithCompression(*this, RequestCodec_, false)); - - auto attachmentCodecId = EnableLegacyRpcCodecs_ - ? NCompression::ECodec::None - : RequestCodec_; - auto compressedAttachments = CompressAttachments(Attachments(), attachmentCodecId); + builder.Add(SerializeProtoToRefWithCompression(*this, RequestCodec_, false)); + + auto compressedAttachments = CompressAttachments(Attachments(), RequestCodec_); for (auto&& attachment : compressedAttachments) { builder.Add(std::move(attachment)); } @@ -132,7 +126,7 @@ bool TTypedClientResponse<TResponseMessage>::TryDeserializeBody(TRef data, std:: return codecId ? TryDeserializeProtoWithCompression(this, data, *codecId) - // COMPAT(kiselyovp): legacy RPC codecs + // COMPAT(danilalexeev): legacy RPC codecs : TryDeserializeProtoWithEnvelope(this, data); } @@ -149,7 +143,6 @@ TIntrusivePtr<T> TProxyBase::CreateRequest(const TMethodDescriptor& methodDescri request->SetAcknowledgementTimeout(DefaultAcknowledgementTimeout_); request->SetRequestCodec(DefaultRequestCodec_); request->SetResponseCodec(DefaultResponseCodec_); - request->SetEnableLegacyRpcCodecs(DefaultEnableLegacyRpcCodecs_); request->SetMultiplexingBand(methodDescriptor.MultiplexingBand); if (methodDescriptor.StreamingEnabled) { diff --git a/yt/yt/core/rpc/client.cpp b/yt/yt/core/rpc/client.cpp index 74d42fc7d4..d01ec795d7 100644 --- a/yt/yt/core/rpc/client.cpp +++ b/yt/yt/core/rpc/client.cpp @@ -443,11 +443,8 @@ void TClientRequest::PrepareHeader() return; } - // COMPAT(kiselyovp): legacy RPC codecs - if (!EnableLegacyRpcCodecs_) { - Header_.set_request_codec(ToProto<int>(RequestCodec_)); - Header_.set_response_codec(ToProto<int>(ResponseCodec_)); - } + Header_.set_request_codec(ToProto<int>(RequestCodec_)); + Header_.set_response_codec(ToProto<int>(ResponseCodec_)); if (StreamingEnabled_) { ToProto(Header_.mutable_server_attachments_streaming_parameters(), ServerAttachmentsStreamingParameters_); @@ -464,11 +461,6 @@ void TClientRequest::PrepareHeader() HeaderPrepared_.store(true); } -bool TClientRequest::IsLegacyRpcCodecsEnabled() -{ - return EnableLegacyRpcCodecs_; -} - TSharedRefArray TClientRequest::GetHeaderlessMessage() const { if (SerializedHeaderlessMessageSet_.load()) { @@ -608,7 +600,7 @@ void TClientResponse::Deserialize(TSharedRefArray responseMessage) THROW_ERROR_EXCEPTION(NRpc::EErrorCode::ProtocolError, "Error deserializing response header"); } - // COMPAT(kiselyovp): legacy RPC codecs + // COMPAT(danilalexeev): legacy RPC codecs std::optional<NCompression::ECodec> bodyCodecId; NCompression::ECodec attachmentCodecId; if (Header_.has_codec()) { diff --git a/yt/yt/core/rpc/client.h b/yt/yt/core/rpc/client.h index 5a04b9baca..fe9433282e 100644 --- a/yt/yt/core/rpc/client.h +++ b/yt/yt/core/rpc/client.h @@ -75,8 +75,6 @@ struct IClientRequest virtual TMutationId GetMutationId() 
const = 0; virtual void SetMutationId(TMutationId id) = 0; - virtual bool IsLegacyRpcCodecsEnabled() = 0; - virtual size_t GetHash() const = 0; // Extension methods. @@ -135,7 +133,6 @@ public: DEFINE_BYVAL_RW_PROPERTY(bool, ResponseHeavy); DEFINE_BYVAL_RW_PROPERTY(NCompression::ECodec, RequestCodec, NCompression::ECodec::None); DEFINE_BYVAL_RW_PROPERTY(NCompression::ECodec, ResponseCodec, NCompression::ECodec::None); - DEFINE_BYVAL_RW_PROPERTY(bool, EnableLegacyRpcCodecs, true); DEFINE_BYVAL_RW_PROPERTY(bool, GenerateAttachmentChecksums, true); // Field is used on client side only. So it is never serialized. DEFINE_BYREF_RW_PROPERTY(NTracing::TTraceContext::TTagList, TracingTags); @@ -186,8 +183,6 @@ public: size_t GetHash() const override; - bool IsLegacyRpcCodecsEnabled() override; - EMultiplexingBand GetMultiplexingBand() const; void SetMultiplexingBand(EMultiplexingBand band); @@ -467,7 +462,6 @@ public: DEFINE_BYVAL_RW_PROPERTY(std::optional<TDuration>, DefaultAcknowledgementTimeout); DEFINE_BYVAL_RW_PROPERTY(NCompression::ECodec, DefaultRequestCodec, NCompression::ECodec::None); DEFINE_BYVAL_RW_PROPERTY(NCompression::ECodec, DefaultResponseCodec, NCompression::ECodec::None); - DEFINE_BYVAL_RW_PROPERTY(bool, DefaultEnableLegacyRpcCodecs, true); DEFINE_BYREF_RW_PROPERTY(TStreamingParameters, DefaultClientAttachmentsStreamingParameters); DEFINE_BYREF_RW_PROPERTY(TStreamingParameters, DefaultServerAttachmentsStreamingParameters); diff --git a/yt/yt/core/rpc/grpc/channel.cpp b/yt/yt/core/rpc/grpc/channel.cpp index 9a704294f1..3d9b0b8519 100644 --- a/yt/yt/core/rpc/grpc/channel.cpp +++ b/yt/yt/core/rpc/grpc/channel.cpp @@ -339,13 +339,16 @@ private: return; } + if (Request_->Header().has_request_codec()) { + InitialMetadataBuilder_.Add(RequestCodecKey, ToString(Request_->Header().request_codec())); + } + if (Request_->Header().has_response_codec()) { + InitialMetadataBuilder_.Add(ResponseCodecKey, ToString(Request_->Header().response_codec())); + } + YT_VERIFY(RequestBody_.Size() >= 2); TMessageWithAttachments messageWithAttachments; - if (Request_->IsLegacyRpcCodecsEnabled()) { - messageWithAttachments.Message = ExtractMessageFromEnvelopedMessage(RequestBody_[1]); - } else { - messageWithAttachments.Message = RequestBody_[1]; - } + messageWithAttachments.Message = RequestBody_[1]; for (int index = 2; index < std::ssize(RequestBody_); ++index) { messageWithAttachments.Attachments.push_back(RequestBody_[index]); @@ -622,6 +625,9 @@ private: NRpc::NProto::TResponseHeader responseHeader; ToProto(responseHeader.mutable_request_id(), Request_->GetRequestId()); + if (Request_->Header().has_response_codec()) { + responseHeader.set_codec(Request_->Header().response_codec()); + } auto responseMessage = CreateResponseMessage( responseHeader, diff --git a/yt/yt/core/rpc/grpc/helpers.cpp b/yt/yt/core/rpc/grpc/helpers.cpp index 4279c5840d..c35d1f0ed9 100644 --- a/yt/yt/core/rpc/grpc/helpers.cpp +++ b/yt/yt/core/rpc/grpc/helpers.cpp @@ -279,32 +279,13 @@ TMessageWithAttachments ByteBufferToMessageWithAttachments( messageBodySize = bufferSize; } - NYT::NProto::TSerializedMessageEnvelope envelope; - // Codec remains "none". 
- - TEnvelopeFixedHeader fixedHeader; - fixedHeader.EnvelopeSize = envelope.ByteSize(); - fixedHeader.MessageSize = *messageBodySize; - - size_t totalMessageSize = - sizeof (TEnvelopeFixedHeader) + - fixedHeader.EnvelopeSize + - fixedHeader.MessageSize; - auto data = TSharedMutableRef::Allocate<TMessageTag>( - totalMessageSize, + *messageBodySize, {.InitializeStorage = false}); - char* targetFixedHeader = data.Begin(); - char* targetHeader = targetFixedHeader + sizeof (TEnvelopeFixedHeader); - char* targetMessage = targetHeader + fixedHeader.EnvelopeSize; - - memcpy(targetFixedHeader, &fixedHeader, sizeof (fixedHeader)); - YT_VERIFY(envelope.SerializeToArray(targetHeader, fixedHeader.EnvelopeSize)); - TGrpcByteBufferStream stream(buffer); - if (stream.Load(targetMessage, *messageBodySize) != *messageBodySize) { + if (stream.Load(data.begin(), *messageBodySize) != *messageBodySize) { THROW_ERROR_EXCEPTION("Unexpected end of stream while reading message body"); } @@ -389,23 +370,6 @@ TGrpcByteBufferPtr MessageWithAttachmentsToByteBuffer(const TMessageWithAttachme return TGrpcByteBufferPtr(buffer); } -TSharedRef ExtractMessageFromEnvelopedMessage(const TSharedRef& data) -{ - YT_VERIFY(data.Size() >= sizeof(TEnvelopeFixedHeader)); - const auto* fixedHeader = reinterpret_cast<const TEnvelopeFixedHeader*>(data.Begin()); - const char* sourceHeader = data.Begin() + sizeof(TEnvelopeFixedHeader); - const char* sourceMessage = sourceHeader + fixedHeader->EnvelopeSize; - - NYT::NProto::TSerializedMessageEnvelope envelope; - YT_VERIFY(envelope.ParseFromArray(sourceHeader, fixedHeader->EnvelopeSize)); - - auto compressedMessage = data.Slice(sourceMessage, sourceMessage + fixedHeader->MessageSize); - - auto codecId = CheckedEnumCast<NCompression::ECodec>(envelope.codec()); - auto* codec = NCompression::GetCodec(codecId); - return codec->Decompress(compressedMessage); -} - TErrorCode StatusCodeToErrorCode(grpc_status_code statusCode) { switch (statusCode) { diff --git a/yt/yt/core/rpc/grpc/helpers.h b/yt/yt/core/rpc/grpc/helpers.h index ebcc9601e3..a85879dcb8 100644 --- a/yt/yt/core/rpc/grpc/helpers.h +++ b/yt/yt/core/rpc/grpc/helpers.h @@ -277,8 +277,6 @@ TMessageWithAttachments ByteBufferToMessageWithAttachments( TGrpcByteBufferPtr MessageWithAttachmentsToByteBuffer( const TMessageWithAttachments& messageWithAttachments); -TSharedRef ExtractMessageFromEnvelopedMessage(const TSharedRef& data); - //////////////////////////////////////////////////////////////////////////////// TErrorCode StatusCodeToErrorCode(grpc_status_code statusCode); diff --git a/yt/yt/core/rpc/grpc/public.cpp b/yt/yt/core/rpc/grpc/public.cpp index 132c2de784..c39b99a541 100644 --- a/yt/yt/core/rpc/grpc/public.cpp +++ b/yt/yt/core/rpc/grpc/public.cpp @@ -24,6 +24,8 @@ const char* const AuthServiceTicketMetadataKey = "yt-auth-service-ticket"; const char* const ErrorMetadataKey = "yt-error-bin"; const char* const MessageBodySizeMetadataKey = "yt-message-body-size"; const char* const ProtocolVersionMetadataKey = "yt-protocol-version"; +const char* const RequestCodecKey = "yt-request-codec"; +const char* const ResponseCodecKey = "yt-response-codec"; const THashSet<TStringBuf>& GetNativeMetadataKeys() { @@ -45,6 +47,8 @@ const THashSet<TStringBuf>& GetNativeMetadataKeys() ErrorMetadataKey, MessageBodySizeMetadataKey, ProtocolVersionMetadataKey, + RequestCodecKey, + ResponseCodecKey, }; return result; } diff --git a/yt/yt/core/rpc/grpc/public.h b/yt/yt/core/rpc/grpc/public.h index d3da4f43f7..58773a258b 100644 --- 
a/yt/yt/core/rpc/grpc/public.h +++ b/yt/yt/core/rpc/grpc/public.h @@ -34,6 +34,8 @@ extern const char* const AuthServiceTicketMetadataKey; extern const char* const ErrorMetadataKey; extern const char* const MessageBodySizeMetadataKey; extern const char* const ProtocolVersionMetadataKey; +extern const char* const RequestCodecKey; +extern const char* const ResponseCodecKey; // After adding a new metadata key, do not forget to add it in GetNativeMetadataKeys. const THashSet<TStringBuf>& GetNativeMetadataKeys(); diff --git a/yt/yt/core/rpc/grpc/server.cpp b/yt/yt/core/rpc/grpc/server.cpp index d9efaace43..db935778c7 100644 --- a/yt/yt/core/rpc/grpc/server.cpp +++ b/yt/yt/core/rpc/grpc/server.cpp @@ -419,6 +419,8 @@ private: TString ServiceName_; TString MethodName_; std::optional<TDuration> Timeout_; + NCompression::ECodec RequestCodec_ = NCompression::ECodec::None; + NCompression::ECodec ResponseCodec_ = NCompression::ECodec::None; IServicePtr Service_; TGrpcMetadataArrayBuilder InitialMetadataBuilder_; @@ -480,6 +482,8 @@ private: ParseRpcCredentials(); ParseCustomMetadata(); ParseTimeout(); + ParseRequestCodec(); + ParseResponseCodec(); try { SslCredentialsExt_ = WaitFor(ParseSslCredentials()) @@ -665,6 +669,54 @@ private: UserAgent_ = TString(userAgentString); } + void ParseRequestCodec() + { + auto requestCodecString = CallMetadata_.Find(RequestCodecKey); + if (!requestCodecString) { + return; + } + + NCompression::ECodec codecId; + int intCodecId; + if (!TryFromString(requestCodecString, intCodecId)) { + YT_LOG_WARNING("Failed to parse request codec from request metadata (RequestId: %v)", + RequestId_); + return; + } + if (!TryEnumCast(intCodecId, &codecId)) { + YT_LOG_WARNING("Request codec %v is not supported (RequestId: %v)", + intCodecId, + RequestId_); + return; + } + + RequestCodec_ = codecId; + } + + void ParseResponseCodec() + { + auto responseCodecString = CallMetadata_.Find(ResponseCodecKey); + if (!responseCodecString) { + return; + } + + NCompression::ECodec codecId; + int intCodecId; + if (!TryFromString(responseCodecString, intCodecId)) { + YT_LOG_WARNING("Failed to parse response codec from request metadata (RequestId: %v)", + RequestId_); + return; + } + if (!TryEnumCast(intCodecId, &codecId)) { + YT_LOG_WARNING("Response codec %v is not supported (RequestId: %v)", + intCodecId, + RequestId_); + return; + } + + ResponseCodec_ = codecId; + } + void ParseRpcCredentials() { auto tokenString = CallMetadata_.Find(AuthTokenMetadataKey); @@ -901,6 +953,9 @@ private: header->set_method(MethodName_); header->set_protocol_version_major(ProtocolVersion_.Major); header->set_protocol_version_minor(ProtocolVersion_.Minor); + header->set_request_codec(ToProto<int>(RequestCodec_)); + header->set_response_codec(ToProto<int>(ResponseCodec_)); + if (Timeout_) { header->set_timeout(ToProto<i64>(*Timeout_)); } @@ -1032,7 +1087,7 @@ private: YT_VERIFY(ResponseMessage_.Size() >= 2); TMessageWithAttachments messageWithAttachments; - messageWithAttachments.Message = ExtractMessageFromEnvelopedMessage(ResponseMessage_[1]); + messageWithAttachments.Message = ResponseMessage_[1]; for (int index = 2; index < std::ssize(ResponseMessage_); ++index) { messageWithAttachments.Attachments.push_back(ResponseMessage_[index]); } diff --git a/yt/yt/core/rpc/message.cpp b/yt/yt/core/rpc/message.cpp index d76dfa7c1e..6a5b6b7b2b 100644 --- a/yt/yt/core/rpc/message.cpp +++ b/yt/yt/core/rpc/message.cpp @@ -51,27 +51,6 @@ void SerializeAndAddProtoWithHeader( 
message.SerializeWithCachedSizesToArray(reinterpret_cast<google::protobuf::uint8*>(ref.Begin() + sizeof(fixedHeader))); } -size_t GetAllocationSpaceForProtoWithEnvelope(const google::protobuf::MessageLite& message) -{ - return - sizeof (TEnvelopeFixedHeader) + - message.ByteSizeLong(); -} - -void SerializeAndAddProtoWithEnvelope( - TSharedRefArrayBuilder* builder, - const google::protobuf::MessageLite& message) -{ - auto ref = builder->AllocateAndAdd( - sizeof (TEnvelopeFixedHeader) + - message.GetCachedSize()); - auto* header = static_cast<TEnvelopeFixedHeader*>(static_cast<void*>(ref.Begin())); - // Empty (default) TSerializedMessageEnvelope. - header->EnvelopeSize = 0; - header->MessageSize = message.GetCachedSize(); - message.SerializeWithCachedSizesToArray(reinterpret_cast<google::protobuf::uint8*>(ref.Begin() + sizeof(TEnvelopeFixedHeader))); -} - bool DeserializeFromProtoWithHeader( google::protobuf::MessageLite* message, TRef data) @@ -165,17 +144,17 @@ TSharedRefArray CreateResponseMessage( const std::vector<TSharedRef>& attachments) { NProto::TResponseHeader header; + header.set_codec(ToProto<int>(NCompression::ECodec::None)); TSharedRefArrayBuilder builder( 2 + attachments.size(), - GetAllocationSpaceForProtoWithHeader(header) + GetAllocationSpaceForProtoWithEnvelope(body), + GetAllocationSpaceForProtoWithHeader(header) + body.ByteSizeLong(), GetRefCountedTypeCookie<TSerializedMessageTag>()); SerializeAndAddProtoWithHeader( &builder, TFixedMessageHeader{EMessageType::Response}, header); - SerializeAndAddProtoWithEnvelope( - &builder, - body); + auto ref = builder.AllocateAndAdd(body.GetCachedSize()); + body.SerializeWithCachedSizesToArray(reinterpret_cast<google::protobuf::uint8*>(ref.Begin())); for (auto attachment : attachments) { builder.Add(std::move(attachment)); } diff --git a/yt/yt/core/rpc/message_format.cpp b/yt/yt/core/rpc/message_format.cpp index 01e1dd4d27..8ef05c7b5d 100644 --- a/yt/yt/core/rpc/message_format.cpp +++ b/yt/yt/core/rpc/message_format.cpp @@ -71,21 +71,19 @@ public: TSharedRef ConvertFrom(const TSharedRef& message, const NYson::TProtobufMessageType* messageType, const TYsonString& /*formatOptionsYson*/) override { - auto ysonBuffer = PopEnvelope(message); TString protoBuffer; { google::protobuf::io::StringOutputStream output(&protoBuffer); auto converter = CreateProtobufWriter(&output, messageType); // NB: formatOptionsYson is ignored, since YSON parser has no user-defined options. 
- ParseYsonStringBuffer(TStringBuf(ysonBuffer.Begin(), ysonBuffer.End()), EYsonType::Node, converter.get()); + ParseYsonStringBuffer(TStringBuf(message.Begin(), message.End()), EYsonType::Node, converter.get()); } - return PushEnvelope(TSharedRef::FromString(protoBuffer)); + return TSharedRef::FromString(protoBuffer); } TSharedRef ConvertTo(const TSharedRef& message, const NYson::TProtobufMessageType* messageType, const TYsonString& /*formatOptionsYson*/) override { - auto protoBuffer = PopEnvelope(message); - google::protobuf::io::ArrayInputStream stream(protoBuffer.Begin(), protoBuffer.Size()); + google::protobuf::io::ArrayInputStream stream(message.Begin(), message.Size()); TString ysonBuffer; { TStringOutput output(ysonBuffer); @@ -93,7 +91,7 @@ public: TYsonWriter writer{&output, EYsonFormat::Text}; ParseProtobuf(&writer, &stream, messageType); } - return PushEnvelope(TSharedRef::FromString(ysonBuffer)); + return TSharedRef::FromString(ysonBuffer); } } YsonFormat; @@ -108,25 +106,23 @@ public: TSharedRef ConvertFrom(const TSharedRef& message, const NYson::TProtobufMessageType* messageType, const TYsonString& formatOptionsYson) override { - auto jsonBuffer = PopEnvelope(message); TString protoBuffer; { google::protobuf::io::StringOutputStream output(&protoBuffer); auto converter = CreateProtobufWriter(&output, messageType); - TMemoryInput input{jsonBuffer.Begin(), jsonBuffer.Size()}; + TMemoryInput input{message.Begin(), message.Size()}; auto formatConfig = New<TJsonFormatConfig>(); if (formatOptionsYson) { formatConfig->Load(NYTree::ConvertToNode(formatOptionsYson)); } ParseJson(&input, converter.get(), formatConfig); } - return PushEnvelope(TSharedRef::FromString(protoBuffer)); + return TSharedRef::FromString(protoBuffer); } TSharedRef ConvertTo(const TSharedRef& message, const NYson::TProtobufMessageType* messageType, const TYsonString& formatOptionsYson) override { - auto protoBuffer = PopEnvelope(message); - google::protobuf::io::ArrayInputStream stream(protoBuffer.Begin(), protoBuffer.Size()); + google::protobuf::io::ArrayInputStream stream(message.Begin(), message.Size()); TString ysonBuffer; { TStringOutput output(ysonBuffer); @@ -138,7 +134,7 @@ public: ParseProtobuf(writer.get(), &stream, messageType); writer->Flush(); } - return PushEnvelope(TSharedRef::FromString(ysonBuffer)); + return TSharedRef::FromString(ysonBuffer); } } JsonFormat; diff --git a/yt/yt/core/rpc/server_detail.cpp b/yt/yt/core/rpc/server_detail.cpp index 9218725013..dcf3241ef7 100644 --- a/yt/yt/core/rpc/server_detail.cpp +++ b/yt/yt/core/rpc/server_detail.cpp @@ -88,15 +88,6 @@ void TServiceContextBase::Reply(const TSharedRefArray& responseMessage) TResponseHeader header; YT_VERIFY(TryParseResponseHeader(responseMessage, &header)); - // COMPAT(danilalexeev): legacy RPC codecs - if (header.has_codec()) { - YT_VERIFY(TryEnumCast(header.codec(), &ResponseCodec_)); - SetResponseBodySerializedWithCompression(); - } - if (header.has_format()) { - RequestHeader_->set_response_format(header.format()); - } - if (header.has_error()) { Error_ = FromProto<TError>(header.error()); } @@ -106,6 +97,11 @@ void TServiceContextBase::Reply(const TSharedRefArray& responseMessage) ResponseAttachments_ = std::vector<TSharedRef>( responseMessage.Begin() + 2, responseMessage.End()); + + YT_VERIFY(header.has_codec() && TryEnumCast(header.codec(), &ResponseCodec_)); + if (header.has_format()) { + RequestHeader_->set_response_format(header.format()); + } } else { ResponseBody_.Reset(); ResponseAttachments_.clear(); @@ -190,10 
+186,7 @@ TSharedRefArray TServiceContextBase::BuildResponseMessage() header.set_format(RequestHeader_->response_format()); } - // COMPAT(danilalexeev) - if (IsResponseBodySerializedWithCompression()) { - header.set_codec(static_cast<int>(ResponseCodec_)); - } + header.set_codec(static_cast<int>(ResponseCodec_)); auto message = Error_.IsOK() ? CreateResponseMessage( @@ -471,16 +464,6 @@ void TServiceContextBase::SetResponseCodec(NCompression::ECodec codec) ResponseCodec_ = codec; } -bool TServiceContextBase::IsResponseBodySerializedWithCompression() const -{ - return ResponseBodySerializedWithCompression_; -} - -void TServiceContextBase::SetResponseBodySerializedWithCompression() -{ - ResponseBodySerializedWithCompression_ = true; -} - //////////////////////////////////////////////////////////////////////////////// TServiceContextWrapper::TServiceContextWrapper(IServiceContextPtr underlyingContext) @@ -745,16 +728,6 @@ void TServiceContextWrapper::SetResponseCodec(NCompression::ECodec codec) UnderlyingContext_->SetResponseCodec(codec); } -bool TServiceContextWrapper::IsResponseBodySerializedWithCompression() const -{ - return UnderlyingContext_->IsResponseBodySerializedWithCompression(); -} - -void TServiceContextWrapper::SetResponseBodySerializedWithCompression() -{ - UnderlyingContext_->SetResponseBodySerializedWithCompression(); -} - const IServiceContextPtr& TServiceContextWrapper::GetUnderlyingContext() const { return UnderlyingContext_; diff --git a/yt/yt/core/rpc/server_detail.h b/yt/yt/core/rpc/server_detail.h index aceba1f14e..4b6ecc834f 100644 --- a/yt/yt/core/rpc/server_detail.h +++ b/yt/yt/core/rpc/server_detail.h @@ -102,9 +102,6 @@ public: NCompression::ECodec GetResponseCodec() const override; void SetResponseCodec(NCompression::ECodec codec) override; - bool IsResponseBodySerializedWithCompression() const override; - void SetResponseBodySerializedWithCompression() override; - protected: std::unique_ptr<NProto::TRequestHeader> RequestHeader_; TSharedRefArray RequestMessage_; @@ -132,8 +129,6 @@ protected: TCompactVector<TString, 4> ResponseInfos_; NCompression::ECodec ResponseCodec_ = NCompression::ECodec::None; - // COMPAT(danilalexeev) - bool ResponseBodySerializedWithCompression_ = false; TSingleShotCallbackList<void()> RepliedList_; @@ -249,9 +244,6 @@ public: NCompression::ECodec GetResponseCodec() const override; void SetResponseCodec(NCompression::ECodec codec) override; - bool IsResponseBodySerializedWithCompression() const override; - void SetResponseBodySerializedWithCompression() override; - const IServiceContextPtr& GetUnderlyingContext() const; private: diff --git a/yt/yt/core/rpc/service.h b/yt/yt/core/rpc/service.h index 1c2e721730..2e2a087e4f 100644 --- a/yt/yt/core/rpc/service.h +++ b/yt/yt/core/rpc/service.h @@ -231,11 +231,6 @@ struct IServiceContext //! Changes the response codec. virtual void SetResponseCodec(NCompression::ECodec codec) = 0; - // COPMAT(danilalexeev) - //! Returnes true if response body has been serialized with compression. - virtual bool IsResponseBodySerializedWithCompression() const = 0; - virtual void SetResponseBodySerializedWithCompression() = 0; - // Extension methods. 
void SetRequestInfo(); diff --git a/yt/yt/core/rpc/service_detail.cpp b/yt/yt/core/rpc/service_detail.cpp index 003c015241..92e81ea0ed 100644 --- a/yt/yt/core/rpc/service_detail.cpp +++ b/yt/yt/core/rpc/service_detail.cpp @@ -697,32 +697,13 @@ private: MethodPerformanceCounters_->RemoteWaitTimeCounter.Record(now - retryStart); } - // COMPAT(kiselyovp): legacy RPC codecs - if (RequestHeader_->has_request_codec()) { - int intRequestCodecId = RequestHeader_->request_codec(); - if (!TryEnumCast(intRequestCodecId, &RequestCodec_)) { - Reply(TError( - NRpc::EErrorCode::ProtocolError, - "Request codec %v is not supported", - intRequestCodecId)); - return; - } - } else { - RequestCodec_ = NCompression::ECodec::None; - } - - if (RequestHeader_->has_response_codec()) { - int intResponseCodecId = RequestHeader_->response_codec(); - if (!TryEnumCast(intResponseCodecId, &ResponseCodec_)) { - Reply(TError( - NRpc::EErrorCode::ProtocolError, - "Response codec %v is not supported", - intResponseCodecId)); - return; - } - } else { - ResponseCodec_ = NCompression::ECodec::None; - } + // COMPAT(danilalexeev): legacy RPC codecs + RequestCodec_ = RequestHeader_->has_request_codec() + ? CheckedEnumCast<NCompression::ECodec>(RequestHeader_->request_codec()) + : NCompression::ECodec::None; + ResponseCodec_ = RequestHeader_->has_response_codec() + ? CheckedEnumCast<NCompression::ECodec>(RequestHeader_->response_codec()) + : NCompression::ECodec::None; Service_->IncrementActiveRequestCount(); ActiveRequestCountIncremented_ = true; @@ -786,7 +767,7 @@ private: GetTotalMessageAttachmentSize(RequestMessage_), GetMessageAttachmentCount(RequestMessage_)); - // COMPAT(kiselyovp) + // COMPAT(danilalexeev): legacy RPC codecs if (RequestHeader_->has_request_codec() && RequestHeader_->has_response_codec()) { delimitedBuilder->AppendFormat("RequestCodec: %v, ResponseCodec: %v", RequestCodec_, @@ -1928,6 +1909,9 @@ TError TServiceBase::DoCheckRequestCompatibility(const NRpc::NProto::TRequestHea if (auto error = DoCheckRequestFeatures(header); !error.IsOK()) { return error; } + if (auto error = DoCheckRequestCodecs(header); !error.IsOK()) { + return error; + } return {}; } @@ -1971,6 +1955,29 @@ TError TServiceBase::DoCheckRequestFeatures(const NRpc::NProto::TRequestHeader& return {}; } +TError TServiceBase::DoCheckRequestCodecs(const NRpc::NProto::TRequestHeader& header) +{ + if (header.has_request_codec()) { + NCompression::ECodec requestCodec; + if (!TryEnumCast(header.request_codec(), &requestCodec)) { + return TError( + NRpc::EErrorCode::ProtocolError, + "Request codec %v is not supported", + header.request_codec()); + } + } + if (header.has_response_codec()) { + NCompression::ECodec responseCodec; + if (!TryEnumCast(header.response_codec(), &responseCodec)) { + return TError( + NRpc::EErrorCode::ProtocolError, + "Response codec %v is not supported", + header.response_codec()); + } + } + return {}; +} + void TServiceBase::OnRequestTimeout(TRequestId requestId, ERequestProcessingStage stage, bool /*aborted*/) { auto context = FindRequest(requestId); diff --git a/yt/yt/core/rpc/service_detail.h b/yt/yt/core/rpc/service_detail.h index 2ce0ca71b2..822b799d50 100644 --- a/yt/yt/core/rpc/service_detail.h +++ b/yt/yt/core/rpc/service_detail.h @@ -214,7 +214,7 @@ public: } } - // COMPAT(kiselyovp): legacy RPC codecs + // COMPAT(danilalexeev): legacy RPC codecs std::optional<NCompression::ECodec> bodyCodecId; NCompression::ECodec attachmentCodecId; if (requestHeader.has_request_codec()) { @@ -325,18 +325,8 @@ protected: const auto& 
underlyingContext = this->GetUnderlyingContext(); const auto& requestHeader = underlyingContext->GetRequestHeader(); - // COMPAT(kiselyovp): legacy RPC codecs - NCompression::ECodec attachmentCodecId; - auto bodyCodecId = underlyingContext->GetResponseCodec(); - TSharedRef serializedBody; - if (requestHeader.has_response_codec()) { - serializedBody = SerializeProtoToRefWithCompression(*Response_, bodyCodecId, false); - attachmentCodecId = bodyCodecId; - underlyingContext->SetResponseBodySerializedWithCompression(); - } else { - serializedBody = SerializeProtoToRefWithEnvelope(*Response_, bodyCodecId); - attachmentCodecId = NCompression::ECodec::None; - } + auto codecId = underlyingContext->GetResponseCodec(); + auto serializedBody = SerializeProtoToRefWithCompression(*Response_, codecId); if (requestHeader.has_response_format()) { int intFormat = requestHeader.response_format(); @@ -362,7 +352,7 @@ protected: } } - auto responseAttachments = CompressAttachments(Response_->Attachments(), attachmentCodecId); + auto responseAttachments = CompressAttachments(Response_->Attachments(), codecId); return TSerializedResponse{ .Body = std::move(serializedBody), @@ -948,6 +938,7 @@ private: TError DoCheckRequestCompatibility(const NRpc::NProto::TRequestHeader& header); TError DoCheckRequestProtocol(const NRpc::NProto::TRequestHeader& header); TError DoCheckRequestFeatures(const NRpc::NProto::TRequestHeader& header); + TError DoCheckRequestCodecs(const NRpc::NProto::TRequestHeader& header); void OnRequestTimeout(TRequestId requestId, ERequestProcessingStage stage, bool aborted); void OnReplyBusTerminated(const NYT::NBus::IBusPtr& bus, const TError& error); diff --git a/yt/yt/core/rpc/unittests/rpc_ut.cpp b/yt/yt/core/rpc/unittests/rpc_ut.cpp index 3924bd6c20..226cd60f3f 100644 --- a/yt/yt/core/rpc/unittests/rpc_ut.cpp +++ b/yt/yt/core/rpc/unittests/rpc_ut.cpp @@ -152,7 +152,6 @@ TYPED_TEST(TNotGrpcTest, StreamingEcho) TTestProxy proxy(this->CreateChannel()); proxy.SetDefaultRequestCodec(NCompression::ECodec::Lz4); proxy.SetDefaultResponseCodec(NCompression::ECodec::Zstd_1); - proxy.SetDefaultEnableLegacyRpcCodecs(false); const int AttachmentCount = 30; const ssize_t AttachmentSize = 2_MB; @@ -540,7 +539,6 @@ TYPED_TEST(TNotGrpcTest, Compression) TTestProxy proxy(this->CreateChannel()); proxy.SetDefaultRequestCodec(requestCodecId); proxy.SetDefaultResponseCodec(responseCodecId); - proxy.SetDefaultEnableLegacyRpcCodecs(false); auto req = proxy.Compression(); req->set_request_codec(static_cast<int>(requestCodecId)); diff --git a/yt/yt/core/ytree/ypath_client.cpp b/yt/yt/core/ytree/ypath_client.cpp index cdfa0f63c3..2a40321189 100644 --- a/yt/yt/core/ytree/ypath_client.cpp +++ b/yt/yt/core/ytree/ypath_client.cpp @@ -51,6 +51,8 @@ TYPathRequest::TYPathRequest( { ToProto(Header_.mutable_service(), std::move(service)); ToProto(Header_.mutable_method(), std::move(method)); + Header_.set_request_codec(ToProto<int>(NCompression::ECodec::None)); + Header_.set_response_codec(ToProto<int>(NCompression::ECodec::None)); auto* ypathExt = Header_.MutableExtension(NProto::TYPathHeaderExt::ypath_header_ext); ypathExt->set_mutating(mutating); @@ -186,11 +188,6 @@ NConcurrency::IAsyncZeroCopyInputStreamPtr TYPathRequest::GetResponseAttachments YT_ABORT(); } -bool TYPathRequest::IsLegacyRpcCodecsEnabled() -{ - YT_ABORT(); -} - TSharedRefArray TYPathRequest::Serialize() { auto bodyData = SerializeBody(); diff --git a/yt/yt/core/ytree/ypath_client.h b/yt/yt/core/ytree/ypath_client.h index 3dfdaed64e..e5d4138568 100644 --- 
a/yt/yt/core/ytree/ypath_client.h +++ b/yt/yt/core/ytree/ypath_client.h @@ -67,8 +67,6 @@ public: NConcurrency::IAsyncZeroCopyOutputStreamPtr GetRequestAttachmentsStream() const override; NConcurrency::IAsyncZeroCopyInputStreamPtr GetResponseAttachmentsStream() const override; - bool IsLegacyRpcCodecsEnabled() override; - TSharedRefArray Serialize() override; protected: diff --git a/yt/yt/library/tracing/jaeger/tracer.cpp b/yt/yt/library/tracing/jaeger/tracer.cpp index 43817c119f..50541c7a18 100644 --- a/yt/yt/library/tracing/jaeger/tracer.cpp +++ b/yt/yt/library/tracing/jaeger/tracer.cpp @@ -324,7 +324,6 @@ bool TJaegerChannelManager::Push(const std::vector<TSharedRef>& batches, int spa proxy.SetDefaultTimeout(RpcTimeout_); auto req = proxy.PostSpans(); - req->SetEnableLegacyRpcCodecs(false); req->set_batch(MergeRefsToString(batches)); if (TvmService_) { diff --git a/yt/yt_proto/yt/client/cache/proto/config.proto b/yt/yt_proto/yt/client/cache/proto/config.proto index 98699e87bb..6350308d37 100644 --- a/yt/yt_proto/yt/client/cache/proto/config.proto +++ b/yt/yt_proto/yt/client/cache/proto/config.proto @@ -29,8 +29,6 @@ message TConfig optional ECompressionCodec RequestCodec = 17 [default = None]; optional ECompressionCodec ResponseCodec = 12 [default = None]; - // Should set EnableLegacyRpcCodecs=False, to enable RequestCodec & ResponseCodec: https://nda.ya.ru/t/iXCfYZjS6yNEwg - optional bool EnableLegacyRpcCodecs = 20; optional bool EnableRetries = 13; optional uint32 RetryBackoffTime = 14; diff --git a/yt/yt_proto/yt/core/rpc/proto/rpc.proto b/yt/yt_proto/yt/core/rpc/proto/rpc.proto index 043b2953e2..c9d0ea291f 100644 --- a/yt/yt_proto/yt/core/rpc/proto/rpc.proto +++ b/yt/yt_proto/yt/core/rpc/proto/rpc.proto @@ -144,7 +144,6 @@ message TResponseHeader optional int32 format = 3; // EMessageFormat - // COMPAT(kiseloyvp): this is missing when legacy mode is used optional int32 codec = 6; // ECodec reserved 5; |
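A minimal client-side sketch of the codec plumbing exercised by the updated rpc_ut.cpp tests in this commit. TTestProxy, this->CreateChannel() and the Compression method are assumed from that test fixture (they are not defined here); the invocation pattern via WaitFor/Invoke is the usual YT RPC idiom and is likewise assumed:

    TTestProxy proxy(this->CreateChannel());
    // Codecs are now configured directly on the proxy and are always written into
    // TRequestHeader::request_codec / response_codec (and, over gRPC, carried in the
    // yt-request-codec / yt-response-codec metadata keys introduced above), so the
    // former SetDefaultEnableLegacyRpcCodecs(false) call is no longer needed.
    proxy.SetDefaultRequestCodec(NCompression::ECodec::Lz4);
    proxy.SetDefaultResponseCodec(NCompression::ECodec::Zstd_1);

    auto req = proxy.Compression();
    req->set_request_codec(static_cast<int>(NCompression::ECodec::Lz4));
    auto rspOrError = NConcurrency::WaitFor(req->Invoke());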