diff options
author | James Almer <jamrial@gmail.com> | 2016-01-07 00:31:56 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2016-01-07 14:24:01 -0300 |
commit | f4c1a4848378d035b835e9e2ca1c62f15a5982b1 (patch) | |
tree | e07c344aa381a6d44cb83b295738e8402dafc5f7 | |
parent | 9f17d4ae7e83e4f4124b28b6aaead61f42ddf223 (diff) | |
download | ffmpeg-f4c1a4848378d035b835e9e2ca1c62f15a5982b1.tar.gz |
x86/intmath: add sse optimized av_clipf and av_clipd
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavutil/x86/intmath.h | 33 |
1 file changed, 33 insertions, 0 deletions
```diff
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 611ef882e9..2b2c869533 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -22,6 +22,7 @@
 #define AVUTIL_X86_INTMATH_H
 
 #include <stdint.h>
+#include <stdlib.h>
 #if HAVE_FAST_CLZ
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -98,6 +99,38 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne
 
 #endif /* __BMI2__ */
 
+#if defined(__SSE2__)
+
+#define av_clipd av_clipd_sse2
+static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort();
+#endif
+    __asm__ ("minsd %2, %0 \n\t"
+             "maxsd %1, %0 \n\t"
+             : "+x"(a) : "xm"(amin), "xm"(amax));
+    return a;
+}
+
+#endif /* __SSE2__ */
+
+#if defined(__SSE__)
+
+#define av_clipf av_clipf_sse
+static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort();
+#endif
+    __asm__ ("minss %2, %0 \n\t"
+             "maxss %1, %0 \n\t"
+             : "+x"(a) : "xm"(amin), "xm"(amax));
+    return a;
+}
+
+#endif /* __SSE__ */
+
 #endif /* __GNUC__ */
 
 #endif /* AVUTIL_X86_INTMATH_H */
```