diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2011-10-30 01:13:55 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2011-11-11 14:13:58 -0500 |
commit | 9d06037d48041ad8ccbae6c12aa9f3a313a89c4e (patch) | |
tree | 9521e25c2c61b0397b86f99d026b624251ee0d14 /libavcodec/x86 | |
parent | 7b966566da24598a636a433a75a7842e272b18f6 (diff) | |
download | ffmpeg-9d06037d48041ad8ccbae6c12aa9f3a313a89c4e.tar.gz |
twinvq: add SSE/AVX optimized sum/difference stereo interleaving
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 7 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 48 |
2 files changed, 55 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index dd6cbf51ad..f0de05a763 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2424,6 +2424,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); +extern void ff_butterflies_float_interleave_sse(float *dst, const float *src0, + const float *src1, int len); +extern void ff_butterflies_float_interleave_avx(float *dst, const float *src0, + const float *src1, int len); + void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); @@ -2868,6 +2873,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->vector_clipf = vector_clipf_sse; #if HAVE_YASM c->scalarproduct_float = ff_scalarproduct_float_sse; + c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; #endif } if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) @@ -2925,6 +2931,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx; } + c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; } #endif } diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 8e3cbdc03d..f2894cd501 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 %else VECTOR_CLIP_INT32 6, 1, 0, 0 %endif + +;----------------------------------------------------------------------------- +; void ff_butterflies_float_interleave(float *dst, const float *src0, +; const float *src1, int len); +;----------------------------------------------------------------------------- + +%macro BUTTERFLIES_FLOAT_INTERLEAVE 0 +cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len +%ifdef ARCH_X86_64 + movsxd lenq, lend +%endif + test lenq, lenq + jz .end + shl lenq, 2 + lea src0q, [src0q + lenq] + lea src1q, [src1q + lenq] + lea dstq, [ dstq + 2*lenq] + neg lenq +.loop: + mova m0, [src0q + lenq] + mova m1, [src1q + lenq] + subps m2, m0, m1 + addps m0, m0, m1 + unpcklps m1, m0, m2 + unpckhps m0, m0, m2 +%if cpuflag(avx) + vextractf128 [dstq + 2*lenq ], m1, 0 + vextractf128 [dstq + 2*lenq + 16], m0, 0 + vextractf128 [dstq + 2*lenq + 32], m1, 1 + vextractf128 [dstq + 2*lenq + 48], m0, 1 +%else + mova [dstq + 2*lenq ], m1 + mova [dstq + 2*lenq + mmsize], m0 +%endif + add lenq, mmsize + jl .loop +%if mmsize == 32 + vzeroupper + RET +%endif +.end: + REP_RET +%endmacro + +INIT_XMM sse +BUTTERFLIES_FLOAT_INTERLEAVE +INIT_YMM avx +BUTTERFLIES_FLOAT_INTERLEAVE |