diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2011-03-20 13:31:36 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2011-03-22 21:08:30 -0400 |
commit | e6e9823488b4cf42778411f1239592f0787e121e (patch) | |
tree | dcdb0a902e921957a3e0b912c56a73d7fb67422d /libavcodec/x86/dsputil_mmx.c | |
parent | e971d81364e93feae8c399075a3be2643192e031 (diff) | |
download | ffmpeg-e6e9823488b4cf42778411f1239592f0787e121e.tar.gz |
Add apply_window_int16() to DSPContext with x86-optimized versions and use it
in the ac3_fixed encoder.
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index a611a71f06..4d1a3052ba 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2388,6 +2388,20 @@ int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); + +void ff_apply_window_int16_mmxext (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_sse2 (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_sse2_ba (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_ssse3 (int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); +void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, + const int16_t *window, unsigned int len); + void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); @@ -2749,6 +2763,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_mmxext_ba; + } else { + c->apply_window_int16 = ff_apply_window_int16_mmxext; + } #endif } if(mm_flags & AV_CPU_FLAG_SSE){ @@ -2771,13 +2790,30 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_sse2_ba; + } else { + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { + c->apply_window_int16 = ff_apply_window_int16_sse2; + } + } c->emulated_edge_mc = emulated_edge_mc_sse; c->gmc= gmc_sse; #endif } - if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + if (mm_flags & AV_CPU_FLAG_SSSE3) { +#if HAVE_YASM + if (mm_flags & AV_CPU_FLAG_ATOM) { + c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; + } else { + c->apply_window_int16 = ff_apply_window_int16_ssse3; + } + if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) { // cachesplit + c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + } +#endif + } } if (CONFIG_ENCODERS) |