diff options
author | Jason Garrett-Glaser <jason@x264.com> | 2011-05-10 07:08:24 -0700 |
---|---|---|
committer | Jason Garrett-Glaser <jason@x264.com> | 2011-05-10 20:01:58 -0700 |
commit | 8ad77b65b548a6b2f4707265ebd7e97f956acf0b (patch) | |
tree | e0d9053df2b4130023fc4b8960dc8c3e3d139fdc /libavcodec/x86/h264dsp_mmx.c | |
parent | b66752790a94820c23b0ac994d6190dd9048582d (diff) | |
download | ffmpeg-8ad77b65b548a6b2f4707265ebd7e97f956acf0b.tar.gz |
Update x86 H.264 deblock asm
Includes AVX versions from x264.
Diffstat (limited to 'libavcodec/x86/h264dsp_mmx.c')
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 56 |
1 files changed, 34 insertions, 22 deletions
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 3a783a39ab..7d27c02ea2 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -219,11 +219,11 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
 }

 #define LF_FUNC(DIR, TYPE, OPT) \
-void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
-                                                       int alpha, int beta, int8_t *tc0);
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
+                                                  int alpha, int beta, int8_t *tc0);
 #define LF_IFUNC(DIR, TYPE, OPT) \
-void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
-                                                       int alpha, int beta);
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
+                                                  int alpha, int beta);

 LF_FUNC (h, chroma, mmxext)
 LF_IFUNC(h, chroma_intra, mmxext)
@@ -234,18 +234,18 @@ LF_FUNC (h, luma, mmxext)
 LF_IFUNC(h, luma_intra, mmxext)
 #if HAVE_YASM && ARCH_X86_32
 LF_FUNC (v8, luma, mmxext)
-static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void ff_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     if((tc0[0] & tc0[1]) >= 0)
-        ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
+        ff_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
     if((tc0[2] & tc0[3]) >= 0)
-        ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
+        ff_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
 }
 LF_IFUNC(v8, luma_intra, mmxext)
-static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
+static void ff_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
 {
-    ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
-    ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
+    ff_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
+    ff_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
 }
 #endif

@@ -253,6 +253,10 @@ LF_FUNC (h, luma, sse2)
 LF_IFUNC(h, luma_intra, sse2)
 LF_FUNC (v, luma, sse2)
 LF_IFUNC(v, luma_intra, sse2)
+LF_FUNC (h, luma, avx)
+LF_IFUNC(h, luma_intra, avx)
+LF_FUNC (v, luma, avx)
+LF_IFUNC(v, luma_intra, avx)

 /***********************************/
 /* weighted prediction */
@@ -314,15 +318,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
             c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
             c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;

-            c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext;
-            c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext;
-            c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext;
-            c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext;
+            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_mmxext;
+            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_mmxext;
+            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_mmxext;
+            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_mmxext;
 #if ARCH_X86_32
-            c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext;
-            c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext;
-            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
-            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
+            c->h264_v_loop_filter_luma= ff_deblock_v_luma_mmxext;
+            c->h264_h_loop_filter_luma= ff_deblock_h_luma_mmxext;
+            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_mmxext;
+            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_mmxext;
 #endif
             c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
             c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
@@ -360,10 +364,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;

 #if HAVE_ALIGNED_STACK
-                c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
-                c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
-                c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
-                c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
+                c->h264_v_loop_filter_luma = ff_deblock_v_luma_sse2;
+                c->h264_h_loop_filter_luma = ff_deblock_h_luma_sse2;
+                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_sse2;
+                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_sse2;
 #endif

                 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
@@ -377,6 +381,14 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
                 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
             }
+            if (mm_flags&AV_CPU_FLAG_AVX) {
+#if HAVE_ALIGNED_STACK
+                c->h264_v_loop_filter_luma = ff_deblock_v_luma_avx;
+                c->h264_h_loop_filter_luma = ff_deblock_h_luma_avx;
+                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_avx;
+                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_avx;
+#endif
+            }
         }
     }
 #endif