diff options
author | Jason Garrett-Glaser <jason@x264.com> | 2011-05-10 08:55:12 -0700 |
---|---|---|
committer | Jason Garrett-Glaser <jason@x264.com> | 2011-05-10 20:02:15 -0700 |
commit | 9f3d6ca4f16e9b1f6f89424e9d946bb3a6a40d91 (patch) | |
tree | 929096dfe2d6ee8e1f716d07383a486382d7fb15 /libavcodec/x86/h264dsp_mmx.c | |
parent | 8ad77b65b548a6b2f4707265ebd7e97f956acf0b (diff) | |
download | ffmpeg-9f3d6ca4f16e9b1f6f89424e9d946bb3a6a40d91.tar.gz |
Port x86 10-bit H.264 deblock asm from x264
Diffstat (limited to 'libavcodec/x86/h264dsp_mmx.c')
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 129 |
1 files changed, 81 insertions, 48 deletions
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 7d27c02ea2..42dae93f2d 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -218,45 +218,49 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
 );
 }

-#define LF_FUNC(DIR, TYPE, OPT) \
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
- int alpha, int beta, int8_t *tc0);
-#define LF_IFUNC(DIR, TYPE, OPT) \
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
- int alpha, int beta);
-
-LF_FUNC (h, chroma, mmxext)
-LF_IFUNC(h, chroma_intra, mmxext)
-LF_FUNC (v, chroma, mmxext)
-LF_IFUNC(v, chroma_intra, mmxext)
-
-LF_FUNC (h, luma, mmxext)
-LF_IFUNC(h, luma_intra, mmxext)
-#if HAVE_YASM && ARCH_X86_32
-LF_FUNC (v8, luma, mmxext)
-static void ff_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
+ int alpha, int beta, int8_t *tc0);
+#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
+ int alpha, int beta);
+
+#define LF_FUNCS(type, depth)\
+LF_FUNC (h, chroma, depth, mmxext)\
+LF_IFUNC(h, chroma_intra, depth, mmxext)\
+LF_FUNC (v, chroma, depth, mmxext)\
+LF_IFUNC(v, chroma_intra, depth, mmxext)\
+LF_FUNC (h, luma, depth, mmxext)\
+LF_IFUNC(h, luma_intra, depth, mmxext)\
+LF_FUNC (h, luma, depth, sse2)\
+LF_IFUNC(h, luma_intra, depth, sse2)\
+LF_FUNC (v, luma, depth, sse2)\
+LF_IFUNC(v, luma_intra, depth, sse2)\
+LF_FUNC (h, luma, depth, avx)\
+LF_IFUNC(h, luma_intra, depth, avx)\
+LF_FUNC (v, luma, depth, avx)\
+LF_IFUNC(v, luma_intra, depth, avx)
+
+LF_FUNCS( uint8_t, 8)
+LF_FUNCS(uint16_t, 10)
+
+LF_FUNC (v8, luma, 8, mmxext)
+static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
 if((tc0[0] & tc0[1]) >= 0)
- ff_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
+ ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
 if((tc0[2] & tc0[3]) >= 0)
- ff_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
+ ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
 }
-LF_IFUNC(v8, luma_intra, mmxext)
-static void ff_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
+LF_IFUNC(v8, luma_intra, 8, mmxext)
+static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
 {
- ff_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
- ff_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
+ ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
+ ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
 }
-#endif

-LF_FUNC (h, luma, sse2)
-LF_IFUNC(h, luma_intra, sse2)
-LF_FUNC (v, luma, sse2)
-LF_IFUNC(v, luma_intra, sse2)
-LF_FUNC (h, luma, avx)
-LF_IFUNC(h, luma_intra, avx)
-LF_FUNC (v, luma, avx)
-LF_IFUNC(v, luma_intra, avx)
+LF_FUNC (v, luma, 10, mmxext)
+LF_IFUNC(v, luma_intra, 10, mmxext)

 /***********************************/
 /* weighted prediction */
@@ -318,15 +322,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;

- c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_mmxext;
- c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_mmxext;
- c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_mmxext;
- c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_mmxext;
+ c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
+ c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
+ c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
+ c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
 #if ARCH_X86_32
- c->h264_v_loop_filter_luma= ff_deblock_v_luma_mmxext;
- c->h264_h_loop_filter_luma= ff_deblock_h_luma_mmxext;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_mmxext;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_mmxext;
+ c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
+ c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
 #endif
 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
@@ -364,10 +368,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;

 #if HAVE_ALIGNED_STACK
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_sse2;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_sse2;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_sse2;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_sse2;
+ c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
+ c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
 #endif

 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
@@ -383,10 +387,39 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
 }
 if (mm_flags&AV_CPU_FLAG_AVX) {
 #if HAVE_ALIGNED_STACK
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_avx;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_avx;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_avx;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_avx;
+ c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
+ c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+#endif
+ }
+ }
+ }
+#endif
+ } else if (bit_depth == 10) {
+#if HAVE_YASM
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+#if ARCH_X86_32
+ c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
+ c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
+#endif
+ if (mm_flags&AV_CPU_FLAG_SSE2) {
+#if HAVE_ALIGNED_STACK
+ c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
+ c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
+#endif
+ }
+ if (mm_flags&AV_CPU_FLAG_AVX) {
+#if HAVE_ALIGNED_STACK
+ c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
+ c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
+ c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
+ c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
 #endif
 }
 }