diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-28 23:53:07 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-28 23:53:07 +0000 |
commit | fb9927ad7dca23d348fa4d915328e030c40f81de (patch) | |
tree | 81787356716c5922284f8f32f61b47acad9f2e94 | |
parent | 8b746bb47308730ede2fe06e64ddfa66b64c6450 (diff) | |
download | ffmpeg-fb9927ad7dca23d348fa4d915328e030c40f81de.tar.gz |
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
Originally committed as revision 23875 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 78 | ||||
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 6 |
2 files changed, 84 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 085460cdfd..2bd19f9ed4 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -494,3 +494,81 @@ cglobal pred4x4_dc_mmxext, 3,5
     mov [r0+r2*1], r3d
     mov [r0+r2*2], r3d
     RET
+
+;-----------------------------------------------------------------------------
+; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED4x4_TM_MMX 1
+cglobal pred4x4_tm_vp8_%1, 3,6
+    sub r0, r2
+    pxor mm7, mm7
+    movd mm0, [r0]
+    punpcklbw mm0, mm7
+    movzx r4d, byte [r0-1]
+    mov r5d, 2
+.loop:
+    movzx r1d, byte [r0+r2*1-1]
+    movzx r3d, byte [r0+r2*2-1]
+    sub r1d, r4d
+    sub r3d, r4d
+    movd mm2, r1d
+    movd mm4, r3d
+%ifidn %1, mmx
+    punpcklwd mm2, mm2
+    punpcklwd mm4, mm4
+    punpckldq mm2, mm2
+    punpckldq mm4, mm4
+%else
+    pshufw mm2, mm2, 0
+    pshufw mm4, mm4, 0
+%endif
+    paddw mm2, mm0
+    paddw mm4, mm0
+    packuswb mm2, mm2
+    packuswb mm4, mm4
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm4
+    lea r0, [r0+r2*2]
+    dec r5d
+    jg .loop
+    REP_RET
+%endmacro
+
+PRED4x4_TM_MMX mmx
+PRED4x4_TM_MMX mmxext
+
+cglobal pred4x4_tm_vp8_ssse3, 3,3
+    sub r0, r2
+    movq mm6, [tm_shuf]
+    pxor mm1, mm1
+    movd mm0, [r0]
+    punpcklbw mm0, mm1
+    movd mm7, [r0-4]
+    pshufb mm7, mm6
+    lea r1, [r0+r2*2]
+    movd mm2, [r0+r2*1-4]
+    movd mm3, [r0+r2*2-4]
+    movd mm4, [r1+r2*1-4]
+    movd mm5, [r1+r2*2-4]
+    pshufb mm2, mm6
+    pshufb mm3, mm6
+    pshufb mm4, mm6
+    pshufb mm5, mm6
+    psubw mm2, mm7
+    psubw mm3, mm7
+    psubw mm4, mm7
+    psubw mm5, mm7
+    paddw mm2, mm0
+    paddw mm3, mm0
+    paddw mm4, mm0
+    paddw mm5, mm0
+    packuswb mm2, mm2
+    packuswb mm3, mm3
+    packuswb mm4, mm4
+    packuswb mm5, mm5
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm3
+    movd [r1+r2*1], mm4
+    movd [r1+r2*2], mm5
+    RET
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 118dd7bd7e..60f81a64be 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -2345,6 +2345,9 @@ void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
 void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
 
 #if CONFIG_H264DSP
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
@@ -2358,6 +2361,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
+            h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx;
         }
     }
 
@@ -2370,6 +2374,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
             h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
+            h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext;
         }
     }
 
@@ -2392,6 +2397,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
+            h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
         }
     }
 #endif