diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-12-29 18:00:26 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-12-29 18:00:26 +0000 |
commit | a2dfe8d18d30b686c2f5401d7de8399d7b3fa4eb (patch) | |
tree | c67314e233fc7a0dd69180bd7131c028f67a2468 /libavcodec/x86/h264_intrapred.asm | |
parent | 8d660f7527050671b81531743f3e26c901ae27d4 (diff) | |
download | ffmpeg-a2dfe8d18d30b686c2f5401d7de8399d7b3fa4eb.tar.gz |
Port pred8x8_dc_mmxext (H.264 intra prediction) from x264 to FFmpeg. Original
authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser <darkshikari
gmail com> (approves LGPL relicensing for this code) and Loren Merritt <lorenm
at u dot washington dot edu> (approves LGPL relicensing for this code). Patch
by Daniel Kang <daniel dot d dot kang at gmail com>, as part of Google's GCI
2010.
Originally committed as revision 26135 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/h264_intrapred.asm')
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 4f7ba3738a..12193defc7 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -866,6 +866,68 @@ cglobal pred8x8_top_dc_mmxext, 2,5
 %endif
 
 ;-----------------------------------------------------------------------------
+; void pred8x8_dc_mmxext(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%ifdef CONFIG_GPL               ; routine is only assembled when CONFIG_GPL is defined
+INIT_MMX
+cglobal pred8x8_dc_mmxext, 2,5  ; fills an 8x8 block with per-4x4-quadrant DC values built from the row above and the column to the left; uses r0-r4 (presumably "2 args, 5 GPRs" in x86inc terms - confirm)
+    sub       r0, r1            ; r0 -> row above the block (assumes r0 = src, r1 = stride per the prototype)
+    pxor      m7, m7            ; m7 = 0, used as the zero operand for psadbw/pavgw below
+    movd      m0, [r0+0]        ; 4 bytes above the left half (upper dword of m0 is zeroed)
+    movd      m1, [r0+4]        ; 4 bytes above the right half
+    psadbw    m0, m7            ; s0 = sum of the 4 top-left bytes (SAD against zero)
+    mov       r4, r0            ; remember the row-above pointer; r0 is advanced while summing the left column
+    psadbw    m1, m7            ; s1 = sum of the 4 top-right bytes
+
+    movzx    r2d, byte [r0+r1*1-1] ; left neighbour of row 0
+    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 1
+    lea       r0, [r0+r1*2]     ; advance two rows
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*1-1] ; left neighbour of row 2
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 3
+    add      r2d, r3d
+    lea       r0, [r0+r1*2]     ; advance two rows
+    movd      m2, r2d           ; s2 = sum of left neighbours of rows 0-3
+    movzx    r2d, byte [r0+r1*1-1] ; left neighbour of row 4
+    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 5
+    lea       r0, [r0+r1*2]     ; advance two rows
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*1-1] ; left neighbour of row 6
+    add      r2d, r3d
+    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 7
+    add      r2d, r3d
+    movd      m3, r2d           ; s3 = sum of left neighbours of rows 4-7
+
+    punpcklwd m0, m1            ; words: s0, s1, 0, 0
+    mov       r0, r4            ; restore r0 -> row above the block
+    punpcklwd m2, m3            ; words: s2, s3, 0, 0
+    punpckldq m0, m2            ; s0, s1, s2, s3
+    pshufw    m3, m0, 11110110b ; s2, s1, s3, s3
+    lea       r2, [r0+r1*2]     ; r2 = r0 + 2*stride (base for rows 2-3)
+    pshufw    m0, m0, 01110100b ; s0, s1, s3, s1
+    paddw     m0, m3            ; words: s0+s2, 2*s1, 2*s3, s1+s3 (each a sum over 8 samples)
+    lea       r3, [r2+r1*2]     ; r3 = r0 + 4*stride (base for rows 4-5)
+    psrlw     m0, 2             ; first half of the divide: x >> 2
+    pavgw     m0, m7            ; s0+s2, s1, s3, s1+s3 -> avg with 0 gives ((x>>2)+1)>>1 = (x+4)>>3, the rounded DC per quadrant
+    lea       r4, [r3+r1*2]     ; r4 = r0 + 6*stride (base for rows 6-7)
+    packuswb  m0, m0            ; bytes: d0 d1 d2 d3 d0 d1 d2 d3
+    punpcklbw m0, m0            ; bytes: d0 d0 d1 d1 d2 d2 d3 d3
+    movq      m1, m0            ; copy so the upper and lower halves can be expanded separately
+    punpcklbw m0, m0            ; 4 x d0, 4 x d1 : pattern for rows 0-3
+    punpckhbw m1, m1            ; 4 x d2, 4 x d3 : pattern for rows 4-7
+    movq [r0+r1*1], m0          ; row 0
+    movq [r0+r1*2], m0          ; row 1
+    movq [r2+r1*1], m0          ; row 2
+    movq [r2+r1*2], m0          ; row 3
+    movq [r3+r1*1], m1          ; row 4
+    movq [r3+r1*2], m1          ; row 5
+    movq [r4+r1*1], m1          ; row 6
+    movq [r4+r1*2], m1          ; row 7
+    RET
+%endif
+
+;-----------------------------------------------------------------------------
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;----------------------------------------------------------------------------- |