diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-28 23:35:17 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-28 23:35:17 +0000 |
commit | 270a85d259e44a560d7da70679906fa977f96e97 (patch) | |
tree | 64892d94bef903844d84bd33f0d29fc0059f2949 /libavcodec/x86/h264_intrapred.asm | |
parent | a912da761dcf2e9e5e8d134437dd121184f434c3 (diff) | |
download | ffmpeg-270a85d259e44a560d7da70679906fa977f96e97.tar.gz |
Fix some intra pred MMX functions that used MMXEXT instructions
Also add predict_4x4_dc MMXEXT function for vp8/h264.
Originally committed as revision 23873 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/h264_intrapred.asm')
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 48 |
1 files changed, 27 insertions, 21 deletions
```diff
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 4f0a43fc2b..0210aa0e86 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -138,12 +138,7 @@ cglobal pred16x16_dc_%1, 2,7
     add r5d, r6d
     lea r2d, [r2+r5+16]
     shr r2d, 5
-%ifidn %1, mmx
-    movd m0, r2d
-    punpcklbw m0, m0
-    punpcklwd m0, m0
-    punpckldq m0, m0
-%elifidn %1, mmxext
+%ifidn %1, mmxext
     movd m0, r2d
     punpcklbw m0, m0
     pshufw m0, m0, 0
@@ -185,7 +180,6 @@ cglobal pred16x16_dc_%1, 2,7
 %endmacro
 
 INIT_MMX
-PRED16x16_DC mmx, movq
 PRED16x16_DC mmxext, movq
 INIT_XMM
 PRED16x16_DC sse, movaps
@@ -337,8 +331,7 @@ PRED8x8_H ssse3
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-%macro PRED8x8_DC 1
-cglobal pred8x8_dc_rv40_%1, 2,7
+cglobal pred8x8_dc_rv40_mmxext, 2,7
     mov r4, r0
     sub r0, r1
     pxor mm0, mm0
@@ -358,16 +351,9 @@ cglobal pred8x8_dc_rv40_%1, 2,7
     add r5d, r6d
     lea r2d, [r2+r5+8]
     shr r2d, 4
-%ifidn %1, mmx
-    movd mm0, r2d
-    punpcklbw mm0, mm0
-    punpcklwd mm0, mm0
-    punpckldq mm0, mm0
-%else
     movd mm0, r2d
     punpcklbw mm0, mm0
     pshufw mm0, mm0, 0
-%endif
     mov r3d, 4
 .loop:
     movq [r4+r1*0], mm0
@@ -376,11 +362,6 @@ cglobal pred8x8_dc_rv40_%1, 2,7
     dec r3d
     jg .loop
     REP_RET
-%endmacro
-
-
-PRED8x8_DC mmx
-PRED8x8_DC mmxext
 
 ;-----------------------------------------------------------------------------
 ; void pred8x8_tm_vp8(uint8_t *src, int stride)
@@ -484,3 +465,28 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
     dec r2d
     jg .loop
     REP_RET
+
+cglobal pred4x4_dc_mmxext, 3,5
+    pxor mm7, mm7
+    mov r4, r0
+    sub r0, r2
+    movd mm0, [r0]
+    psadbw mm0, mm7
+    movzx r1d, byte [r0+r2*1-1]
+    movd r3d, mm0
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*2-1]
+    lea r0, [r0+r2*2]
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*1-1]
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*2-1]
+    add r3d, r1d
+    add r3d, 4
+    shr r3d, 3
+    imul r3d, 0x01010101
+    mov [r4+r2*0], r3d
+    mov [r0+r2*0], r3d
+    mov [r0+r2*1], r3d
+    mov [r0+r2*2], r3d
+    RET
```