diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2010-12-29 18:33:10 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-12-29 18:33:10 +0000 |
commit | abab14eac052edbde798ecd58d98e0d91eabb698 (patch) | |
tree | 973a59cbd5a8d7b9dc391bc6b245752de71a3769 /libavcodec/x86/h264_intrapred.asm | |
parent | 2e93fd4b5ec89473fc16b61090c6f26ec8b8bf21 (diff) | |
download | ffmpeg-abab14eac052edbde798ecd58d98e0d91eabb698.tar.gz |
Port pred8x8l_dc_mmx/ssse3 (H.264 intra prediction) from x264 to FFmpeg.
Original authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser
<darkshikari gmail com> (approves LGPL relicensing for this code) and Loren
Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing for
this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as
part of Google's GCI 2010.
Originally committed as revision 26138 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/h264_intrapred.asm')
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 105 |
1 files changed, 105 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 5b8b4b4eec..bb7cf7386e 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -43,6 +43,7 @@ cextern pb_1
 cextern pb_3
 cextern pw_4
 cextern pw_5
+cextern pw_8
 cextern pw_16
 cextern pw_17
 cextern pw_32
@@ -1139,6 +1140,110 @@ PRED8x8L_TOP_DC ssse3
 %endif
 
 ;-----------------------------------------------------------------------------
+;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+%ifdef CONFIG_GPL
+%macro PRED8x8L_DC 1
+cglobal pred8x8l_dc_%1, 4,5
+    sub r0, r3
+    lea r4, [r0+r3*2]
+    movq mm0, [r0+r3*1-8]
+    punpckhbw mm0, [r0+r3*0-8]
+    movq mm1, [r4+r3*1-8]
+    punpckhbw mm1, [r0+r3*2-8]
+    mov r4, r0
+    punpckhwd mm1, mm0
+    lea r0, [r0+r3*4]
+    movq mm2, [r0+r3*1-8]
+    punpckhbw mm2, [r0+r3*0-8]
+    lea r0, [r0+r3*2]
+    movq mm3, [r0+r3*1-8]
+    punpckhbw mm3, [r0+r3*0-8]
+    punpckhwd mm3, mm2
+    punpckhdq mm3, mm1
+    lea r0, [r0+r3*2]
+    movq mm0, [r0+r3*0-8]
+    movq mm1, [r4]
+    mov r0, r4
+    movq mm4, mm3
+    movq mm2, mm3
+    PALIGNR mm4, mm0, 7, mm0
+    PALIGNR mm1, mm2, 1, mm2
+    test r1, r1
+    jnz .do_left
+.fix_lt_1:
+    movq mm5, mm3
+    pxor mm5, mm4
+    psrlq mm5, 56
+    psllq mm5, 48
+    pxor mm1, mm5
+    jmp .do_left
+.fix_lt_2:
+    movq mm5, mm3
+    pxor mm5, mm2
+    psllq mm5, 56
+    psrlq mm5, 56
+    pxor mm2, mm5
+    test r2, r2
+    jnz .body
+.fix_tr_1:
+    movq mm5, mm3
+    pxor mm5, mm1
+    psrlq mm5, 56
+    psllq mm5, 56
+    pxor mm1, mm5
+    jmp .body
+.do_left:
+    movq mm0, mm4
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+    movq mm4, mm0
+    movq mm7, mm2
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    psllq mm1, 56
+    PALIGNR mm7, mm1, 7, mm3
+    movq mm0, [r0-8]
+    movq mm3, [r0]
+    movq mm1, [r0+8]
+    movq mm2, mm3
+    movq mm4, mm3
+    PALIGNR mm2, mm0, 7, mm0
+    PALIGNR mm1, mm4, 1, mm4
+    test r1, r1
+    jz .fix_lt_2
+    test r2, r2
+    jz .fix_tr_1
+.body
+    lea r1, [r0+r3*2]
+    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+    pxor mm0, mm0
+    pxor mm1, mm1
+    lea r2, [r1+r3*2]
+    psadbw mm0, mm7
+    psadbw mm1, mm6
+    paddw mm0, [pw_8]
+    paddw mm0, mm1
+    lea r4, [r2+r3*2]
+    psrlw mm0, 4
+    pshufw mm0, mm0, 0
+    packuswb mm0, mm0
+    movq [r0+r3*1], mm0
+    movq [r0+r3*2], mm0
+    movq [r1+r3*1], mm0
+    movq [r1+r3*2], mm0
+    movq [r2+r3*1], mm0
+    movq [r2+r3*2], mm0
+    movq [r4+r3*1], mm0
+    movq [r4+r3*2], mm0
+    RET
+%endmacro
+INIT_MMX
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DC mmxext
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DC ssse3
+%endif
+
+;-----------------------------------------------------------------------------
 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
 
```