diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2010-12-29 18:40:53 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-12-29 18:40:53 +0000 |
commit | 04207ef35395a4b3f23f906ad702467c37a73eb1 (patch) | |
tree | a6b3f2b41ea69113f5f82b138cda5f6a67ac7154 | |
parent | abab14eac052edbde798ecd58d98e0d91eabb698 (diff) | |
download | ffmpeg-04207ef35395a4b3f23f906ad702467c37a73eb1.tar.gz |
Port pred8x8l_horizontal_mmxext/ssse3 (H.264 intra prediction) from x264 to
FFmpeg. Original authors: Holger Lubitz <holger lubitz org>, Jason
Garrett-Glaser <darkshikari gmail com> (approves LGPL relicensing for this code) and
Loren Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing
for this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as
part of Google's GCI 2010.
Originally committed as revision 26139 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 79 | ||||
-rw-r--r-- | libavcodec/x86/h264_intrapred_init.c | 4 |
2 files changed, 83 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index bb7cf7386e..3ab368e6fc 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -1244,6 +1244,85 @@ PRED8x8L_DC ssse3 %endif ;----------------------------------------------------------------------------- +; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride) +;----------------------------------------------------------------------------- +%ifdef CONFIG_GPL +%macro PRED8x8L_HORIZONTAL 1 +cglobal pred8x8l_horizontal_%1, 4,4 + sub r0, r3 + lea r2, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r2+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r2, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r2] + mov r0, r2 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1, r1 ; top_left + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq mm3, mm7 + lea r1, [r0+r3*2] + movq mm7, mm3 + punpckhbw mm3, mm3 + punpcklbw mm7, mm7 + pshufw mm0, mm3, 0xff + pshufw mm1, mm3, 0xaa + lea r2, [r1+r3*2] + pshufw mm2, mm3, 0x55 + pshufw mm3, mm3, 0x00 + pshufw mm4, mm7, 0xff + pshufw mm5, mm7, 0xaa + pshufw mm6, mm7, 0x55 + pshufw mm7, mm7, 0x00 + movq [r0+r3*1], mm0 + movq [r0+r3*2], mm1 + movq [r1+r3*1], mm2 + movq [r1+r3*2], mm3 + movq [r2+r3*1], mm4 + movq [r2+r3*2], mm5 + lea r0, [r2+r3*2] + movq [r0+r3*1], mm6 + movq [r0+r3*2], mm7 + RET +%endmacro + +INIT_MMX +%define PALIGNR PALIGNR_MMX 
+PRED8x8L_HORIZONTAL mmxext +%define PALIGNR PALIGNR_SSSE3 +PRED8x8L_HORIZONTAL ssse3 +%endif + +;----------------------------------------------------------------------------- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 7f4834a86c..2b5783adb3 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -63,6 +63,8 @@ void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topri void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); +void ff_pred8x8l_horizontal_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); @@ -103,6 +105,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) #if CONFIG_GPL h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; #endif h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; #if CONFIG_GPL @@ -159,6 +162,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) #if CONFIG_GPL h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; #endif if (codec_id == CODEC_ID_VP8) { h->pred8x8 [PLANE_PRED8x8] 
= ff_pred8x8_tm_vp8_ssse3; |