diff options
author | Måns Rullgård <mans@mansr.com> | 2009-12-02 00:37:36 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2009-12-02 00:37:36 +0000 |
commit | 1025d19dd7b53631c77a66c9057fbf1f417fc769 (patch) | |
tree | c00415d3fbe5bbdbb32f61faf7e0e801324272a5 | |
parent | 04e7f6d2d01cba4c7fb2ad84b13819fa3e4e1425 (diff) | |
download | ffmpeg-1025d19dd7b53631c77a66c9057fbf1f417fc769.tar.gz |
ARM: NEON 2xN chroma MC
Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 4 | ||||
-rw-r--r-- | libavcodec/arm/h264dsp_neon.S | 70 |
2 files changed, 74 insertions, 0 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 177497e020..920b2e87c9 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int); void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); +void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); +void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); @@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) if (CONFIG_H264_DECODER) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon; c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon; c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon; diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S index 5c54fa3db1..08ff20702f 100644 --- a/libavcodec/arm/h264dsp_neon.S +++ b/libavcodec/arm/h264dsp_neon.S @@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 .endfunc .endm + .macro h264_chroma_mc2 type +function ff_\type\()_h264_chroma_mc2_neon, export=1 + push {r4-r6, lr} + ldr r4, [sp, #16] + ldr lr, [sp, #20] + pld [r1] + pld [r1, r2] + orrs r5, r4, lr + beq 2f + + mul r5, r4, lr + rsb r6, r5, lr, lsl #3 + rsb r12, r5, r4, lsl #3 + sub r4, r5, r4, lsl #3 + sub r4, r4, lr, lsl #3 + add r4, r4, #64 + vdup.8 d0, r4 + vdup.8 d2, r12 + vdup.8 d1, r6 + vdup.8 d3, r5 + vtrn.16 q0, q1 +1: + vld1.32 {d4[0]}, [r1], r2 + vld1.32 {d4[1]}, [r1], r2 + vrev64.32 d5, d4 + vld1.32 {d5[1]}, [r1] + vext.8 q3, q2, q2, #1 + vtrn.16 q2, q3 + vmull.u8 q8, d4, d0 + vmlal.u8 q8, d5, d1 +.ifc \type,avg + vld1.16 {d18[0]}, [r0,:16], r2 + vld1.16 {d18[1]}, [r0,:16] + sub r0, r0, r2 +.endif + vtrn.32 d16, d17 + vadd.i16 d16, d16, d17 + vrshrn.u16 d16, q8, #6 +.ifc \type,avg + vrhadd.u8 d16, d16, d18 +.endif + vst1.16 {d16[0]}, [r0,:16], r2 + vst1.16 {d16[1]}, [r0,:16], r2 + subs r3, r3, #2 + bgt 1b + pop {r4-r6, pc} +2: +.ifc \type,put + ldrh r5, [r1], r2 + strh r5, [r0], r2 + ldrh r6, [r1], r2 + strh r6, [r0], r2 +.else + vld1.16 {d16[0]}, [r1], r2 + vld1.16 {d16[1]}, [r1], r2 + vld1.16 {d18[0]}, [r0,:16], r2 + vld1.16 {d18[1]}, [r0,:16] + sub r0, r0, r2 + vrhadd.u8 d16, d16, d18 + vst1.16 {d16[0]}, [r0,:16], r2 + vst1.16 {d16[1]}, [r0,:16], r2 +.endif + subs r3, r3, #2 + bgt 2b + pop {r4-r6, pc} + .endfunc +.endm + .text .align @@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1 h264_chroma_mc8 avg h264_chroma_mc4 put h264_chroma_mc4 avg + h264_chroma_mc2 put + h264_chroma_mc2 avg /* H.264 loop filter */ |