aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMåns Rullgård <mans@mansr.com>2009-12-02 00:37:36 +0000
committerMåns Rullgård <mans@mansr.com>2009-12-02 00:37:36 +0000
commit1025d19dd7b53631c77a66c9057fbf1f417fc769 (patch)
treec00415d3fbe5bbdbb32f61faf7e0e801324272a5
parent04e7f6d2d01cba4c7fb2ad84b13819fa3e4e1425 (diff)
downloadffmpeg-1025d19dd7b53631c77a66c9057fbf1f417fc769.tar.gz
ARM: NEON 2xN chroma MC
Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/arm/dsputil_init_neon.c4
-rw-r--r--libavcodec/arm/h264dsp_neon.S70
2 files changed, 74 insertions, 0 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 177497e020..920b2e87c9 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
@@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
if (CONFIG_H264_DECODER) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5c54fa3db1..08ff20702f 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
.endfunc
.endm
+ .macro h264_chroma_mc2 type
+function ff_\type\()_h264_chroma_mc2_neon, export=1
+ push {r4-r6, lr}
+ ldr r4, [sp, #16]
+ ldr lr, [sp, #20]
+ pld [r1]
+ pld [r1, r2]
+ orrs r5, r4, lr
+ beq 2f
+
+ mul r5, r4, lr
+ rsb r6, r5, lr, lsl #3
+ rsb r12, r5, r4, lsl #3
+ sub r4, r5, r4, lsl #3
+ sub r4, r4, lr, lsl #3
+ add r4, r4, #64
+ vdup.8 d0, r4
+ vdup.8 d2, r12
+ vdup.8 d1, r6
+ vdup.8 d3, r5
+ vtrn.16 q0, q1
+1:
+ vld1.32 {d4[0]}, [r1], r2
+ vld1.32 {d4[1]}, [r1], r2
+ vrev64.32 d5, d4
+ vld1.32 {d5[1]}, [r1]
+ vext.8 q3, q2, q2, #1
+ vtrn.16 q2, q3
+ vmull.u8 q8, d4, d0
+ vmlal.u8 q8, d5, d1
+.ifc \type,avg
+ vld1.16 {d18[0]}, [r0,:16], r2
+ vld1.16 {d18[1]}, [r0,:16]
+ sub r0, r0, r2
+.endif
+ vtrn.32 d16, d17
+ vadd.i16 d16, d16, d17
+ vrshrn.u16 d16, q8, #6
+.ifc \type,avg
+ vrhadd.u8 d16, d16, d18
+.endif
+ vst1.16 {d16[0]}, [r0,:16], r2
+ vst1.16 {d16[1]}, [r0,:16], r2
+ subs r3, r3, #2
+ bgt 1b
+ pop {r4-r6, pc}
+2:
+.ifc \type,put
+ ldrh r5, [r1], r2
+ strh r5, [r0], r2
+ ldrh r6, [r1], r2
+ strh r6, [r0], r2
+.else
+ vld1.16 {d16[0]}, [r1], r2
+ vld1.16 {d16[1]}, [r1], r2
+ vld1.16 {d18[0]}, [r0,:16], r2
+ vld1.16 {d18[1]}, [r0,:16]
+ sub r0, r0, r2
+ vrhadd.u8 d16, d16, d18
+ vst1.16 {d16[0]}, [r0,:16], r2
+ vst1.16 {d16[1]}, [r0,:16], r2
+.endif
+ subs r3, r3, #2
+ bgt 2b
+ pop {r4-r6, pc}
+ .endfunc
+.endm
+
.text
.align
@@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 avg
+ h264_chroma_mc2 put
+ h264_chroma_mc2 avg
/* H.264 loop filter */