diff options
| Field | Value | Date |
|---|---|---|
| author | Måns Rullgård <mans@mansr.com> | 2010-02-09 16:13:31 +0000 |
| committer | Måns Rullgård <mans@mansr.com> | 2010-02-09 16:13:31 +0000 |
| commit | 74cc33c2354c24986dc5332a7aeb1a9516a1d0c5 (patch) | |
| tree | 71e8c3c98d676ac4261801a8eaf5d1e3e085066b /libavcodec | |
| parent | 39a760f6789fd0a6a5aad1c96a7b2c125e9f52d3 (diff) | |
| download | ffmpeg-74cc33c2354c24986dc5332a7aeb1a9516a1d0c5.tar.gz | |
ARMv6 optimised pix_abs16_y2
Originally committed as revision 21699 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | libavcodec/arm/dsputil_armv6.S | 58 |
| -rw-r--r-- | libavcodec/arm/dsputil_init_armv6.c | 3 |
2 files changed, 61 insertions, 0 deletions
```diff
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index 88591cb116..5f4f5e43c9 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -370,3 +370,61 @@ function ff_pix_abs16_x2_armv6, export=1
         pop             {r4-r11, pc}
 .endfunc
+
+.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
+        ldr             \n0, [r2]
+        eor             \n1, \p0, \n0
+        uhadd8          \p0, \p0, \n0
+        and             \n1, \n1, lr
+        ldr             \n2, [r1]
+        uadd8           \p0, \p0, \n1
+        ldr             \n1, [r2, #4]
+        usada8          r0, \p0, \n2, r0
+        pld             [r1, r3]
+        eor             \n3, \p1, \n1
+        uhadd8          \p1, \p1, \n1
+        and             \n3, \n3, lr
+        ldr             \p0, [r1, #4]
+        uadd8           \p1, \p1, \n3
+        ldr             \n2, [r2, #8]
+        usada8          r0, \p1, \p0, r0
+        pld             [r2, r3]
+        eor             \p0, \p2, \n2
+        uhadd8          \p2, \p2, \n2
+        and             \p0, \p0, lr
+        ldr             \p1, [r1, #8]
+        uadd8           \p2, \p2, \p0
+        ldr             \n3, [r2, #12]
+        usada8          r0, \p2, \p1, r0
+        eor             \p1, \p3, \n3
+        uhadd8          \p3, \p3, \n3
+        and             \p1, \p1, lr
+        ldr             \p0, [r1, #12]
+        uadd8           \p3, \p3, \p1
+        add             r1, r1, r3
+        usada8          r0, \p3, \p0, r0
+        add             r2, r2, r3
+.endm
+
+function ff_pix_abs16_y2_armv6, export=1
+        pld             [r1]
+        pld             [r2]
+        ldr             r12, [sp]
+        push            {r4-r11, lr}
+        mov             r0, #0
+        mov             lr, #1
+        orr             lr, lr, lr, lsl #8
+        orr             lr, lr, lr, lsl #16
+        ldr             r4, [r2]
+        ldr             r5, [r2, #4]
+        ldr             r6, [r2, #8]
+        ldr             r7, [r2, #12]
+        add             r2, r2, r3
+1:
+        usad_y2         r4, r5, r6, r7, r8, r9, r10, r11
+        subs            r12, r12, #2
+        usad_y2         r8, r9, r10, r11, r4, r5, r6, r7
+        bgt             1b
+
+        pop             {r4-r11, pc}
+.endfunc
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 78367150d1..83c1ea8716 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -54,6 +54,8 @@ int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
                        int line_size, int h);
 int ff_pix_abs16_x2_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
                           int line_size, int h);
+int ff_pix_abs16_y2_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
+                          int line_size, int h);
 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 {
@@ -90,6 +92,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
     c->pix_abs[0][0] = ff_pix_abs16_armv6;
     c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
+    c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;
     c->sad[0] = ff_pix_abs16_armv6;
 }
```