diff options
author | Måns Rullgård <mans@mansr.com> | 2008-10-27 00:25:19 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2008-10-27 00:25:19 +0000 |
commit | d2d398590d6a3fef4d3ad13849a0dc733b63ad6b (patch) | |
tree | 17e0f90256a487f8ff1ddbe183323e6080969565 /libavcodec | |
parent | 83ad74e708c68c73974bfe07ec8a6c7d163a56bf (diff) | |
download | ffmpeg-d2d398590d6a3fef4d3ad13849a0dc733b63ad6b.tar.gz |
ARM: move add_pixels_clamped_ARM() to dsputil_arm_s.S
Originally committed as revision 15728 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/armv4l/dsputil_arm.c | 96 | ||||
-rw-r--r-- | libavcodec/armv4l/dsputil_arm_s.S | 89 |
2 files changed, 92 insertions, 93 deletions
diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c
index 98f49cfdb5..64153ad6cd 100644
--- a/libavcodec/armv4l/dsputil_arm.c
+++ b/libavcodec/armv4l/dsputil_arm.c
@@ -66,98 +66,8 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
 
-static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
-{
-    __asm__ volatile (
-        "mov r10, #8 \n\t"
-
-        "1: \n\t"
-
-        /* load dest */
-        "ldr r4, [%1] \n\t"
-        /* block[0] and block[1]*/
-        "ldrsh r5, [%0] \n\t"
-        "ldrsh r7, [%0, #2] \n\t"
-        "and r6, r4, #0xFF \n\t"
-        "and r8, r4, #0xFF00 \n\t"
-        "add r6, r5, r6 \n\t"
-        "add r8, r7, r8, lsr #8 \n\t"
-        "mvn r5, r5 \n\t"
-        "mvn r7, r7 \n\t"
-        "tst r6, #0x100 \n\t"
-        "movne r6, r5, lsr #24 \n\t"
-        "tst r8, #0x100 \n\t"
-        "movne r8, r7, lsr #24 \n\t"
-        "mov r9, r6 \n\t"
-        "ldrsh r5, [%0, #4] \n\t" /* moved form [A] */
-        "orr r9, r9, r8, lsl #8 \n\t"
-        /* block[2] and block[3] */
-        /* [A] */
-        "ldrsh r7, [%0, #6] \n\t"
-        "and r6, r4, #0xFF0000 \n\t"
-        "and r8, r4, #0xFF000000 \n\t"
-        "add r6, r5, r6, lsr #16 \n\t"
-        "add r8, r7, r8, lsr #24 \n\t"
-        "mvn r5, r5 \n\t"
-        "mvn r7, r7 \n\t"
-        "tst r6, #0x100 \n\t"
-        "movne r6, r5, lsr #24 \n\t"
-        "tst r8, #0x100 \n\t"
-        "movne r8, r7, lsr #24 \n\t"
-        "orr r9, r9, r6, lsl #16 \n\t"
-        "ldr r4, [%1, #4] \n\t" /* moved form [B] */
-        "orr r9, r9, r8, lsl #24 \n\t"
-        /* store dest */
-        "ldrsh r5, [%0, #8] \n\t" /* moved form [C] */
-        "str r9, [%1] \n\t"
-
-        /* load dest */
-        /* [B] */
-        /* block[4] and block[5] */
-        /* [C] */
-        "ldrsh r7, [%0, #10] \n\t"
-        "and r6, r4, #0xFF \n\t"
-        "and r8, r4, #0xFF00 \n\t"
-        "add r6, r5, r6 \n\t"
-        "add r8, r7, r8, lsr #8 \n\t"
-        "mvn r5, r5 \n\t"
-        "mvn r7, r7 \n\t"
-        "tst r6, #0x100 \n\t"
-        "movne r6, r5, lsr #24 \n\t"
-        "tst r8, #0x100 \n\t"
-        "movne r8, r7, lsr #24 \n\t"
-        "mov r9, r6 \n\t"
-        "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
-        "orr r9, r9, r8, lsl #8 \n\t"
-        /* block[6] and block[7] */
-        /* [D] */
-        "ldrsh r7, [%0, #14] \n\t"
-        "and r6, r4, #0xFF0000 \n\t"
-        "and r8, r4, #0xFF000000 \n\t"
-        "add r6, r5, r6, lsr #16 \n\t"
-        "add r8, r7, r8, lsr #24 \n\t"
-        "mvn r5, r5 \n\t"
-        "mvn r7, r7 \n\t"
-        "tst r6, #0x100 \n\t"
-        "movne r6, r5, lsr #24 \n\t"
-        "tst r8, #0x100 \n\t"
-        "movne r8, r7, lsr #24 \n\t"
-        "orr r9, r9, r6, lsl #16 \n\t"
-        "add %0, %0, #16 \n\t" /* moved from [E] */
-        "orr r9, r9, r8, lsl #24 \n\t"
-        "subs r10, r10, #1 \n\t" /* moved from [F] */
-        /* store dest */
-        "str r9, [%1, #4] \n\t"
-
-        /* [E] */
-        /* [F] */
-        "add %1, %1, %2 \n\t"
-        "bne 1b \n\t"
-        : "+r"(block),
-          "+r"(dest)
-        : "r"(line_size)
-        : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
-}
+extern void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
+                                      int line_size);
 
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
    converted */
@@ -200,7 +110,7 @@ static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
 #ifdef HAVE_IWMMXT
     add_pixels_clamped_iwmmxt(block, dest, line_size);
 #else
-    add_pixels_clamped_ARM(block, dest, line_size);
+    ff_add_pixels_clamped_ARM(block, dest, line_size);
 #endif
 }
 #endif
diff --git a/libavcodec/armv4l/dsputil_arm_s.S b/libavcodec/armv4l/dsputil_arm_s.S
index 011925106e..ba06f3740f 100644
--- a/libavcodec/armv4l/dsputil_arm_s.S
+++ b/libavcodec/armv4l/dsputil_arm_s.S
@@ -708,3 +708,92 @@ function put_no_rnd_pixels8_xy2_arm, export=1
         .word 0xFCFCFCFC >> 2
         .word 0x0F0F0F0F
         .endfunc
+
+@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
+function ff_add_pixels_clamped_ARM, export=1
+        push {r4-r10}
+        mov r10, #8
+1:
+        ldr r4, [r1] /* load dest */
+        /* block[0] and block[1]*/
+        ldrsh r5, [r0]
+        ldrsh r7, [r0, #2]
+        and r6, r4, #0xFF
+        and r8, r4, #0xFF00
+        add r6, r5, r6
+        add r8, r7, r8, lsr #8
+        mvn r5, r5
+        mvn r7, r7
+        tst r6, #0x100
+        movne r6, r5, lsr #24
+        tst r8, #0x100
+        movne r8, r7, lsr #24
+        mov r9, r6
+        ldrsh r5, [r0, #4] /* moved form [A] */
+        orr r9, r9, r8, lsl #8
+        /* block[2] and block[3] */
+        /* [A] */
+        ldrsh r7, [r0, #6]
+        and r6, r4, #0xFF0000
+        and r8, r4, #0xFF000000
+        add r6, r5, r6, lsr #16
+        add r8, r7, r8, lsr #24
+        mvn r5, r5
+        mvn r7, r7
+        tst r6, #0x100
+        movne r6, r5, lsr #24
+        tst r8, #0x100
+        movne r8, r7, lsr #24
+        orr r9, r9, r6, lsl #16
+        ldr r4, [r1, #4] /* moved form [B] */
+        orr r9, r9, r8, lsl #24
+        /* store dest */
+        ldrsh r5, [r0, #8] /* moved form [C] */
+        str r9, [r1]
+
+        /* load dest */
+        /* [B] */
+        /* block[4] and block[5] */
+        /* [C] */
+        ldrsh r7, [r0, #10]
+        and r6, r4, #0xFF
+        and r8, r4, #0xFF00
+        add r6, r5, r6
+        add r8, r7, r8, lsr #8
+        mvn r5, r5
+        mvn r7, r7
+        tst r6, #0x100
+        movne r6, r5, lsr #24
+        tst r8, #0x100
+        movne r8, r7, lsr #24
+        mov r9, r6
+        ldrsh r5, [r0, #12] /* moved from [D] */
+        orr r9, r9, r8, lsl #8
+        /* block[6] and block[7] */
+        /* [D] */
+        ldrsh r7, [r0, #14]
+        and r6, r4, #0xFF0000
+        and r8, r4, #0xFF000000
+        add r6, r5, r6, lsr #16
+        add r8, r7, r8, lsr #24
+        mvn r5, r5
+        mvn r7, r7
+        tst r6, #0x100
+        movne r6, r5, lsr #24
+        tst r8, #0x100
+        movne r8, r7, lsr #24
+        orr r9, r9, r6, lsl #16
+        add r0, r0, #16 /* moved from [E] */
+        orr r9, r9, r8, lsl #24
+        subs r10, r10, #1 /* moved from [F] */
+        /* store dest */
+        str r9, [r1, #4]
+
+        /* [E] */
+        /* [F] */
+        add r1, r1, r2
+        bne 1b
+
+        pop {r4-r10}
+        bx lr
+        .endfunc |