diff options
author | Diego Biurrun <diego@biurrun.de> | 2005-12-22 01:10:11 +0000 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2005-12-22 01:10:11 +0000 |
commit | bb270c0896b39e1ae9277355e3c120ed3feb64a3 (patch) | |
tree | fc2fc2b1216d19acb3879abb6ea5a3b400f43fe4 /libavcodec/armv4l/jrevdct_arm.S | |
parent | 50827fcf44f34521df4708cdb633809b56fb9df3 (diff) | |
download | ffmpeg-bb270c0896b39e1ae9277355e3c120ed3feb64a3.tar.gz |
COSMETICS: tabs --> spaces, some prettyprinting
Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/armv4l/jrevdct_arm.S')
-rw-r--r-- | libavcodec/armv4l/jrevdct_arm.S | 632 |
1 files changed, 316 insertions, 316 deletions
diff --git a/libavcodec/armv4l/jrevdct_arm.S b/libavcodec/armv4l/jrevdct_arm.S index 770091c736..294ea47509 100644 --- a/libavcodec/armv4l/jrevdct_arm.S +++ b/libavcodec/armv4l/jrevdct_arm.S @@ -1,6 +1,6 @@ /* C-like prototype : - void j_rev_dct_ARM(DCTBLOCK data) + void j_rev_dct_ARM(DCTBLOCK data) With DCTBLOCK being a pointer to an array of 64 'signed shorts' @@ -51,336 +51,336 @@ #define FIX_M_1_961570560_ID 40 #define FIX_M_2_562915447_ID 44 #define FIX_0xFFFF_ID 48 - .text - .align + .text + .align - .global j_rev_dct_ARM + .global j_rev_dct_ARM j_rev_dct_ARM: - stmdb sp!, { r4 - r12, lr } @ all callee saved regs + stmdb sp!, { r4 - r12, lr } @ all callee saved regs - sub sp, sp, #4 @ reserve some space on the stack - str r0, [ sp ] @ save the DCT pointer to the stack + sub sp, sp, #4 @ reserve some space on the stack + str r0, [ sp ] @ save the DCT pointer to the stack - mov lr, r0 @ lr = pointer to the current row - mov r12, #8 @ r12 = row-counter - add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array + mov lr, r0 @ lr = pointer to the current row + mov r12, #8 @ r12 = row-counter + add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array row_loop: - ldrsh r0, [lr, # 0] @ r0 = 'd0' - ldrsh r1, [lr, # 8] @ r1 = 'd1' - - @ Optimization for row that have all items except the first set to 0 - @ (this works as the DCTELEMS are always 4-byte aligned) - ldr r5, [lr, # 0] - ldr r2, [lr, # 4] - ldr r3, [lr, # 8] - ldr r4, [lr, #12] - orr r3, r3, r4 - orr r3, r3, r2 - orrs r5, r3, r5 - beq end_of_row_loop @ nothing to be done as ALL of them are '0' - orrs r2, r3, r1 - beq empty_row - - ldrsh r2, [lr, # 2] @ r2 = 'd2' - ldrsh r4, [lr, # 4] @ r4 = 'd4' - ldrsh r6, [lr, # 6] @ r6 = 'd6' - - ldr r3, [r11, #FIX_0_541196100_ID] - add r7, r2, r6 - ldr r5, [r11, #FIX_M_1_847759065_ID] - mul r7, r3, r7 @ r7 = z1 - ldr r3, [r11, #FIX_0_765366865_ID] - mla r6, r5, r6, r7 @ r6 = tmp2 - add r5, r0, r4 @ r5 = tmp0 - mla r2, r3, r2, r7 @ r2 = tmp3 - sub r3, r0, r4 @ r3 = tmp1 - - add r0, r2, r5, lsl #13 @ r0 = tmp10 - rsb r2, r2, r5, lsl #13 @ r2 = tmp13 - add r4, r6, r3, lsl #13 @ r4 = tmp11 - rsb r3, r6, r3, lsl #13 @ r3 = tmp12 - - stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 - - ldrsh r3, [lr, #10] @ r3 = 'd3' - ldrsh r5, [lr, #12] @ r5 = 'd5' - ldrsh r7, [lr, #14] @ r7 = 'd7' - - add r0, r3, r5 @ r0 = 'z2' - add r2, r1, r7 @ r2 = 'z1' - add r4, r3, r7 @ r4 = 'z3' - add r6, r1, r5 @ r6 = 'z4' - ldr r9, [r11, #FIX_1_175875602_ID] - add r8, r4, r6 @ r8 = z3 + z4 - ldr r10, [r11, #FIX_M_0_899976223_ID] - mul r8, r9, r8 @ r8 = 'z5' - ldr r9, [r11, #FIX_M_2_562915447_ID] - mul r2, r10, r2 @ r2 = 'z1' - ldr r10, [r11, #FIX_M_1_961570560_ID] - mul r0, r9, r0 @ r0 = 'z2' - ldr r9, [r11, #FIX_M_0_390180644_ID] - mla r4, r10, r4, r8 @ r4 = 'z3' - ldr r10, [r11, #FIX_0_298631336_ID] - mla r6, r9, r6, r8 @ r6 = 'z4' - ldr r9, [r11, #FIX_2_053119869_ID] - mla r7, r10, r7, r2 @ r7 = tmp0 + z1 - ldr r10, [r11, #FIX_3_072711026_ID] - mla r5, r9, r5, r0 @ r5 = tmp1 + z2 - ldr r9, [r11, #FIX_1_501321110_ID] - mla r3, r10, r3, r0 @ r3 = tmp2 + z2 - add r7, r7, r4 @ r7 = tmp0 - mla r1, r9, r1, r2 @ r1 = tmp3 + z1 - add r5, r5, r6 @ r5 = tmp1 - add r3, r3, r4 @ r3 = tmp2 - add r1, r1, r6 @ r1 = tmp3 - - ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 - @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 - - @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) - add r8, r0, r1 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, # 0] - - @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) - sub r8, r0, r1 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, #14] - - @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) - add r8, r6, r3 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, # 2] - - @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) - sub r8, r6, r3 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, #12] - - @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) - add r8, r4, r5 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, # 4] - - @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) - sub r8, r4, r5 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, #10] - - @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) - add r8, r2, r7 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, # 6] - - @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) - sub r8, r2, r7 - add r8, r8, #(1<<10) - mov r8, r8, asr #11 - strh r8, [lr, # 8] - - @ End of row loop - add lr, lr, #16 - subs r12, r12, #1 - bne row_loop - beq start_column_loop + ldrsh r0, [lr, # 0] @ r0 = 'd0' + ldrsh r1, [lr, # 8] @ r1 = 'd1' + + @ Optimization for row that have all items except the first set to 0 + @ (this works as the DCTELEMS are always 4-byte aligned) + ldr r5, [lr, # 0] + ldr r2, [lr, # 4] + ldr r3, [lr, # 8] + ldr r4, [lr, #12] + orr r3, r3, r4 + orr r3, r3, r2 + orrs r5, r3, r5 + beq end_of_row_loop @ nothing to be done as ALL of them are '0' + orrs r2, r3, r1 + beq empty_row + + ldrsh r2, [lr, # 2] @ r2 = 'd2' + ldrsh r4, [lr, # 4] @ r4 = 'd4' + ldrsh r6, [lr, # 6] @ r6 = 'd6' + + ldr r3, [r11, #FIX_0_541196100_ID] + add r7, r2, r6 + ldr r5, [r11, #FIX_M_1_847759065_ID] + mul r7, r3, r7 @ r7 = z1 + ldr r3, [r11, #FIX_0_765366865_ID] + mla r6, r5, r6, r7 @ r6 = tmp2 + add r5, r0, r4 @ r5 = tmp0 + mla r2, r3, r2, r7 @ r2 = tmp3 + sub r3, r0, r4 @ r3 = tmp1 + + add r0, r2, r5, lsl #13 @ r0 = tmp10 + rsb r2, r2, r5, lsl #13 @ r2 = tmp13 + add r4, r6, r3, lsl #13 @ r4 = tmp11 + rsb r3, r6, r3, lsl #13 @ r3 = tmp12 + + stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 + + ldrsh r3, [lr, #10] @ r3 = 'd3' + ldrsh r5, [lr, #12] @ r5 = 'd5' + ldrsh r7, [lr, #14] @ r7 = 'd7' + + add r0, r3, r5 @ r0 = 'z2' + add r2, r1, r7 @ r2 = 'z1' + add r4, r3, r7 @ r4 = 'z3' + add r6, r1, r5 @ r6 = 'z4' + ldr r9, [r11, #FIX_1_175875602_ID] + add r8, r4, r6 @ r8 = z3 + z4 + ldr r10, [r11, #FIX_M_0_899976223_ID] + mul r8, r9, r8 @ r8 = 'z5' + ldr r9, [r11, #FIX_M_2_562915447_ID] + mul r2, r10, r2 @ r2 = 'z1' + ldr r10, [r11, #FIX_M_1_961570560_ID] + mul r0, r9, r0 @ r0 = 'z2' + ldr r9, [r11, #FIX_M_0_390180644_ID] + mla r4, r10, r4, r8 @ r4 = 'z3' + ldr r10, [r11, #FIX_0_298631336_ID] + mla r6, r9, r6, r8 @ r6 = 'z4' + ldr r9, [r11, #FIX_2_053119869_ID] + mla r7, r10, r7, r2 @ r7 = tmp0 + z1 + ldr r10, [r11, #FIX_3_072711026_ID] + mla r5, r9, r5, r0 @ r5 = tmp1 + z2 + ldr r9, [r11, #FIX_1_501321110_ID] + mla r3, r10, r3, r0 @ r3 = tmp2 + z2 + add r7, r7, r4 @ r7 = tmp0 + mla r1, r9, r1, r2 @ r1 = tmp3 + z1 + add r5, r5, r6 @ r5 = tmp1 + add r3, r3, r4 @ r3 = tmp2 + add r1, r1, r6 @ r1 = tmp3 + + ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 + @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 + + @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) + add r8, r0, r1 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, # 0] + + @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) + sub r8, r0, r1 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, #14] + + @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) + add r8, r6, r3 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, # 2] + + @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) + sub r8, r6, r3 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, #12] + + @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) + add r8, r4, r5 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, # 4] + + @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) + sub r8, r4, r5 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, #10] + + @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) + add r8, r2, r7 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, # 6] + + @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) + sub r8, r2, r7 + add r8, r8, #(1<<10) + mov r8, r8, asr #11 + strh r8, [lr, # 8] + + @ End of row loop + add lr, lr, #16 + subs r12, r12, #1 + bne row_loop + beq start_column_loop empty_row: - ldr r1, [r11, #FIX_0xFFFF_ID] - mov r0, r0, lsl #2 - and r0, r0, r1 - add r0, r0, r0, lsl #16 - str r0, [lr, # 0] - str r0, [lr, # 4] - str r0, [lr, # 8] - str r0, [lr, #12] + ldr r1, [r11, #FIX_0xFFFF_ID] + mov r0, r0, lsl #2 + and r0, r0, r1 + add r0, r0, r0, lsl #16 + str r0, [lr, # 0] + str r0, [lr, # 4] + str r0, [lr, # 8] + str r0, [lr, #12] end_of_row_loop: - @ End of loop - add lr, lr, #16 - subs r12, r12, #1 - bne row_loop + @ End of loop + add lr, lr, #16 + subs r12, r12, #1 + bne row_loop start_column_loop: - @ Start of column loop - ldr lr, [ sp ] - mov r12, #8 + @ Start of column loop + ldr lr, [ sp ] + mov r12, #8 column_loop: - ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' - ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' - ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' - ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' - - ldr r3, [r11, #FIX_0_541196100_ID] - add r1, r2, r6 - ldr r5, [r11, #FIX_M_1_847759065_ID] - mul r1, r3, r1 @ r1 = z1 - ldr r3, [r11, #FIX_0_765366865_ID] - mla r6, r5, r6, r1 @ r6 = tmp2 - add r5, r0, r4 @ r5 = tmp0 - mla r2, r3, r2, r1 @ r2 = tmp3 - sub r3, r0, r4 @ r3 = tmp1 - - add r0, r2, r5, lsl #13 @ r0 = tmp10 - rsb r2, r2, r5, lsl #13 @ r2 = tmp13 - add r4, r6, r3, lsl #13 @ r4 = tmp11 - rsb r6, r6, r3, lsl #13 @ r6 = tmp12 - - ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' - ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' - ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' - ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' - - @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) - orr r9, r1, r3 - orr r10, r5, r7 - orrs r10, r9, r10 - beq empty_odd_column - - stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 - - add r0, r3, r5 @ r0 = 'z2' - add r2, r1, r7 @ r2 = 'z1' - add r4, r3, r7 @ r4 = 'z3' - add r6, r1, r5 @ r6 = 'z4' - ldr r9, [r11, #FIX_1_175875602_ID] - add r8, r4, r6 - ldr r10, [r11, #FIX_M_0_899976223_ID] - mul r8, r9, r8 @ r8 = 'z5' - ldr r9, [r11, #FIX_M_2_562915447_ID] - mul r2, r10, r2 @ r2 = 'z1' - ldr r10, [r11, #FIX_M_1_961570560_ID] - mul r0, r9, r0 @ r0 = 'z2' - ldr r9, [r11, #FIX_M_0_390180644_ID] - mla r4, r10, r4, r8 @ r4 = 'z3' - ldr r10, [r11, #FIX_0_298631336_ID] - mla r6, r9, r6, r8 @ r6 = 'z4' - ldr r9, [r11, #FIX_2_053119869_ID] - mla r7, r10, r7, r2 @ r7 = tmp0 + z1 - ldr r10, [r11, #FIX_3_072711026_ID] - mla r5, r9, r5, r0 @ r5 = tmp1 + z2 - ldr r9, [r11, #FIX_1_501321110_ID] - mla r3, r10, r3, r0 @ r3 = tmp2 + z2 - add r7, r7, r4 @ r7 = tmp0 - mla r1, r9, r1, r2 @ r1 = tmp3 + z1 - add r5, r5, r6 @ r5 = tmp1 - add r3, r3, r4 @ r3 = tmp2 - add r1, r1, r6 @ r1 = tmp3 - - ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 - @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 - - @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) - add r8, r0, r1 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #( 0*8)] - - @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) - sub r8, r0, r1 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #(14*8)] - - @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) - add r8, r4, r3 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #( 2*8)] - - @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) - sub r8, r4, r3 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #(12*8)] - - @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) - add r8, r6, r5 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #( 4*8)] - - @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) - sub r8, r6, r5 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #(10*8)] - - @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) - add r8, r2, r7 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #( 6*8)] - - @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) - sub r8, r2, r7 - add r8, r8, #(1<<17) - mov r8, r8, asr #18 - strh r8, [lr, #( 8*8)] - - @ End of row loop - add lr, lr, #2 - subs r12, r12, #1 - bne column_loop - beq the_end + ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' + ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' + ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' + ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' + + ldr r3, [r11, #FIX_0_541196100_ID] + add r1, r2, r6 + ldr r5, [r11, #FIX_M_1_847759065_ID] + mul r1, r3, r1 @ r1 = z1 + ldr r3, [r11, #FIX_0_765366865_ID] + mla r6, r5, r6, r1 @ r6 = tmp2 + add r5, r0, r4 @ r5 = tmp0 + mla r2, r3, r2, r1 @ r2 = tmp3 + sub r3, r0, r4 @ r3 = tmp1 + + add r0, r2, r5, lsl #13 @ r0 = tmp10 + rsb r2, r2, r5, lsl #13 @ r2 = tmp13 + add r4, r6, r3, lsl #13 @ r4 = tmp11 + rsb r6, r6, r3, lsl #13 @ r6 = tmp12 + + ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' + ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' + ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' + ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' + + @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) + orr r9, r1, r3 + orr r10, r5, r7 + orrs r10, r9, r10 + beq empty_odd_column + + stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 + + add r0, r3, r5 @ r0 = 'z2' + add r2, r1, r7 @ r2 = 'z1' + add r4, r3, r7 @ r4 = 'z3' + add r6, r1, r5 @ r6 = 'z4' + ldr r9, [r11, #FIX_1_175875602_ID] + add r8, r4, r6 + ldr r10, [r11, #FIX_M_0_899976223_ID] + mul r8, r9, r8 @ r8 = 'z5' + ldr r9, [r11, #FIX_M_2_562915447_ID] + mul r2, r10, r2 @ r2 = 'z1' + ldr r10, [r11, #FIX_M_1_961570560_ID] + mul r0, r9, r0 @ r0 = 'z2' + ldr r9, [r11, #FIX_M_0_390180644_ID] + mla r4, r10, r4, r8 @ r4 = 'z3' + ldr r10, [r11, #FIX_0_298631336_ID] + mla r6, r9, r6, r8 @ r6 = 'z4' + ldr r9, [r11, #FIX_2_053119869_ID] + mla r7, r10, r7, r2 @ r7 = tmp0 + z1 + ldr r10, [r11, #FIX_3_072711026_ID] + mla r5, r9, r5, r0 @ r5 = tmp1 + z2 + ldr r9, [r11, #FIX_1_501321110_ID] + mla r3, r10, r3, r0 @ r3 = tmp2 + z2 + add r7, r7, r4 @ r7 = tmp0 + mla r1, r9, r1, r2 @ r1 = tmp3 + z1 + add r5, r5, r6 @ r5 = tmp1 + add r3, r3, r4 @ r3 = tmp2 + add r1, r1, r6 @ r1 = tmp3 + + ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 + @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 + + @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) + add r8, r0, r1 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #( 0*8)] + + @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) + sub r8, r0, r1 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #(14*8)] + + @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) + add r8, r4, r3 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #( 2*8)] + + @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) + sub r8, r4, r3 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #(12*8)] + + @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) + add r8, r6, r5 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #( 4*8)] + + @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) + sub r8, r6, r5 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #(10*8)] + + @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) + add r8, r2, r7 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #( 6*8)] + + @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) + sub r8, r2, r7 + add r8, r8, #(1<<17) + mov r8, r8, asr #18 + strh r8, [lr, #( 8*8)] + + @ End of row loop + add lr, lr, #2 + subs r12, r12, #1 + bne column_loop + beq the_end empty_odd_column: - @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) - @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) - add r0, r0, #(1<<17) - mov r0, r0, asr #18 - strh r0, [lr, #( 0*8)] - strh r0, [lr, #(14*8)] - - @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) - @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) - add r4, r4, #(1<<17) - mov r4, r4, asr #18 - strh r4, [lr, #( 2*8)] - strh r4, [lr, #(12*8)] - - @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) - @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) - add r6, r6, #(1<<17) - mov r6, r6, asr #18 - strh r6, [lr, #( 4*8)] - strh r6, [lr, #(10*8)] - - @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) - @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) - add r2, r2, #(1<<17) - mov r2, r2, asr #18 - strh r2, [lr, #( 6*8)] - strh r2, [lr, #( 8*8)] - - @ End of row loop - add lr, lr, #2 - subs r12, r12, #1 - bne column_loop + @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) + @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) + add r0, r0, #(1<<17) + mov r0, r0, asr #18 + strh r0, [lr, #( 0*8)] + strh r0, [lr, #(14*8)] + + @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) + @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) + add r4, r4, #(1<<17) + mov r4, r4, asr #18 + strh r4, [lr, #( 2*8)] + strh r4, [lr, #(12*8)] + + @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) + @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) + add r6, r6, #(1<<17) + mov r6, r6, asr #18 + strh r6, [lr, #( 4*8)] + strh r6, [lr, #(10*8)] + + @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) + @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) + add r2, r2, #(1<<17) + mov r2, r2, asr #18 + strh r2, [lr, #( 6*8)] + strh r2, [lr, #( 8*8)] + + @ End of row loop + add lr, lr, #2 + subs r12, r12, #1 + bne column_loop the_end: - @ The end.... - add sp, sp, #4 - ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return + @ The end.... + add sp, sp, #4 + ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return const_array: - .align - .word FIX_0_298631336 - .word FIX_0_541196100 - .word FIX_0_765366865 - .word FIX_1_175875602 - .word FIX_1_501321110 - .word FIX_2_053119869 - .word FIX_3_072711026 - .word FIX_M_0_390180644 - .word FIX_M_0_899976223 - .word FIX_M_1_847759065 - .word FIX_M_1_961570560 - .word FIX_M_2_562915447 - .word FIX_0xFFFF + .align + .word FIX_0_298631336 + .word FIX_0_541196100 + .word FIX_0_765366865 + .word FIX_1_175875602 + .word FIX_1_501321110 + .word FIX_2_053119869 + .word FIX_3_072711026 + .word FIX_M_0_390180644 + .word FIX_M_0_899976223 + .word FIX_M_1_847759065 + .word FIX_M_1_961570560 + .word FIX_M_2_562915447 + .word FIX_0xFFFF |