aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/arm
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-06-15 02:15:25 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2011-06-15 02:15:25 +0200
commit c137fdd778e1bb82c2f0d7fa4a88adc97058d6d4 (patch)
tree 6ba8284ec414ae9cc98d70a80fcb81a719d66272 /libavcodec/arm
parent 9e2f448d68d9df7ad79d968db315c6b0cc79c4df (diff)
parent 4e0583020567dd2062a908fe59aacba484e68049 (diff)
download ffmpeg-c137fdd778e1bb82c2f0d7fa4a88adc97058d6d4.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  swscale: remove misplaced comment.
  ffmpeg: fix streaming to ffserver.
  swscale: split out RGB48 output functions from yuv2packed[12X]_c().
  build: move vpath directives to main Makefile
  swscale: fix JPEG-range YUV scaling artifacts.
  build: move ALLFFLIBS to a more logical place
  ARM: factor some repetitive code into macros
  Fix SVQ3 after adding 4:4:4 H.264 support
  H.264: fix CODEC_FLAG_GRAY
  4:4:4 H.264 decoding support
  ac3enc: fix allocation of floating point samples.

Conflicts:
  ffmpeg.c
  libavcodec/dsputil_template.c
  libavcodec/h264.c
  libavcodec/mpegvideo.c
  libavcodec/snow.c
  libswscale/swscale.c
  libswscale/swscale_internal.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- libavcodec/arm/h264dsp_init_arm.c 3
-rw-r--r-- libavcodec/arm/mpegvideo_armv5te_s.S 61
-rw-r--r-- libavcodec/arm/simple_idct_armv5te.S 182
3 files changed, 71 insertions, 175 deletions
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index e9146405c2..b344584799 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -122,7 +122,8 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
c->h264_idct_add16 = ff_h264_idct_add16_neon;
c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
- c->h264_idct_add8 = ff_h264_idct_add8_neon;
+ //FIXME: reenable when asm is updated.
+ //c->h264_idct_add8 = ff_h264_idct_add8_neon;
c->h264_idct8_add = ff_h264_idct8_add_neon;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 0606bb43b6..82095ab15d 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -35,6 +35,21 @@
*
* Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
*/
+
+.macro dequant_t dst, src, mul, add, tmp @ dst = top16(src) * bottom16(mul) +/- add, keyed on the top halfword of src; reads ip, clobbers tmp and flags
+ rsbs \tmp, ip, \src, asr #16 @ tmp = (src asr 16) - ip, flags set; ip comes from the caller (presumably 0, so this is a sign/zero test - confirm)
+ addgt \tmp, \add, #0 @ result > 0: tmp = add (no S suffix, flags from rsbs survive)
+ rsblt \tmp, \add, #0 @ result < 0: tmp = 0 - add
+ smlatbne \dst, \src, \mul, \tmp @ result != 0: dst = top16(src) * bottom16(mul) + tmp; on zero dst is not written here (if tmp aliases dst it keeps the rsbs result)
+.endm
+
+.macro dequant_b dst, src, mul, add, tmp @ dst = bottom16(src) * bottom16(mul) +/- add, keyed on the bottom halfword of src; reads ip, clobbers tmp and flags
+ rsbs \tmp, ip, \src, lsl #16 @ tmp = (src lsl 16) - ip, flags set; the shift moves the low halfword into the sign position; ip comes from the caller (presumably 0 - confirm)
+ addgt \tmp, \add, #0 @ result > 0: tmp = add (no S suffix, flags from rsbs survive)
+ rsblt \tmp, \add, #0 @ result < 0: tmp = 0 - add
+ smlabbne \dst, \src, \mul, \tmp @ result != 0: dst = bottom16(src) * bottom16(mul) + tmp; on zero dst is not written here, so dst == src keeps its old value
+.endm
+
function ff_dct_unquantize_h263_armv5te, export=1
push {r4-r9,lr}
mov ip, #0
@@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1
1:
ldrd r6, [r0, #8]
- rsbs r9, ip, r4, asr #16
- addgt r9, r2, #0
- rsblt r9, r2, #0
- smlatbne r9, r4, r1, r9
-
- rsbs lr, ip, r5, asr #16
- addgt lr, r2, #0
- rsblt lr, r2, #0
- smlatbne lr, r5, r1, lr
-
- rsbs r8, ip, r4, asl #16
- addgt r8, r2, #0
- rsblt r8, r2, #0
- smlabbne r4, r4, r1, r8
-
- rsbs r8, ip, r5, asl #16
- addgt r8, r2, #0
- rsblt r8, r2, #0
- smlabbne r5, r5, r1, r8
+ dequant_t r9, r4, r1, r2, r9
+ dequant_t lr, r5, r1, r2, lr
+ dequant_b r4, r4, r1, r2, r8
+ dequant_b r5, r5, r1, r2, r8
strh r4, [r0], #2
strh r9, [r0], #2
strh r5, [r0], #2
strh lr, [r0], #2
- rsbs r9, ip, r6, asr #16
- addgt r9, r2, #0
- rsblt r9, r2, #0
- smlatbne r9, r6, r1, r9
-
- rsbs lr, ip, r7, asr #16
- addgt lr, r2, #0
- rsblt lr, r2, #0
- smlatbne lr, r7, r1, lr
-
- rsbs r8, ip, r6, asl #16
- addgt r8, r2, #0
- rsblt r8, r2, #0
- smlabbne r6, r6, r1, r8
-
- rsbs r8, ip, r7, asl #16
- addgt r8, r2, #0
- rsblt r8, r2, #0
- smlabbne r7, r7, r1, r8
+ dequant_t r9, r6, r1, r2, r9
+ dequant_t lr, r7, r1, r2, lr
+ dequant_b r6, r6, r1, r2, r8
+ dequant_b r7, r7, r1, r2, r8
strh r6, [r0], #2
strh r9, [r0], #2
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index 27aeca4448..3c4b5c06d1 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -333,6 +333,20 @@ function idct_col_armv5te
ldr pc, [sp], #4
endfunc
+.macro clip dst, src:vararg @ dst = src clamped to 0..255; src is everything after dst, so it may carry a shift (e.g. a2, asr 20); clobbers flags
+ movs \dst, \src @ copy the (optionally shifted) value and set N/Z from it
+ movmi \dst, #0 @ negative value -> 0
+ cmp \dst, #255 @ signed compare against the upper bound
+ movgt \dst, #255 @ greater than 255 -> 255
+.endm
+
+.macro aclip dst, src:vararg @ dst = (sum of the src operands) clamped to 0..255; src supplies both addends (e.g. v1, a2, asr 20); clobbers flags
+ adds \dst, \src @ add the two operands (second may be shifted) and set flags from the sum
+ movmi \dst, #0 @ N set (negative sum) -> 0
+ cmp \dst, #255 @ signed compare against the upper bound
+ movgt \dst, #255 @ greater than 255 -> 255
+.endm
+
function idct_col_put_armv5te
str lr, [sp, #-4]!
@@ -341,27 +355,15 @@ function idct_col_put_armv5te
ldmfd sp!, {a3, a4}
ldr lr, [sp, #32]
add a2, a3, v1
- movs a2, a2, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a2, asr #20
add ip, a4, v2
- movs ip, ip, asr #20
- movmi ip, #0
- cmp ip, #255
- movgt ip, #255
+ clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
sub a3, a3, v1
- movs a3, a3, asr #20
- movmi a3, #0
- cmp a3, #255
- movgt a3, #255
+ clip a3, a3, asr #20
sub a4, a4, v2
- movs a4, a4, asr #20
- movmi a4, #0
- cmp a4, #255
+ clip a4, a4, asr #20
ldr v1, [sp, #28]
- movgt a4, #255
strh a2, [v1]
add a2, v1, #2
str a2, [sp, #28]
@@ -371,79 +373,43 @@ function idct_col_put_armv5te
strh a2, [v2, v1]!
sub a2, a3, v3
- movs a2, a2, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a2, asr #20
sub ip, a4, v4
- movs ip, ip, asr #20
- movmi ip, #0
- cmp ip, #255
- movgt ip, #255
+ clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]!
add a3, a3, v3
- movs a2, a3, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a3, asr #20
add a4, a4, v4
- movs a4, a4, asr #20
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
strh a2, [v2, -lr]!
add a2, a3, v5
- movs a2, a2, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a2, asr #20
add ip, a4, v6
- movs ip, ip, asr #20
- movmi ip, #0
- cmp ip, #255
- movgt ip, #255
+ clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]!
sub a3, a3, v5
- movs a2, a3, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a3, asr #20
sub a4, a4, v6
- movs a4, a4, asr #20
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
ldmfd sp!, {a3, a4}
strh a2, [v2, -lr]!
add a2, a3, v7
- movs a2, a2, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a2, asr #20
add ip, a4, fp
- movs ip, ip, asr #20
- movmi ip, #0
- cmp ip, #255
- movgt ip, #255
+ clip ip, ip, asr #20
orr a2, a2, ip, lsl #8
strh a2, [v1, lr]
sub a3, a3, v7
- movs a2, a3, asr #20
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ clip a2, a3, asr #20
sub a4, a4, fp
- movs a4, a4, asr #20
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ clip a4, a4, asr #20
orr a2, a2, a4, lsl #8
strh a2, [v2, -lr]
@@ -460,36 +426,22 @@ function idct_col_add_armv5te
ldmfd sp!, {a3, a4}
ldrh ip, [lr]
add a2, a3, v1
- mov a2, a2, asr #20
sub a3, a3, v1
and v1, ip, #255
- adds a2, a2, v1
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ aclip a2, v1, a2, asr #20
add v1, a4, v2
mov v1, v1, asr #20
- adds v1, v1, ip, lsr #8
- movmi v1, #0
- cmp v1, #255
- movgt v1, #255
+ aclip v1, v1, ip, lsr #8
orr a2, a2, v1, lsl #8
ldr v1, [sp, #32]
sub a4, a4, v2
rsb v2, v1, v1, lsl #3
ldrh ip, [v2, lr]!
strh a2, [lr]
- mov a3, a3, asr #20
and a2, ip, #255
- adds a3, a3, a2
- movmi a3, #0
- cmp a3, #255
- movgt a3, #255
+ aclip a3, a2, a3, asr #20
mov a4, a4, asr #20
- adds a4, a4, ip, lsr #8
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ aclip a4, a4, ip, lsr #8
add a2, lr, #2
str a2, [sp, #28]
orr a2, a3, a4, lsl #8
@@ -498,102 +450,60 @@ function idct_col_add_armv5te
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
sub a2, a3, v3
- mov a2, a2, asr #20
add a3, a3, v3
and v3, ip, #255
- adds a2, a2, v3
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ aclip a2, v3, a2, asr #20
sub v3, a4, v4
mov v3, v3, asr #20
- adds v3, v3, ip, lsr #8
- movmi v3, #0
- cmp v3, #255
- movgt v3, #255
+ aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
add a4, a4, v4
ldrh ip, [v2, -v1]!
strh a2, [lr]
- mov a3, a3, asr #20
and a2, ip, #255
- adds a3, a3, a2
- movmi a3, #0
- cmp a3, #255
- movgt a3, #255
+ aclip a3, a2, a3, asr #20
mov a4, a4, asr #20
- adds a4, a4, ip, lsr #8
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ aclip a4, a4, ip, lsr #8
orr a2, a3, a4, lsl #8
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
add a2, a3, v5
- mov a2, a2, asr #20
sub a3, a3, v5
and v3, ip, #255
- adds a2, a2, v3
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ aclip a2, v3, a2, asr #20
add v3, a4, v6
mov v3, v3, asr #20
- adds v3, v3, ip, lsr #8
- movmi v3, #0
- cmp v3, #255
- movgt v3, #255
+ aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, v6
ldrh ip, [v2, -v1]!
strh a2, [lr]
- mov a3, a3, asr #20
and a2, ip, #255
- adds a3, a3, a2
- movmi a3, #0
- cmp a3, #255
- movgt a3, #255
+ aclip a3, a2, a3, asr #20
mov a4, a4, asr #20
- adds a4, a4, ip, lsr #8
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ aclip a4, a4, ip, lsr #8
orr a2, a3, a4, lsl #8
strh a2, [v2]
ldmfd sp!, {a3, a4}
ldrh ip, [lr, v1]!
add a2, a3, v7
- mov a2, a2, asr #20
sub a3, a3, v7
and v3, ip, #255
- adds a2, a2, v3
- movmi a2, #0
- cmp a2, #255
- movgt a2, #255
+ aclip a2, v3, a2, asr #20
add v3, a4, fp
mov v3, v3, asr #20
- adds v3, v3, ip, lsr #8
- movmi v3, #0
- cmp v3, #255
- movgt v3, #255
+ aclip v3, v3, ip, lsr #8
orr a2, a2, v3, lsl #8
sub a4, a4, fp
ldrh ip, [v2, -v1]!
strh a2, [lr]
- mov a3, a3, asr #20
and a2, ip, #255
- adds a3, a3, a2
- movmi a3, #0
- cmp a3, #255
- movgt a3, #255
+ aclip a3, a2, a3, asr #20
mov a4, a4, asr #20
- adds a4, a4, ip, lsr #8
- movmi a4, #0
- cmp a4, #255
- movgt a4, #255
+ aclip a4, a4, ip, lsr #8
orr a2, a3, a4, lsl #8
strh a2, [v2]