diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-06-15 02:15:25 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-06-15 02:15:25 +0200 |
commit | c137fdd778e1bb82c2f0d7fa4a88adc97058d6d4 (patch) | |
tree | 6ba8284ec414ae9cc98d70a80fcb81a719d66272 /libavcodec/arm | |
parent | 9e2f448d68d9df7ad79d968db315c6b0cc79c4df (diff) | |
parent | 4e0583020567dd2062a908fe59aacba484e68049 (diff) | |
download | ffmpeg-c137fdd778e1bb82c2f0d7fa4a88adc97058d6d4.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
swscale: remove misplaced comment.
ffmpeg: fix streaming to ffserver.
swscale: split out RGB48 output functions from yuv2packed[12X]_c().
build: move vpath directives to main Makefile
swscale: fix JPEG-range YUV scaling artifacts.
build: move ALLFFLIBS to a more logical place
ARM: factor some repetitive code into macros
Fix SVQ3 after adding 4:4:4 H.264 support
H.264: fix CODEC_FLAG_GRAY
4:4:4 H.264 decoding support
ac3enc: fix allocation of floating point samples.
Conflicts:
ffmpeg.c
libavcodec/dsputil_template.c
libavcodec/h264.c
libavcodec/mpegvideo.c
libavcodec/snow.c
libswscale/swscale.c
libswscale/swscale_internal.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/h264dsp_init_arm.c | 3 |
-rw-r--r-- | libavcodec/arm/mpegvideo_armv5te_s.S | 61 |
-rw-r--r-- | libavcodec/arm/simple_idct_armv5te.S | 182 |
3 files changed, 71 insertions, 175 deletions
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index e9146405c2..b344584799 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -122,7 +122,8 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
     c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
     c->h264_idct_add16 = ff_h264_idct_add16_neon;
     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
-    c->h264_idct_add8 = ff_h264_idct_add8_neon;
+    //FIXME: reenable when asm is updated.
+    //c->h264_idct_add8 = ff_h264_idct_add8_neon;
     c->h264_idct8_add = ff_h264_idct8_add_neon;
     c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
     c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 0606bb43b6..82095ab15d 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -35,6 +35,21 @@
  *
  * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
  */
+
+.macro dequant_t dst, src, mul, add, tmp
+        rsbs \tmp, ip, \src, asr #16
+        addgt \tmp, \add, #0
+        rsblt \tmp, \add, #0
+        smlatbne \dst, \src, \mul, \tmp
+.endm
+
+.macro dequant_b dst, src, mul, add, tmp
+        rsbs \tmp, ip, \src, lsl #16
+        addgt \tmp, \add, #0
+        rsblt \tmp, \add, #0
+        smlabbne \dst, \src, \mul, \tmp
+.endm
+
 function ff_dct_unquantize_h263_armv5te, export=1
         push {r4-r9,lr}
         mov ip, #0
@@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1
 1:
         ldrd r6, [r0, #8]

-        rsbs r9, ip, r4, asr #16
-        addgt r9, r2, #0
-        rsblt r9, r2, #0
-        smlatbne r9, r4, r1, r9
-
-        rsbs lr, ip, r5, asr #16
-        addgt lr, r2, #0
-        rsblt lr, r2, #0
-        smlatbne lr, r5, r1, lr
-
-        rsbs r8, ip, r4, asl #16
-        addgt r8, r2, #0
-        rsblt r8, r2, #0
-        smlabbne r4, r4, r1, r8
-
-        rsbs r8, ip, r5, asl #16
-        addgt r8, r2, #0
-        rsblt r8, r2, #0
-        smlabbne r5, r5, r1, r8
+        dequant_t r9, r4, r1, r2, r9
+        dequant_t lr, r5, r1, r2, lr
+        dequant_b r4, r4, r1, r2, r8
+        dequant_b r5, r5, r1, r2, r8

         strh r4, [r0], #2
         strh r9, [r0], #2
         strh r5, [r0], #2
         strh lr, [r0], #2

-        rsbs r9, ip, r6, asr #16
-        addgt r9, r2, #0
-        rsblt r9, r2, #0
-        smlatbne r9, r6, r1, r9
-
-        rsbs lr, ip, r7, asr #16
-        addgt lr, r2, #0
-        rsblt lr, r2, #0
-        smlatbne lr, r7, r1, lr
-
-        rsbs r8, ip, r6, asl #16
-        addgt r8, r2, #0
-        rsblt r8, r2, #0
-        smlabbne r6, r6, r1, r8
-
-        rsbs r8, ip, r7, asl #16
-        addgt r8, r2, #0
-        rsblt r8, r2, #0
-        smlabbne r7, r7, r1, r8
+        dequant_t r9, r6, r1, r2, r9
+        dequant_t lr, r7, r1, r2, lr
+        dequant_b r6, r6, r1, r2, r8
+        dequant_b r7, r7, r1, r2, r8

         strh r6, [r0], #2
         strh r9, [r0], #2
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index 27aeca4448..3c4b5c06d1 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -333,6 +333,20 @@ function idct_col_armv5te
         ldr pc, [sp], #4
 endfunc

+.macro clip dst, src:vararg
+        movs \dst, \src
+        movmi \dst, #0
+        cmp \dst, #255
+        movgt \dst, #255
+.endm
+
+.macro aclip dst, src:vararg
+        adds \dst, \src
+        movmi \dst, #0
+        cmp \dst, #255
+        movgt \dst, #255
+.endm
+
 function idct_col_put_armv5te
         str lr, [sp, #-4]!

@@ -341,27 +355,15 @@ function idct_col_put_armv5te
         ldmfd sp!, {a3, a4}
         ldr lr, [sp, #32]
         add a2, a3, v1
-        movs a2, a2, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a2, asr #20
         add ip, a4, v2
-        movs ip, ip, asr #20
-        movmi ip, #0
-        cmp ip, #255
-        movgt ip, #255
+        clip ip, ip, asr #20
         orr a2, a2, ip, lsl #8
         sub a3, a3, v1
-        movs a3, a3, asr #20
-        movmi a3, #0
-        cmp a3, #255
-        movgt a3, #255
+        clip a3, a3, asr #20
         sub a4, a4, v2
-        movs a4, a4, asr #20
-        movmi a4, #0
-        cmp a4, #255
+        clip a4, a4, asr #20
         ldr v1, [sp, #28]
-        movgt a4, #255
         strh a2, [v1]
         add a2, v1, #2
         str a2, [sp, #28]
@@ -371,79 +373,43 @@ function idct_col_put_armv5te
         strh a2, [v2, v1]!
         sub a2, a3, v3
-        movs a2, a2, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a2, asr #20
         sub ip, a4, v4
-        movs ip, ip, asr #20
-        movmi ip, #0
-        cmp ip, #255
-        movgt ip, #255
+        clip ip, ip, asr #20
         orr a2, a2, ip, lsl #8
         strh a2, [v1, lr]!
         add a3, a3, v3
-        movs a2, a3, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a3, asr #20
         add a4, a4, v4
-        movs a4, a4, asr #20
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        clip a4, a4, asr #20
         orr a2, a2, a4, lsl #8
         ldmfd sp!, {a3, a4}
         strh a2, [v2, -lr]!

         add a2, a3, v5
-        movs a2, a2, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a2, asr #20
         add ip, a4, v6
-        movs ip, ip, asr #20
-        movmi ip, #0
-        cmp ip, #255
-        movgt ip, #255
+        clip ip, ip, asr #20
         orr a2, a2, ip, lsl #8
         strh a2, [v1, lr]!
         sub a3, a3, v5
-        movs a2, a3, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a3, asr #20
         sub a4, a4, v6
-        movs a4, a4, asr #20
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        clip a4, a4, asr #20
         orr a2, a2, a4, lsl #8
         ldmfd sp!, {a3, a4}
         strh a2, [v2, -lr]!
         add a2, a3, v7
-        movs a2, a2, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a2, asr #20
         add ip, a4, fp
-        movs ip, ip, asr #20
-        movmi ip, #0
-        cmp ip, #255
-        movgt ip, #255
+        clip ip, ip, asr #20
         orr a2, a2, ip, lsl #8
         strh a2, [v1, lr]
         sub a3, a3, v7
-        movs a2, a3, asr #20
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        clip a2, a3, asr #20
         sub a4, a4, fp
-        movs a4, a4, asr #20
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        clip a4, a4, asr #20
         orr a2, a2, a4, lsl #8
         strh a2, [v2, -lr]

@@ -460,36 +426,22 @@ function idct_col_add_armv5te
         ldmfd sp!, {a3, a4}
         ldrh ip, [lr]
         add a2, a3, v1
-        mov a2, a2, asr #20
         sub a3, a3, v1
         and v1, ip, #255
-        adds a2, a2, v1
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        aclip a2, v1, a2, asr #20
         add v1, a4, v2
         mov v1, v1, asr #20
-        adds v1, v1, ip, lsr #8
-        movmi v1, #0
-        cmp v1, #255
-        movgt v1, #255
+        aclip v1, v1, ip, lsr #8
         orr a2, a2, v1, lsl #8
         ldr v1, [sp, #32]
         sub a4, a4, v2
         rsb v2, v1, v1, lsl #3
         ldrh ip, [v2, lr]!
         strh a2, [lr]
-        mov a3, a3, asr #20
         and a2, ip, #255
-        adds a3, a3, a2
-        movmi a3, #0
-        cmp a3, #255
-        movgt a3, #255
+        aclip a3, a2, a3, asr #20
         mov a4, a4, asr #20
-        adds a4, a4, ip, lsr #8
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        aclip a4, a4, ip, lsr #8
         add a2, lr, #2
         str a2, [sp, #28]
         orr a2, a3, a4, lsl #8
@@ -498,102 +450,60 @@ function idct_col_add_armv5te
         ldmfd sp!, {a3, a4}
         ldrh ip, [lr, v1]!
         sub a2, a3, v3
-        mov a2, a2, asr #20
         add a3, a3, v3
         and v3, ip, #255
-        adds a2, a2, v3
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        aclip a2, v3, a2, asr #20
         sub v3, a4, v4
         mov v3, v3, asr #20
-        adds v3, v3, ip, lsr #8
-        movmi v3, #0
-        cmp v3, #255
-        movgt v3, #255
+        aclip v3, v3, ip, lsr #8
         orr a2, a2, v3, lsl #8
         add a4, a4, v4
         ldrh ip, [v2, -v1]!
         strh a2, [lr]
-        mov a3, a3, asr #20
         and a2, ip, #255
-        adds a3, a3, a2
-        movmi a3, #0
-        cmp a3, #255
-        movgt a3, #255
+        aclip a3, a2, a3, asr #20
         mov a4, a4, asr #20
-        adds a4, a4, ip, lsr #8
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        aclip a4, a4, ip, lsr #8
         orr a2, a3, a4, lsl #8
         strh a2, [v2]

         ldmfd sp!, {a3, a4}
         ldrh ip, [lr, v1]!
         add a2, a3, v5
-        mov a2, a2, asr #20
         sub a3, a3, v5
         and v3, ip, #255
-        adds a2, a2, v3
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        aclip a2, v3, a2, asr #20
         add v3, a4, v6
         mov v3, v3, asr #20
-        adds v3, v3, ip, lsr #8
-        movmi v3, #0
-        cmp v3, #255
-        movgt v3, #255
+        aclip v3, v3, ip, lsr #8
         orr a2, a2, v3, lsl #8
         sub a4, a4, v6
         ldrh ip, [v2, -v1]!
         strh a2, [lr]
-        mov a3, a3, asr #20
         and a2, ip, #255
-        adds a3, a3, a2
-        movmi a3, #0
-        cmp a3, #255
-        movgt a3, #255
+        aclip a3, a2, a3, asr #20
         mov a4, a4, asr #20
-        adds a4, a4, ip, lsr #8
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        aclip a4, a4, ip, lsr #8
         orr a2, a3, a4, lsl #8
         strh a2, [v2]

         ldmfd sp!, {a3, a4}
         ldrh ip, [lr, v1]!
         add a2, a3, v7
-        mov a2, a2, asr #20
         sub a3, a3, v7
         and v3, ip, #255
-        adds a2, a2, v3
-        movmi a2, #0
-        cmp a2, #255
-        movgt a2, #255
+        aclip a2, v3, a2, asr #20
         add v3, a4, fp
         mov v3, v3, asr #20
-        adds v3, v3, ip, lsr #8
-        movmi v3, #0
-        cmp v3, #255
-        movgt v3, #255
+        aclip v3, v3, ip, lsr #8
         orr a2, a2, v3, lsl #8
         sub a4, a4, fp
         ldrh ip, [v2, -v1]!
         strh a2, [lr]
-        mov a3, a3, asr #20
         and a2, ip, #255
-        adds a3, a3, a2
-        movmi a3, #0
-        cmp a3, #255
-        movgt a3, #255
+        aclip a3, a2, a3, asr #20
         mov a4, a4, asr #20
-        adds a4, a4, ip, lsr #8
-        movmi a4, #0
-        cmp a4, #255
-        movgt a4, #255
+        aclip a4, a4, ip, lsr #8
         orr a2, a3, a4, lsl #8
         strh a2, [v2]