diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-12-14 23:58:10 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-12-14 23:58:10 +0100 |
commit | e462257242fc037c99206457d1316e1ff9e5306f (patch) | |
tree | 045910517a8b587f7a016b1c46403e1d1021f4f2 /libavcodec/arm | |
parent | a1be5bc79d7ac4c7c7ed79c4d72b4f1945ecb55c (diff) | |
parent | 115a57302a7d6661426304bec3a5bc72d0edf4b0 (diff) | |
download | ffmpeg-e462257242fc037c99206457d1316e1ff9e5306f.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (23 commits)
applehttp: Properly clean up if unable to probe a segment
applehttp: Avoid reading uninitialized memory
fate: Replace misleading "aac" in the name of an ADTS test with "adts".
fate: Drop pointless "-an" from pictor test command.
fate: split off image codec FATE tests into their own file
fate: split off WMA codec FATE tests into their own file
fate: split off lossless video and audio FATE tests into their own files
fate: split off qtrle codec FATE tests into their own file
fate: split off Ut Video codec FATE tests into their own file
fate: split off screen codec FATE tests into their own file
fate: split off Real Inc. codec FATE tests into their own file
fate: split off AC-3 codec FATE tests into their own file
mpegvideo: remove abort() in ff_find_unused_picture()
rv40: NEON optimised loop filter strength selection
rv40: rearrange loop filter functions
configure: cosmetics: sort some lists where appropriate
swscale_mmx: drop no longer required parameters from VSCALEX macros
swscale: Mark yuv2planeX_8_mmx as MMX2; it contains MMX2 instructions.
build: conditionally compile x86 H.264 chroma optimizations
v410 encoder and decoder
...
Conflicts:
Changelog
configure
doc/developer.texi
doc/general.texi
libavcodec/arm/asm.S
libavcodec/avcodec.h
libavcodec/v410dec.c
libavcodec/v410enc.c
libavcodec/version.h
libavcodec/x86/Makefile
libavcodec/x86/dsputil_mmx.c
libswscale/x86/swscale_mmx.c
tests/Makefile
tests/fate2.mak
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/asm.S | 8 | ||||
-rw-r--r-- | libavcodec/arm/rv40dsp_init_neon.c | 10 | ||||
-rw-r--r-- | libavcodec/arm/rv40dsp_neon.S | 86 |
3 files changed, 100 insertions, 4 deletions
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index d711cb8f11..2daac59242 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -113,10 +113,10 @@ T add \rn, \rn, \rm
 T       ldr             \rt, [\rn]
 .endm
 
-.macro  ldr_dpren       rt,  rn,  rm:vararg
-A       ldr             \rt, [\rn, -\rm]
-T       sub             \rt, \rn, \rm
-T       ldr             \rt, [\rt]
+.macro  ldr_dpre        rt,  rn,  rm:vararg
+A       ldr             \rt, [\rn, -\rm]!
+T       sub             \rn, \rn, \rm
+T       ldr             \rt, [\rn]
 .endm
 
 .macro  ldr_post        rt,  rn,  rm:vararg
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index 36d75e6fd8..59dddb6605 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -54,6 +54,13 @@ void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
 void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
 void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
 
+int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, int stride,
+                                        int beta, int beta2, int edge,
+                                        int *p1, int *q1);
+int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride,
+                                        int beta, int beta2, int edge,
+                                        int *p1, int *q1);
+
 void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
     c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
@@ -116,4 +123,7 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 
     c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_neon;
     c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_neon;
+
+    c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon;
+    c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon;
 }
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 07ba8428c1..a4313d89f9 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -722,3 +722,89 @@ function ff_rv40_weight_func_8_neon, export=1
         bne             1b
         bx              lr
 endfunc
+
+function ff_rv40_h_loop_filter_strength_neon, export=1
+        pkhbt           r2,  r3,  r2,  lsl #18
+
+        ldr             r3,  [r0]
+        ldr_dpre        r12, r0,  r1
+        teq             r3,  r12
+        beq             1f
+
+        sub             r0,  r0,  r1,  lsl #1
+
+        vld1.32         {d4[]},   [r0,:32], r1  @ -3
+        vld1.32         {d0[]},   [r0,:32], r1  @ -2
+        vld1.32         {d4[1]},  [r0,:32], r1  @ -1
+        vld1.32         {d5[]},   [r0,:32], r1  @  0
+        vld1.32         {d1[]},   [r0,:32], r1  @  1
+        vld1.32         {d5[0]},  [r0,:32], r1  @  2
+
+        vpaddl.u8       q8,  q0                 @ -2, -2, -2, -2,  1,  1,  1,  1
+        vpaddl.u8       q9,  q2                 @ -3, -3, -1, -1,  2,  2,  0,  0
+        vdup.32         d30, r2                 @ beta2, beta << 2
+        vpadd.u16       d16, d16, d17           @ -2, -2,  1,  1
+        vpadd.u16       d18, d18, d19           @ -3, -1,  2,  0
+        vabd.u16        d16, d18, d16
+        vclt.u16        d16, d16, d30
+
+        ldrd            r2,  r3,  [sp, #4]
+        vmovl.u16       q12, d16
+        vtrn.16         d16, d17
+        vshr.u32        q12, q12, #15
+        ldr             r0,  [sp]
+        vst1.32         {d24[1]}, [r2,:32]
+        vst1.32         {d25[1]}, [r3,:32]
+
+        cmp             r0,  #0
+        it              eq
+        bxeq            lr
+
+        vand            d18, d16, d17
+        vtrn.32         d18, d19
+        vand            d18, d18, d19
+        vmov.u16        r0,  d18[0]
+        bx              lr
+1:
+        ldrd            r2,  r3,  [sp, #4]
+        mov             r0,  #0
+        str             r0,  [r2]
+        str             r0,  [r3]
+        bx              lr
+endfunc
+
+function ff_rv40_v_loop_filter_strength_neon, export=1
+        sub             r0,  r0,  #3
+        pkhbt           r2,  r3,  r2,  lsl #18
+
+        vld1.8          {d0},     [r0], r1
+        vld1.8          {d1},     [r0], r1
+        vld1.8          {d2},     [r0], r1
+        vld1.8          {d3},     [r0], r1
+
+        vaddl.u8        q0,  d0,  d1
+        vaddl.u8        q1,  d2,  d3
+        vdup.32         q15, r2
+        vadd.u16        q0,  q0,  q1            @ -3, -2, -1,  0,  1,  2
+        vext.16         q1,  q0,  q0,  #1       @ -2, -1,  0,  1,  2
+        vabd.u16        q0,  q1,  q0
+        vclt.u16        q0,  q0,  q15
+
+        ldrd            r2,  r3,  [sp, #4]
+        vmovl.u16       q1,  d0
+        vext.16         d1,  d0,  d1,  #3
+        vshr.u32        q1,  q1,  #15
+        ldr             r0,  [sp]
+        vst1.32         {d2[1]},  [r2,:32]
+        vst1.32         {d3[1]},  [r3,:32]
+
+        cmp             r0,  #0
+        it              eq
+        bxeq            lr
+
+        vand            d0,  d0,  d1
+        vtrn.16         d0,  d1
+        vand            d0,  d0,  d1
+        vmov.u16        r0,  d0[0]
+        bx              lr
+endfunc