diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-12-17 01:36:59 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-12-17 02:18:16 +0100 |
commit | 552ec4c9fda480d61bff8447347b08f927f1fca3 (patch) | |
tree | 72da1c610adde49ba4bb08e310e7d8f6b18ec581 /libavcodec | |
parent | 6d8e6fe9dbc365f50521cf0c4a5ffee97c970cb5 (diff) | |
parent | a1e98f198e9db4e5ddfc2f777014179d3d7bc4d2 (diff) | |
download | ffmpeg-552ec4c9fda480d61bff8447347b08f927f1fca3.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
get_bits: remove A32 variant
avconv: support stream specifiers in -metadata and -map_metadata
wavpack: Fix 32-bit clipping
wavpack: Clip samples after shifting
h264: don't drop B-frames after next keyframe on POC reset.
get_bits: remove useless pointer casts
configure: refactor lists of tests and components into variables
rv40: NEON optimised weak loop filter
mpegts: replace some magic numbers with the existing define
swscale: add unscaled packed 16 bit per component endianness conversion
Conflicts:
libavcodec/get_bits.h
libavcodec/h264.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/rv40dsp_init_neon.c | 9 | ||||
-rw-r--r-- | libavcodec/arm/rv40dsp_neon.S | 110 | ||||
-rw-r--r-- | libavcodec/dv.c | 2 | ||||
-rw-r--r-- | libavcodec/get_bits.h | 4 | ||||
-rw-r--r-- | libavcodec/imc.c | 1 | ||||
-rw-r--r-- | libavcodec/proresdec_lgpl.c | 2 | ||||
-rw-r--r-- | libavcodec/wavpack.c | 8 |
7 files changed, 127 insertions, 9 deletions
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c index 59dddb6605..898b841344 100644 --- a/libavcodec/arm/rv40dsp_init_neon.c +++ b/libavcodec/arm/rv40dsp_init_neon.c @@ -61,6 +61,13 @@ int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride, int beta, int beta2, int edge, int *p1, int *q1); +void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1, + int filter_q1, int alpha, int beta, + int lim_p0q0, int lim_q1, int lim_p1); +void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1, + int filter_q1, int alpha, int beta, + int lim_p0q0, int lim_q1, int lim_p1); + void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) { c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon; @@ -126,4 +133,6 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp) c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon; c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon; + c->rv40_weak_loop_filter[0] = ff_rv40_h_weak_loop_filter_neon; + c->rv40_weak_loop_filter[1] = ff_rv40_v_weak_loop_filter_neon; } diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S index d9e1b7c959..f68f38234a 100644 --- a/libavcodec/arm/rv40dsp_neon.S +++ b/libavcodec/arm/rv40dsp_neon.S @@ -808,3 +808,113 @@ function ff_rv40_v_loop_filter_strength_neon, export=1 vmov.u16 r0, d0[0] bx lr endfunc + +.macro rv40_weak_loop_filter + vdup.16 d30, r2 @ filter_p1 + vdup.16 d31, r3 @ filter_q1 + ldrd r2, r3, [sp] + vdup.16 d28, r2 @ alpha + vdup.16 d29, r3 @ beta + ldr r12, [sp, #8] + vdup.16 d25, r12 @ lim_p0q0 + ldrd r2, r3, [sp, #12] + vsubl.u8 q9, d5, d4 @ x, t + vabdl.u8 q8, d5, d4 @ x, abs(t) + vneg.s16 q15, q15 + vceq.i16 d16, d19, #0 @ !t + vshl.s16 d19, d19, #2 @ t << 2 + vmul.u16 d18, d17, d28 @ alpha * abs(t) + vand d24, d30, d31 @ filter_p1 & filter_q1 + vsubl.u8 q1, d0, d4 @ p1p2, p1p0 + vsubl.u8 q3, d1, d5 @ q1q2, q1q0 + vmov.i16 d22, #3 + vshr.u16 
d18, d18, #7 + vadd.i16 d22, d22, d24 @ 3 - (filter_p1 & filter_q1) + vsubl.u8 q10, d0, d1 @ src[-2] - src[1] + vcle.u16 d18, d18, d22 + vand d20, d20, d24 + vneg.s16 d23, d25 @ -lim_p0q0 + vadd.s16 d19, d19, d20 + vbic d16, d18, d16 @ t && u <= 3 - (fp1 & fq1) + vtrn.32 d4, d5 @ -3, 2, -1, 0 + vrshr.s16 d19, d19, #3 + vmov d28, d29 @ beta + vswp d3, d6 @ q1q2, p1p0 + vmin.s16 d19, d19, d25 + vand d30, d30, d16 + vand d31, d31, d16 + vadd.s16 q10, q1, q3 @ p1p2 + p1p0, q1q2 + q1q0 + vmax.s16 d19, d19, d23 @ diff + vabs.s16 q1, q1 @ abs(p1p2), abs(q1q2) + vand d18, d19, d16 @ diff + vcle.u16 q1, q1, q14 + vneg.s16 d19, d18 @ -diff + vdup.16 d26, r3 @ lim_p1 + vaddw.u8 q2, q9, d5 @ src[-1]+diff, src[0]-diff + vhsub.s16 q11, q10, q9 + vand q1, q1, q15 + vqmovun.s16 d4, q2 @ -1, 0 + vand q9, q11, q1 + vdup.16 d27, r2 @ lim_q1 + vneg.s16 q9, q9 + vneg.s16 q14, q13 + vmin.s16 q9, q9, q13 + vtrn.32 d0, d1 @ -2, 1, -2, 1 + vmax.s16 q9, q9, q14 + vaddw.u8 q3, q9, d0 + vqmovun.s16 d5, q3 @ -2, 1 +.endm + +function ff_rv40_h_weak_loop_filter_neon, export=1 + sub r0, r0, r1, lsl #1 + sub r0, r0, r1 + + vld1.32 {d4[]}, [r0,:32], r1 + vld1.32 {d0[]}, [r0,:32], r1 + vld1.32 {d4[1]}, [r0,:32], r1 + vld1.32 {d5[]}, [r0,:32], r1 + vld1.32 {d1[]}, [r0,:32], r1 + vld1.32 {d5[0]}, [r0,:32] + + sub r0, r0, r1, lsl #2 + + rv40_weak_loop_filter + + vst1.32 {d5[0]}, [r0,:32], r1 + vst1.32 {d4[0]}, [r0,:32], r1 + vst1.32 {d4[1]}, [r0,:32], r1 + vst1.32 {d5[1]}, [r0,:32], r1 + + bx lr +endfunc + +function ff_rv40_v_weak_loop_filter_neon, export=1 + sub r12, r0, #3 + sub r0, r0, #2 + + vld1.8 {d4}, [r12], r1 + vld1.8 {d5}, [r12], r1 + vld1.8 {d2}, [r12], r1 + vld1.8 {d3}, [r12], r1 + + vtrn.16 q2, q1 + vtrn.8 d4, d5 + vtrn.8 d2, d3 + + vrev64.32 d5, d5 + vtrn.32 q2, q1 + vdup.32 d0, d3[0] + vdup.32 d1, d2[0] + + rv40_weak_loop_filter + + vtrn.32 q2, q3 + vswp d4, d5 + + vst4.8 {d4[0],d5[0],d6[0],d7[0]}, [r0], r1 + vst4.8 {d4[1],d5[1],d6[1],d7[1]}, [r0], r1 + vst4.8 {d4[2],d5[2],d6[2],d7[2]}, 
[r0], r1 + vst4.8 {d4[3],d5[3],d6[3],d7[3]}, [r0], r1 + + bx lr +endfunc diff --git a/libavcodec/dv.c b/libavcodec/dv.c index 787bd6511e..a05928801d 100644 --- a/libavcodec/dv.c +++ b/libavcodec/dv.c @@ -37,7 +37,7 @@ * @file * DV codec. */ -#define ALT_BITSTREAM_READER + #include "libavutil/pixdesc.h" #include "avcodec.h" #include "dsputil.h" diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h index d4d81375b1..46620410e3 100644 --- a/libavcodec/get_bits.h +++ b/libavcodec/get_bits.h @@ -133,12 +133,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc # ifdef ALT_BITSTREAM_READER_LE # define UPDATE_CACHE(name, gb) \ - name##_cache = AV_RL32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) >> (name##_index&0x07) + name##_cache = AV_RL32((gb)->buffer+(name##_index>>3)) >> (name##_index&0x07) # define SKIP_CACHE(name, gb, num) name##_cache >>= (num) # else # define UPDATE_CACHE(name, gb) \ - name##_cache = AV_RB32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) << (name##_index&0x07) + name##_cache = AV_RB32((gb)->buffer+(name##_index>>3)) << (name##_index&0x07) # define SKIP_CACHE(name, gb, num) name##_cache <<= (num) # endif diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 3919797038..d3b8bf5a12 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -35,7 +35,6 @@ #include <stddef.h> #include <stdio.h> -#define ALT_BITSTREAM_READER #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" diff --git a/libavcodec/proresdec_lgpl.c b/libavcodec/proresdec_lgpl.c index 1e9b962790..5fe47755c2 100644 --- a/libavcodec/proresdec_lgpl.c +++ b/libavcodec/proresdec_lgpl.c @@ -28,7 +28,7 @@ * @see http://wiki.multimedia.cx/index.php?title=Apple_ProRes */ -#define A32_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once +#define LONG_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once #include <stdint.h> diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c index 
1a8c25943f..8d9b804852 100644 --- a/libavcodec/wavpack.c +++ b/libavcodec/wavpack.c @@ -405,12 +405,12 @@ static inline int wv_get_value_integer(WavpackFrameContext *s, uint32_t *crc, in } bit = (S & s->and) | s->or; - bit = (((S + bit) << s->shift) - bit); + bit = (((S + bit) << s->shift) - bit) << s->post_shift; if(s->hybrid) - bit = av_clip(bit, -s->hybrid_maxclip, s->hybrid_maxclip - 1); + bit = av_clip(bit, -s->hybrid_maxclip - 1, s->hybrid_maxclip); - return bit << s->post_shift; + return bit; } static float wv_get_value_float(WavpackFrameContext *s, uint32_t *crc, int S) @@ -798,7 +798,7 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no, s->joint = s->frame_flags & WV_JOINT_STEREO; s->hybrid = s->frame_flags & WV_HYBRID_MODE; s->hybrid_bitrate = s->frame_flags & WV_HYBRID_BITRATE; - s->hybrid_maxclip = 1 << ((((s->frame_flags & 0x03) + 1) << 3) - 1); + s->hybrid_maxclip = (1LL << ((((s->frame_flags & 0x03) + 1) << 3) - 1)) - 1; s->post_shift = 8 * (bpp-1-(s->frame_flags&0x03)) + ((s->frame_flags >> 13) & 0x1f); s->CRC = AV_RL32(buf); buf += 4; if(wc->mkv_mode) |