aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-12-17 01:36:59 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2011-12-17 02:18:16 +0100
commit 552ec4c9fda480d61bff8447347b08f927f1fca3 (patch)
tree 72da1c610adde49ba4bb08e310e7d8f6b18ec581 /libavcodec
parent 6d8e6fe9dbc365f50521cf0c4a5ffee97c970cb5 (diff)
parent a1e98f198e9db4e5ddfc2f777014179d3d7bc4d2 (diff)
download ffmpeg-552ec4c9fda480d61bff8447347b08f927f1fca3.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  get_bits: remove A32 variant
  avconv: support stream specifiers in -metadata and -map_metadata
  wavpack: Fix 32-bit clipping
  wavpack: Clip samples after shifting
  h264: don't drop B-frames after next keyframe on POC reset.
  get_bits: remove useless pointer casts
  configure: refactor lists of tests and components into variables
  rv40: NEON optimised weak loop filter
  mpegts: replace some magic numbers with the existing define
  swscale: add unscaled packed 16 bit per component endianess conversion

Conflicts:
  libavcodec/get_bits.h
  libavcodec/h264.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/arm/rv40dsp_init_neon.c | 9
-rw-r--r-- libavcodec/arm/rv40dsp_neon.S | 110
-rw-r--r-- libavcodec/dv.c | 2
-rw-r--r-- libavcodec/get_bits.h | 4
-rw-r--r-- libavcodec/imc.c | 1
-rw-r--r-- libavcodec/proresdec_lgpl.c | 2
-rw-r--r-- libavcodec/wavpack.c | 8
7 files changed, 127 insertions, 9 deletions
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index 59dddb6605..898b841344 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -61,6 +61,13 @@ int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride,
int beta, int beta2, int edge,
int *p1, int *q1);
+void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1,
+ int filter_q1, int alpha, int beta,
+ int lim_p0q0, int lim_q1, int lim_p1);
+void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1,
+ int filter_q1, int alpha, int beta,
+ int lim_p0q0, int lim_q1, int lim_p1);
+
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
@@ -126,4 +133,6 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon;
c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon;
+ c->rv40_weak_loop_filter[0] = ff_rv40_h_weak_loop_filter_neon;
+ c->rv40_weak_loop_filter[1] = ff_rv40_v_weak_loop_filter_neon;
}
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index d9e1b7c959..f68f38234a 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -808,3 +808,113 @@ function ff_rv40_v_loop_filter_strength_neon, export=1
vmov.u16 r0, d0[0]
bx lr
endfunc
+
+.macro rv40_weak_loop_filter
+ vdup.16 d30, r2 @ filter_p1
+ vdup.16 d31, r3 @ filter_q1
+ ldrd r2, r3, [sp] @ fetch alpha, beta from the stack (r2/r3 are reused from here on)
+ vdup.16 d28, r2 @ alpha
+ vdup.16 d29, r3 @ beta
+ ldr r12, [sp, #8] @ fetch lim_p0q0 from the stack
+ vdup.16 d25, r12 @ lim_p0q0
+ ldrd r2, r3, [sp, #12] @ r2 = lim_q1, r3 = lim_p1 (broadcast further down)
+ vsubl.u8 q9, d5, d4 @ x, t
+ vabdl.u8 q8, d5, d4 @ x, abs(t)
+ vneg.s16 q15, q15 @ negate filter_p1/filter_q1 (d30, d31); presumably turns 0/1 flags into 0/0xffff masks - confirm against callers
+ vceq.i16 d16, d19, #0 @ !t
+ vshl.s16 d19, d19, #2 @ t << 2
+ vmul.u16 d18, d17, d28 @ alpha * abs(t)
+ vand d24, d30, d31 @ filter_p1 & filter_q1
+ vsubl.u8 q1, d0, d4 @ p1p2, p1p0
+ vsubl.u8 q3, d1, d5 @ q1q2, q1q0
+ vmov.i16 d22, #3
+ vshr.u16 d18, d18, #7 @ u = (alpha * abs(t)) >> 7
+ vadd.i16 d22, d22, d24 @ 3 - (filter_p1 & filter_q1)
+ vsubl.u8 q10, d0, d1 @ src[-2] - src[1]
+ vcle.u16 d18, d18, d22 @ u <= 3 - (filter_p1 & filter_q1)
+ vand d20, d20, d24 @ mask (src[-2] - src[1]) with filter_p1 & filter_q1
+ vneg.s16 d23, d25 @ -lim_p0q0
+ vadd.s16 d19, d19, d20 @ (t << 2) + masked (src[-2] - src[1])
+ vbic d16, d18, d16 @ t && u <= 3 - (fp1 & fq1)
+ vtrn.32 d4, d5 @ -3, 2, -1, 0
+ vrshr.s16 d19, d19, #3 @ rounding shift right by 3
+ vmov d28, d29 @ beta
+ vswp d3, d6 @ q1q2, p1p0
+ vmin.s16 d19, d19, d25 @ upper clamp at lim_p0q0
+ vand d30, d30, d16
+ vand d31, d31, d16
+ vadd.s16 q10, q1, q3 @ p1p2 + p1p0, q1q2 + q1q0
+ vmax.s16 d19, d19, d23 @ diff
+ vabs.s16 q1, q1 @ abs(p1p2), abs(q1q2)
+ vand d18, d19, d16 @ diff
+ vcle.u16 q1, q1, q14 @ compare both against beta (d28 and d29 both hold beta here)
+ vneg.s16 d19, d18 @ -diff
+ vdup.16 d26, r3 @ lim_p1
+ vaddw.u8 q2, q9, d5 @ src[-1]+diff, src[0]-diff
+ vhsub.s16 q11, q10, q9
+ vand q1, q1, q15
+ vqmovun.s16 d4, q2 @ -1, 0
+ vand q9, q11, q1
+ vdup.16 d27, r2 @ lim_q1
+ vneg.s16 q9, q9
+ vneg.s16 q14, q13 @ -lim_p1, -lim_q1
+ vmin.s16 q9, q9, q13 @ upper clamp at lim_p1 / lim_q1
+ vtrn.32 d0, d1 @ -2, 1, -2, 1
+ vmax.s16 q9, q9, q14 @ lower clamp at -lim_p1 / -lim_q1
+ vaddw.u8 q3, q9, d0
+ vqmovun.s16 d5, q3 @ -2, 1
+.endm
+
+function ff_rv40_h_weak_loop_filter_neon, export=1
+ sub r0, r0, r1, lsl #1 @ r0 = src - 2 * stride
+ sub r0, r0, r1 @ r0 = src - 3 * stride, the first of six rows read
+
+ vld1.32 {d4[]}, [r0,:32], r1 @ row -3, 4 bytes, replicated to both lanes of d4
+ vld1.32 {d0[]}, [r0,:32], r1 @ row -2, replicated to both lanes of d0
+ vld1.32 {d4[1]}, [r0,:32], r1 @ row -1 into d4[1]
+ vld1.32 {d5[]}, [r0,:32], r1 @ row 0, replicated to both lanes of d5
+ vld1.32 {d1[]}, [r0,:32], r1 @ row 1, replicated to both lanes of d1
+ vld1.32 {d5[0]}, [r0,:32] @ row 2 into d5[0] (no post-increment)
+
+ sub r0, r0, r1, lsl #2 @ step back 4 rows: r0 = src - 2 * stride
+
+ rv40_weak_loop_filter
+
+ vst1.32 {d5[0]}, [r0,:32], r1 @ write back row -2
+ vst1.32 {d4[0]}, [r0,:32], r1 @ write back row -1
+ vst1.32 {d4[1]}, [r0,:32], r1 @ write back row 0
+ vst1.32 {d5[1]}, [r0,:32], r1 @ write back row 1
+
+ bx lr
+endfunc
+
+function ff_rv40_v_weak_loop_filter_neon, export=1
+ sub r12, r0, #3 @ r12 = src - 3: base address for the loads
+ sub r0, r0, #2 @ r0 = src - 2: base address for the stores
+
+ vld1.8 {d4}, [r12], r1 @ load 8 bytes from each of 4 rows, advancing by r1 (stride)
+ vld1.8 {d5}, [r12], r1
+ vld1.8 {d2}, [r12], r1
+ vld1.8 {d3}, [r12], r1
+
+ vtrn.16 q2, q1 @ NOTE(review): the shuffles below appear to transpose rows into the register layout the macro expects - confirm
+ vtrn.8 d4, d5
+ vtrn.8 d2, d3
+
+ vrev64.32 d5, d5
+ vtrn.32 q2, q1
+ vdup.32 d0, d3[0]
+ vdup.32 d1, d2[0]
+
+ rv40_weak_loop_filter
+
+ vtrn.32 q2, q3
+ vswp d4, d5
+
+ vst4.8 {d4[0],d5[0],d6[0],d7[0]}, [r0], r1 @ write 4 bytes (one lane from each of d4-d7) per row, 4 rows
+ vst4.8 {d4[1],d5[1],d6[1],d7[1]}, [r0], r1
+ vst4.8 {d4[2],d5[2],d6[2],d7[2]}, [r0], r1
+ vst4.8 {d4[3],d5[3],d6[3],d7[3]}, [r0], r1
+
+ bx lr
+endfunc
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index 787bd6511e..a05928801d 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -37,7 +37,7 @@
* @file
* DV codec.
*/
-#define ALT_BITSTREAM_READER
+
#include "libavutil/pixdesc.h"
#include "avcodec.h"
#include "dsputil.h"
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index d4d81375b1..46620410e3 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -133,12 +133,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# ifdef ALT_BITSTREAM_READER_LE
# define UPDATE_CACHE(name, gb) \
- name##_cache = AV_RL32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) >> (name##_index&0x07)
+ name##_cache = AV_RL32((gb)->buffer+(name##_index>>3)) >> (name##_index&0x07)
# define SKIP_CACHE(name, gb, num) name##_cache >>= (num)
# else
# define UPDATE_CACHE(name, gb) \
- name##_cache = AV_RB32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) << (name##_index&0x07)
+ name##_cache = AV_RB32((gb)->buffer+(name##_index>>3)) << (name##_index&0x07)
# define SKIP_CACHE(name, gb, num) name##_cache <<= (num)
# endif
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 3919797038..d3b8bf5a12 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -35,7 +35,6 @@
#include <stddef.h>
#include <stdio.h>
-#define ALT_BITSTREAM_READER
#include "avcodec.h"
#include "get_bits.h"
#include "dsputil.h"
diff --git a/libavcodec/proresdec_lgpl.c b/libavcodec/proresdec_lgpl.c
index 1e9b962790..5fe47755c2 100644
--- a/libavcodec/proresdec_lgpl.c
+++ b/libavcodec/proresdec_lgpl.c
@@ -28,7 +28,7 @@
* @see http://wiki.multimedia.cx/index.php?title=Apple_ProRes
*/
-#define A32_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once
+#define LONG_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once
#include <stdint.h>
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 1a8c25943f..8d9b804852 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -405,12 +405,12 @@ static inline int wv_get_value_integer(WavpackFrameContext *s, uint32_t *crc, in
}
bit = (S & s->and) | s->or;
- bit = (((S + bit) << s->shift) - bit);
+ bit = (((S + bit) << s->shift) - bit) << s->post_shift;
if(s->hybrid)
- bit = av_clip(bit, -s->hybrid_maxclip, s->hybrid_maxclip - 1);
+ bit = av_clip(bit, -s->hybrid_maxclip - 1, s->hybrid_maxclip);
- return bit << s->post_shift;
+ return bit;
}
static float wv_get_value_float(WavpackFrameContext *s, uint32_t *crc, int S)
@@ -798,7 +798,7 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
s->joint = s->frame_flags & WV_JOINT_STEREO;
s->hybrid = s->frame_flags & WV_HYBRID_MODE;
s->hybrid_bitrate = s->frame_flags & WV_HYBRID_BITRATE;
- s->hybrid_maxclip = 1 << ((((s->frame_flags & 0x03) + 1) << 3) - 1);
+ s->hybrid_maxclip = (1LL << ((((s->frame_flags & 0x03) + 1) << 3) - 1)) - 1;
s->post_shift = 8 * (bpp-1-(s->frame_flags&0x03)) + ((s->frame_flags >> 13) & 0x1f);
s->CRC = AV_RL32(buf); buf += 4;
if(wc->mkv_mode)