about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-09-29 01:03:02 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-09-29 01:11:01 +0200
commit: f9a2d0c3feccab94a86c92396f3e36110dc2227b (patch)
tree: e7d0fa58e78006fd1d26dab64c74f22355bd9ce8 /libavcodec
parent: a3a5c61c6175a0bf398cce6a51fe94fcfca1145b (diff)
parent: daf98908118074e96199ca7195663af4543d3808 (diff)
download: ffmpeg-f9a2d0c3feccab94a86c92396f3e36110dc2227b.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (23 commits)
  avconv: Reformat s16 volume adjustment.
  ARM: NEON optimised vector_fmac_scalar()
  dca: use vector_fmac_scalar from dsputil
  dsputil: add vector_fmac_scalar()
  latmenc: Fix private options
  vf_unsharp: store hsub/vsub in the filter context
  vf_unsharp: adopt a more natural order of params in apply_unsharp()
  vf_unsharp: rename method "unsharpen" to "apply_unsharp"
  vf_scale: apply the same transform to the aspect during init that is applied per frame
  vf_pad: fix "vsub" variable value computation
  vf_scale: add a "sar" variable
  lavfi: fix realloc size computation in avfilter_add_format()
  vsrc_color: use internal timebase
  lavfi: fix signature for avfilter_graph_parse() and avfilter_graph_config()
  graphparser: prefer void * over AVClass * for log contexts
  avfiltergraph: use meaningful error codes
  avconv: Initialize return value for codec copy path.
  fate: use 'run' helper for seek-test
  fate: remove seek-mpeg2reuse test
  Fix memory (re)allocation in matroskadec.c, related to MSVR-11-0080.
  ...

Conflicts:
  doc/filters.texi
  libavfilter/avfilter.h
  libavfilter/avfiltergraph.c
  libavfilter/avfiltergraph.h
  libavfilter/graphparser.c
  libavfilter/vf_scale.c
  libavfilter/vsrc_color.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c | 3
-rw-r--r-- libavcodec/arm/dsputil_neon.S | 48
-rw-r--r-- libavcodec/dca.c | 7
-rw-r--r-- libavcodec/dsputil.c | 9
-rw-r--r-- libavcodec/dsputil.h | 11
5 files changed, 73 insertions, 5 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 15536d0bd2..ddc9d640f8 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -143,6 +143,8 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+ int len); /* NEON multiply-accumulate, dst[i] += src[i] * mul; defined in dsputil_neon.S */
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
@@ -305,6 +307,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->vector_fmul = ff_vector_fmul_neon;
c->vector_fmul_window = ff_vector_fmul_window_neon;
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
+ c->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon;
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index 94a7a8cb75..1574ad6496 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -587,6 +587,54 @@ NOVFP vdup.32 q8, r2
.unreq len
endfunc
+function ff_vector_fmac_scalar_neon, export=1 @ dst[i] += src[i] * mul; r0 = dst, r1 = src
+VFP len .req r2 @ VFP-prefixed lines: mul arrives in d0[0], so len is in r2
+VFP acc .req r3 @ and r3 is free to serve as the dst read pointer
+NOVFP len .req r3 @ NOVFP-prefixed lines: mul arrives in r2, so len is in r3
+NOVFP acc .req r2 @ and r2 is reused as the dst read pointer once mul is copied out
+VFP vdup.32 q15, d0[0] @ broadcast mul to all four lanes of q15
+NOVFP vdup.32 q15, r2 @ same broadcast; has to run before r2 is overwritten as acc below
+ bics r12, len, #15 @ r12 = len & ~15 = floats handled by the 16-float loop; Z set if none
+ mov acc, r0 @ acc reads dst while r0 writes it; plain mov keeps the flags from bics
+ beq 3f @ r12 == 0: go straight to the 4-float loop
+ vld1.32 {q0}, [r1,:128]! @ preload src[0..3]; :128 is the 16-byte alignment hint
+ vld1.32 {q8}, [acc,:128]! @ preload dst[0..3]
+ vld1.32 {q1}, [r1,:128]! @ preload src[4..7]
+ vld1.32 {q9}, [acc,:128]! @ preload dst[4..7]
+1: vmla.f32 q8, q0, q15 @ main loop, 16 floats per pass: q8 += q0 * mul
+ vld1.32 {q2}, [r1,:128]! @ third src quad of this pass
+ vld1.32 {q10}, [acc,:128]! @ third dst quad of this pass
+ vmla.f32 q9, q1, q15 @ q9 += q1 * mul
+ vld1.32 {q3}, [r1,:128]! @ fourth src quad of this pass
+ vld1.32 {q11}, [acc,:128]! @ fourth dst quad of this pass
+ vmla.f32 q10, q2, q15 @ q10 += q2 * mul
+ vst1.32 {q8}, [r0,:128]! @ write back first result quad
+ vmla.f32 q11, q3, q15 @ q11 += q3 * mul
+ vst1.32 {q9}, [r0,:128]! @ write back second result quad
+ subs r12, r12, #16 @ 16 floats consumed
+ beq 2f @ nothing left for this loop: flush q10/q11 at label 2
+ vld1.32 {q0}, [r1,:128]! @ load the first two quads of the next pass ...
+ vld1.32 {q8}, [acc,:128]!
+ vst1.32 {q10}, [r0,:128]! @ ... interleaved with storing quads three and four of this pass
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+ vst1.32 {q11}, [r0,:128]!
+ b 1b
+2: vst1.32 {q10}, [r0,:128]! @ store the last two result quads of the 16-float loop
+ vst1.32 {q11}, [r0,:128]!
+ ands len, len, #15 @ len = floats still to do (len mod 16); Z set if none
+ it eq
+ bxeq lr @ no remainder: return
+3: vld1.32 {q0}, [r1,:128]! @ tail loop, 4 floats per pass; body runs before len is tested
+ vld1.32 {q8}, [acc,:128]!
+ vmla.f32 q8, q0, q15 @ q8 += q0 * mul
+ vst1.32 {q8}, [r0,:128]!
+ subs len, len, #4
+ bgt 3b @ repeat while floats remain
+ bx lr
+ .unreq len @ NOTE(review): no matching .unreq for acc is visible here - confirm the alias is released
+endfunc
+
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index e11439f939..762821c3dc 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1833,11 +1833,8 @@ static int dca_decode_frame(AVCodecContext * avctx,
float* back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256;
float* lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
float* rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
- int j;
- for(j = 0; j < 256; ++j) {
- lt_chan[j] -= back_chan[j] * M_SQRT1_2;
- rt_chan[j] -= back_chan[j] * M_SQRT1_2;
- }
+ s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
+ s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
}
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c64a39d250..ebce93039f 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2443,6 +2443,14 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
dst[i] = src[i] * mul;
}
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
+ int len) /* C version: dst[i] += src[i] * mul for every i in [0, len) */
+{
+ int i;
+ for (i = 0; i < len; i++) /* runs zero times when len <= 0 */
+ dst[i] += src[i] * mul; /* dst is read and updated in place */
+}
+
static void butterflies_float_c(float *restrict v1, float *restrict v2,
int len)
{
@@ -2978,6 +2986,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_float = scalarproduct_float_c;
c->butterflies_float = butterflies_float_c;
c->vector_fmul_scalar = vector_fmul_scalar_c;
+ c->vector_fmac_scalar = vector_fmac_scalar_c;
c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 07ef196185..057c41cca5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -424,6 +424,17 @@ typedef struct DSPContext {
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
/**
+ * Multiply a vector of floats by a scalar float and add the
+ * products to the destination vector: dst[i] += src[i] * mul.
+ * Source and destination must overlap exactly or not at all.
+ * @param dst vector that is read and updated in place, 16-byte aligned
+ * @param src input vector, 16-byte aligned
+ * @param mul scalar value
+ * @param len number of floats to process, multiple of 4
+ */
+ void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
+ int len);
+ /**
* Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned
* @param v2 second vector, 16-byte aligned