aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2013-05-04 10:06:10 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2013-05-04 10:07:43 +0200
commit: 711c8ee71d719d6d89a523618f078bb17dbe1abf (patch)
tree: b3f74bd4807eade81577b24b2b5c601c110d498f /libavcodec
parent: f18d2dff1194b34b79dc7641aafe54d1df349e40 (diff)
parent: 4a7af92cc80ced8498626401ed21f25ffe6740c8 (diff)
download: ffmpeg-711c8ee71d719d6d89a523618f078bb17dbe1abf.tar.gz
Merge commit '4a7af92cc80ced8498626401ed21f25ffe6740c8'
* commit '4a7af92cc80ced8498626401ed21f25ffe6740c8':
  sbrdsp: Unroll and use integer operations
  sbrdsp: Unroll sbr_autocorrelate_c
  x86: sbrdsp: Implement SSE2 qmf_deint_bfly

Conflicts:
  libavcodec/sbrdsp.c
  libavcodec/x86/sbrdsp.asm
  libavcodec/x86/sbrdsp_init.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/sbrdsp.c 82
-rw-r--r-- libavcodec/x86/sbrdsp.asm 48
2 files changed, 65 insertions, 65 deletions
diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index 7207f3bc54..de961cf70e 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -52,41 +52,41 @@ static float sbr_sum_square_c(float (*x)[2], int n)
static void sbr_neg_odd_64_c(float *x)
{
- union av_intfloat32 *xi = (union av_intfloat32*)x;
+ union av_intfloat32 *xi = (union av_intfloat32*) x;
int i;
- for (i = 1; i < 64; i += 4)
- {
- xi[i+0].i ^= 1U<<31;
- xi[i+2].i ^= 1U<<31;
+ for (i = 1; i < 64; i += 4) {
+ xi[i + 0].i ^= 1U << 31;
+ xi[i + 2].i ^= 1U << 31;
}
}
static void sbr_qmf_pre_shuffle_c(float *z)
{
- union av_intfloat32 *zi = (union av_intfloat32*)z;
+ union av_intfloat32 *zi = (union av_intfloat32*) z;
int k;
zi[64].i = zi[0].i;
zi[65].i = zi[1].i;
- for (k = 1; k < 31; k+=2) {
- zi[64+2*k+0].i = zi[64 - k].i ^ (1U<<31);
- zi[64+2*k+1].i = zi[ k + 1].i;
- zi[64+2*k+2].i = zi[63 - k].i ^ (1U<<31);
- zi[64+2*k+3].i = zi[ k + 2].i;
+ for (k = 1; k < 31; k += 2) {
+ zi[64 + 2 * k + 0].i = zi[64 - k].i ^ (1U << 31);
+ zi[64 + 2 * k + 1].i = zi[ k + 1].i;
+ zi[64 + 2 * k + 2].i = zi[63 - k].i ^ (1U << 31);
+ zi[64 + 2 * k + 3].i = zi[ k + 2].i;
}
- zi[64+2*31+0].i = zi[64 - 31].i ^ (1U<<31);
- zi[64+2*31+1].i = zi[31 + 1].i;
+
+ zi[64 + 2 * 31 + 0].i = zi[64 - 31].i ^ (1U << 31);
+ zi[64 + 2 * 31 + 1].i = zi[31 + 1].i;
}
static void sbr_qmf_post_shuffle_c(float W[32][2], const float *z)
{
- const union av_intfloat32 *zi = (const union av_intfloat32*)z;
- union av_intfloat32 *Wi = (union av_intfloat32*)W;
+ const union av_intfloat32 *zi = (const union av_intfloat32*) z;
+ union av_intfloat32 *Wi = (union av_intfloat32*) W;
int k;
- for (k = 0; k < 32; k+=2) {
- Wi[2*k+0].i = zi[63-k].i ^ (1U<<31);
- Wi[2*k+1].i = zi[k+0].i;
- Wi[2*k+2].i = zi[62-k].i ^ (1U<<31);
- Wi[2*k+3].i = zi[k+1].i;
+ for (k = 0; k < 32; k += 2) {
+ Wi[2 * k + 0].i = zi[63 - k].i ^ (1U << 31);
+ Wi[2 * k + 1].i = zi[ k + 0].i;
+ Wi[2 * k + 2].i = zi[62 - k].i ^ (1U << 31);
+ Wi[2 * k + 3].i = zi[ k + 1].i;
}
}
@@ -96,8 +96,8 @@ static void sbr_qmf_deint_neg_c(float *v, const float *src)
union av_intfloat32 *vi = (union av_intfloat32*)v;
int i;
for (i = 0; i < 32; i++) {
- vi[ i].i = si[63 - 2*i ].i;
- vi[63 - i].i = si[63 - 2*i - 1].i ^ (1U<<31);
+ vi[ i].i = si[63 - 2 * i ].i;
+ vi[63 - i].i = si[63 - 2 * i - 1].i ^ (1U << 31);
}
}
@@ -139,32 +139,32 @@ static av_always_inline void autocorrelate(const float x[40][2],
static void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2])
{
#if 0
- // This code is slower because it multiplies memory accesses.
- // It is left as eucational purpose and because it may offer
- // a better reference for writing arch-specific dsp functions.
+ /* This code is slower because it multiplies memory accesses.
+ * It is left for educational purposes and because it may offer
+ * a better reference for writing arch-specific DSP functions. */
autocorrelate(x, phi, 0);
autocorrelate(x, phi, 1);
autocorrelate(x, phi, 2);
#else
- float real_sum2 = x[ 0][0] * x[ 2][0] + x[ 0][1] * x[ 2][1];
- float imag_sum2 = x[ 0][0] * x[ 2][1] - x[ 0][1] * x[ 2][0];
- float real_sum1 = 0.f, imag_sum1 = 0.f, real_sum0 = 0.0f;
+ float real_sum2 = x[0][0] * x[2][0] + x[0][1] * x[2][1];
+ float imag_sum2 = x[0][0] * x[2][1] - x[0][1] * x[2][0];
+ float real_sum1 = 0.0f, imag_sum1 = 0.0f, real_sum0 = 0.0f;
int i;
for (i = 1; i < 38; i++) {
- real_sum0 += x[i][0] * x[i ][0] + x[i][1] * x[i ][1];
- real_sum1 += x[i][0] * x[i+1][0] + x[i][1] * x[i+1][1];
- imag_sum1 += x[i][0] * x[i+1][1] - x[i][1] * x[i+1][0];
- real_sum2 += x[i][0] * x[i+2][0] + x[i][1] * x[i+2][1];
- imag_sum2 += x[i][0] * x[i+2][1] - x[i][1] * x[i+2][0];
+ real_sum0 += x[i][0] * x[i ][0] + x[i][1] * x[i ][1];
+ real_sum1 += x[i][0] * x[i + 1][0] + x[i][1] * x[i + 1][1];
+ imag_sum1 += x[i][0] * x[i + 1][1] - x[i][1] * x[i + 1][0];
+ real_sum2 += x[i][0] * x[i + 2][0] + x[i][1] * x[i + 2][1];
+ imag_sum2 += x[i][0] * x[i + 2][1] - x[i][1] * x[i + 2][0];
}
- phi[2-2][1][0] = real_sum2;
- phi[2-2][1][1] = imag_sum2;
- phi[2 ][1][0] = real_sum0 + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1];
- phi[1 ][0][0] = real_sum0 + x[38][0] * x[38][0] + x[38][1] * x[38][1];
- phi[2-1][1][0] = real_sum1 + x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1];
- phi[2-1][1][1] = imag_sum1 + x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0];
- phi[0 ][0][0] = real_sum1 + x[38][0] * x[39][0] + x[38][1] * x[39][1];
- phi[0 ][0][1] = imag_sum1 + x[38][0] * x[39][1] - x[38][1] * x[39][0];
+ phi[2 - 2][1][0] = real_sum2;
+ phi[2 - 2][1][1] = imag_sum2;
+ phi[2 ][1][0] = real_sum0 + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1];
+ phi[1 ][0][0] = real_sum0 + x[38][0] * x[38][0] + x[38][1] * x[38][1];
+ phi[2 - 1][1][0] = real_sum1 + x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1];
+ phi[2 - 1][1][1] = imag_sum1 + x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0];
+ phi[0 ][0][0] = real_sum1 + x[38][0] * x[39][0] + x[38][1] * x[39][1];
+ phi[0 ][0][1] = imag_sum1 + x[38][0] * x[39][1] - x[38][1] * x[39][0];
#endif
}
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 1ce5777602..c4c95780ee 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -252,36 +252,36 @@ cglobal sbr_neg_odd_64, 1,2,4,z
; sbr_qmf_deint_bfly(float *v, const float *src0, const float *src1)
%macro SBR_QMF_DEINT_BFLY 0
cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
- mov cq, 64*4-2*mmsize
- lea vrevq, [vq + 64*4]
+ mov cq, 64*4-2*mmsize
+ lea vrevq, [vq + 64*4]
.loop:
- mova m0, [src0q+cq]
- mova m1, [src1q]
- mova m4, [src0q+cq+mmsize]
- mova m5, [src1q+mmsize]
+ mova m0, [src0q+cq]
+ mova m1, [src1q]
+ mova m4, [src0q+cq+mmsize]
+ mova m5, [src1q+mmsize]
%if cpuflag(sse2)
- pshufd m2, m0, q0123
- pshufd m3, m1, q0123
- pshufd m6, m4, q0123
- pshufd m7, m5, q0123
+ pshufd m2, m0, q0123
+ pshufd m3, m1, q0123
+ pshufd m6, m4, q0123
+ pshufd m7, m5, q0123
%else
- shufps m2, m0, m0, q0123
- shufps m3, m1, m1, q0123
- shufps m6, m4, m4, q0123
- shufps m7, m5, m5, q0123
+ shufps m2, m0, m0, q0123
+ shufps m3, m1, m1, q0123
+ shufps m6, m4, m4, q0123
+ shufps m7, m5, m5, q0123
%endif
- addps m5, m2
- subps m0, m7
- addps m1, m6
- subps m4, m3
- mova [vrevq], m1
+ addps m5, m2
+ subps m0, m7
+ addps m1, m6
+ subps m4, m3
+ mova [vrevq], m1
mova [vrevq+mmsize], m5
- mova [vq+cq], m0
+ mova [vq+cq], m0
mova [vq+cq+mmsize], m4
- add src1q, 2*mmsize
- add vrevq, 2*mmsize
- sub cq, 2*mmsize
- jge .loop
+ add src1q, 2*mmsize
+ add vrevq, 2*mmsize
+ sub cq, 2*mmsize
+ jge .loop
REP_RET
%endmacro