diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2006-08-09 06:33:49 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2006-08-09 06:33:49 +0000 |
commit | 2494bdd90d594fe7e5263d26287dbb2f24ec1d32 (patch) | |
tree | 945d0bc81c29d5cb345c82359b3093d96e2eccab | |
parent | 8331891957555c66cfeddb8394059c920425ef1a (diff) | |
download | ffmpeg-2494bdd90d594fe7e5263d26287dbb2f24ec1d32.tar.gz |
gcc 2.95 and 3.4.x on 32-bit x86 without -fomit-frame-pointer can't even find 5 registers for the asm inputs, so the asm blocks are split into smaller ones.
As a result, vorbis is 0.5% slower.
Originally committed as revision 5964 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/i386/fft_3dn2.c | 26 |
1 files changed, 16 insertions, 10 deletions
diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c index 80dece700d..24d7799d58 100644 --- a/libavcodec/i386/fft_3dn2.c +++ b/libavcodec/i386/fft_3dn2.c @@ -154,20 +154,23 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { + // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it asm volatile( - "movd %1, %%mm0 \n\t" - "movd %3, %%mm1 \n\t" - "punpckldq %2, %%mm0 \n\t" - "punpckldq %4, %%mm1 \n\t" + "movd %0, %%mm0 \n\t" + "movd %2, %%mm1 \n\t" + "punpckldq %1, %%mm0 \n\t" + "punpckldq %3, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "pfmul %%mm1, %%mm0 \n\t" "pswapd %%mm1, %%mm1 \n\t" "pfmul %%mm1, %%mm2 \n\t" "pfpnacc %%mm2, %%mm0 \n\t" + ::"m"(in2[-2*k]), "m"(in1[2*k]), + "m"(tcos[k]), "m"(tsin[k]) + ); + asm volatile( "movq %%mm0, %0 \n\t" :"=m"(z[revtab[k]]) - :"m"(in2[-2*k]), "m"(in1[2*k]), - "m"(tcos[k]), "m"(tsin[k]) ); } @@ -190,11 +193,15 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, ); } + z += n8; asm volatile("movd %0, %%mm7" ::"r"(1<<31)); for(k = 0; k < n8; k++) { asm volatile( - "movq %4, %%mm0 \n\t" - "pswapd %5, %%mm1 \n\t" + "movq %0, %%mm0 \n\t" + "pswapd %1, %%mm1 \n\t" + ::"m"(z[k]), "m"(z[-1-k]) + ); + asm volatile( "movq %%mm0, %%mm2 \n\t" "pxor %%mm7, %%mm2 \n\t" "punpckldq %%mm1, %%mm2 \n\t" @@ -209,8 +216,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re } :"=m"(output[2*k]), "=m"(output[n2-2-2*k]), "=m"(output[n2+2*k]), "=m"(output[n-2-2*k]) - :"m"(z[n8+k]), "m"(z[n8-1-k]) - :"memory" + ::"memory" ); } asm volatile("emms"); |