diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2006-08-03 03:18:47 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2006-08-03 03:18:47 +0000 |
commit | 2dac4acfc0f2abbe28082cdb5c3ed775a78d2867 (patch) | |
tree | ae3bf6a7ddd9bb5bf29a305eef842488629965d9 | |
parent | 7bf0049623652b92a566999d37f0b481c2056d6e (diff) | |
download | ffmpeg-2dac4acfc0f2abbe28082cdb5c3ed775a78d2867.tar.gz |
sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8).
Originally committed as revision 5898 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/dsputil.c | 7 |
-rw-r--r-- | libavcodec/dsputil.h | 2 |
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 58 |
-rw-r--r-- | libavcodec/vorbis.c | 47 |
-rw-r--r-- | libavcodec/vorbis.h | 1 |
5 files changed, 95 insertions, 20 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 9b79b8659d..937dceb2c4 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -35,6 +35,9 @@ /* snow.c */ void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); +/* vorbis.c */ +void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); + uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t squareTbl[512] = {0, }; @@ -4090,6 +4093,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->inner_add_yblock = ff_snow_inner_add_yblock; #endif +#ifdef CONFIG_VORBIS_DECODER + c->vorbis_inverse_coupling = vorbis_inverse_coupling; +#endif + c->shrink[0]= ff_img_copy_plane; c->shrink[1]= ff_shrink22; c->shrink[2]= ff_shrink44; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index a2a5171129..a608350294 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -307,6 +307,8 @@ typedef struct DSPContext { void (*h261_loop_filter)(uint8_t *src, int stride); + void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); + /* (I)DCT */ void (*fdct)(DCTELEM *block/* align 16*/); void (*fdct248)(DCTELEM *block/* align 16*/); diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index ec6b2ad1a7..afcb02e4db 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) } #endif +static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) +{ + int i; + asm volatile("pxor %%mm7, %%mm7":); + for(i=0; i<blocksize; i+=2) { + asm volatile( + "movq %0, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0 + "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0 + "pslld $31, %%mm2 \n\t" // keep only the sign bit + "pxor %%mm2, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pand %%mm1, %%mm3 \n\t" + "pandn 
%%mm1, %%mm4 \n\t" + "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m))) + "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m))) + "movq %%mm3, %1 \n\t" + "movq %%mm0, %0 \n\t" + :"+m"(mag[i]), "+m"(ang[i]) + ::"memory" + ); + } + asm volatile("emms"); +} +static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize) +{ + int i; + for(i=0; i<blocksize; i+=4) { + asm volatile( + "movaps %0, %%xmm0 \n\t" + "movaps %1, %%xmm1 \n\t" + "pxor %%xmm2, %%xmm2 \n\t" + "pxor %%xmm3, %%xmm3 \n\t" + "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0 + "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0 + "pslld $31, %%xmm2 \n\t" // keep only the sign bit + "pxor %%xmm2, %%xmm1 \n\t" + "movaps %%xmm3, %%xmm4 \n\t" + "pand %%xmm1, %%xmm3 \n\t" + "pandn %%xmm1, %%xmm4 \n\t" + "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m))) + "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m))) + "movaps %%xmm3, %1 \n\t" + "movaps %%xmm0, %0 \n\t" + :"+m"(mag[i]), "+m"(ang[i]) + ::"memory" + ); + } +} + #ifdef CONFIG_SNOW_ENCODER extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); @@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; } #endif + + if(mm_flags & MM_SSE2) + c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2; + else if(mm_flags & MM_SSE) + c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; } #ifdef CONFIG_ENCODERS diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 9adec4bed7..cdf7cee579 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -929,6 +929,7 @@ static int vorbis_decode_init(AVCodecContext *avccontext) { int i, j, hdr_type; vc->avccontext = avccontext; + dsputil_init(&vc->dsp, avccontext); if (!headers_len) { av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n"); @@ -1443,6 +1444,31 @@ static int vorbis_residue_decode(vorbis_context 
*vc, vorbis_residue *vr, uint_fa return 0; } +void vorbis_inverse_coupling(float *mag, float *ang, int blocksize) +{ + int i; + for(i=0; i<blocksize; i++) + { + if (mag[i]>0.0) { + if (ang[i]>0.0) { + ang[i]=mag[i]-ang[i]; + } else { + float temp=ang[i]; + ang[i]=mag[i]; + mag[i]+=temp; + } + } else { + if (ang[i]>0.0) { + ang[i]+=mag[i]; + } else { + float temp=ang[i]; + ang[i]=mag[i]; + mag[i]-=temp; + } + } + } +} + // Decode the audio packet using the functions above #define BIAS 385 @@ -1541,26 +1567,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) { mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2; ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2; - for(j=0;j<blocksize/2;++j) { - float temp; - if (mag[j]>0.0) { - if (ang[j]>0.0) { - ang[j]=mag[j]-ang[j]; - } else { - temp=ang[j]; - ang[j]=mag[j]; - mag[j]+=temp; - } - } else { - if (ang[j]>0.0) { - ang[j]+=mag[j]; - } else { - temp=ang[j]; - ang[j]=mag[j]; - mag[j]-=temp; - } - } - } + vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2); } // Dotproduct diff --git a/libavcodec/vorbis.h b/libavcodec/vorbis.h index c818207d92..1274f1891f 100644 --- a/libavcodec/vorbis.h +++ b/libavcodec/vorbis.h @@ -87,6 +87,7 @@ typedef struct { typedef struct vorbis_context_s { AVCodecContext *avccontext; GetBitContext gb; + DSPContext dsp; MDCTContext mdct0; MDCTContext mdct1; |