aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2006-08-03 03:18:47 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2006-08-03 03:18:47 +0000
commit: 2dac4acfc0f2abbe28082cdb5c3ed775a78d2867 (patch)
tree: ae3bf6a7ddd9bb5bf29a305eef842488629965d9
parent: 7bf0049623652b92a566999d37f0b481c2056d6e (diff)
download: ffmpeg-2dac4acfc0f2abbe28082cdb5c3ed775a78d2867.tar.gz
sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8). Originally committed as revision 5898 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/dsputil.c 7
-rw-r--r-- libavcodec/dsputil.h 2
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 58
-rw-r--r-- libavcodec/vorbis.c 47
-rw-r--r-- libavcodec/vorbis.h 1
5 files changed, 95 insertions, 20 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 9b79b8659d..937dceb2c4 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -35,6 +35,9 @@
/* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
+/* vorbis.c */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t squareTbl[512] = {0, };
@@ -4090,6 +4093,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif
+#ifdef CONFIG_VORBIS_DECODER
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+
c->shrink[0]= ff_img_copy_plane;
c->shrink[1]= ff_shrink22;
c->shrink[2]= ff_shrink44;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index a2a5171129..a608350294 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -307,6 +307,8 @@ typedef struct DSPContext {
void (*h261_loop_filter)(uint8_t *src, int stride);
+ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*fdct248)(DCTELEM *block/* align 16*/);
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index ec6b2ad1a7..afcb02e4db 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
}
#endif
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{ /* In-place inverse channel coupling of mag[] and ang[], handling two
+   * floats per loop iteration through the 64-bit MMX registers.
+   *
+   * mag, ang:  arrays that are both read and overwritten.
+   * blocksize: number of floats to process; the loop steps by 2 and each
+   *            iteration touches elements i and i+1.
+   *
+   * Per element, with m = mag[i], a = ang[i] and
+   * a' = a with its sign bit flipped where m >= 0.0 (a unchanged otherwise):
+   *     ang[i] = m + (a >= 0.0 ? a' : 0)
+   *     mag[i] = m - (a >= 0.0 ? 0 : a')
+   *
+   * NOTE(review): pfcmpge/pfadd/pfsub are 3DNow! mnemonics, not SSE, while
+   * the dispatcher added by this patch installs this function when MM_SSE is
+   * set - confirm behaviour on CPUs that have SSE but no 3DNow!.
+   * NOTE(review): the mask is m >= 0.0 whereas the C version tests
+   * mag > 0.0, so for mag == 0.0 with ang != 0.0 the two appear to produce
+   * opposite signs - confirm whether that matters.
+   */
+ int i;
+ asm volatile("pxor %%mm7, %%mm7":); // mm7 = all zero bits (0.0 in both lanes); the loop below relies on it surviving between asm statements
+ for(i=0; i<blocksize; i+=2) {
+ asm volatile(
+ "movq %0, %%mm0 \n\t" // mm0 = m (two floats from mag)
+ "movq %1, %%mm1 \n\t" // mm1 = a (two floats from ang)
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // mm2 = all-ones where m >= 0.0, else 0
+ "pfcmpge %%mm7, %%mm3 \n\t" // mm3 = all-ones where a >= 0.0, else 0
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit: 0x80000000 where m >= 0.0
+ "pxor %%mm2, %%mm1 \n\t" // mm1 = a' (a negated where m >= 0.0)
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t" // mm3 = a' where a >= 0.0, else 0
+ "pandn %%mm1, %%mm4 \n\t" // mm4 = a' where the a >= 0.0 mask is clear, else 0
+ "pfadd %%mm0, %%mm3 \n\t" // new ang = m + ((a >= 0.0) & a')
+ "pfsub %%mm4, %%mm0 \n\t" // new mag = m - (~(a >= 0.0) & a')
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm0, %0 \n\t"
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory" // the operands name one float each but 8 bytes are accessed; the memory clobber covers the rest
+ );
+ }
+ asm volatile("emms"); // clear the MMX state so later x87 floating point code works
+}
+static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
+{ /* In-place inverse channel coupling of mag[] and ang[], handling four
+   * floats per loop iteration in the 128-bit xmm registers.
+   *
+   * mag, ang:  arrays that are both read and overwritten; they are accessed
+   *            with movaps, which requires 16-byte-aligned addresses.
+   * blocksize: number of floats to process; the loop steps by 4 and each
+   *            iteration touches elements i .. i+3.
+   *
+   * Per element, with m = mag[i], a = ang[i] and
+   * a' = a with its sign bit flipped where m >= 0.0 (a unchanged otherwise):
+   *     ang[i] = m + (a >= 0.0 ? a' : 0)
+   *     mag[i] = m - (a >= 0.0 ? 0 : a')
+   *
+   * NOTE(review): the mask is m >= 0.0 whereas the C version tests
+   * mag > 0.0, so for mag == 0.0 with ang != 0.0 the two appear to produce
+   * opposite signs - confirm whether that matters.
+   * NOTE(review): xmm0-xmm4 are modified but not listed as clobbers -
+   * confirm the compiler never keeps live values in them here.
+   */
+ int i;
+ for(i=0; i<blocksize; i+=4) {
+ asm volatile(
+ "movaps %0, %%xmm0 \n\t" // xmm0 = m (four floats from mag)
+ "movaps %1, %%xmm1 \n\t" // xmm1 = a (four floats from ang)
+ "pxor %%xmm2, %%xmm2 \n\t" // xmm2 = 0.0
+ "pxor %%xmm3, %%xmm3 \n\t" // xmm3 = 0.0
+ "cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = all-ones where 0.0 <= m, else 0
+ "cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = all-ones where 0.0 <= a, else 0
+ "pslld $31, %%xmm2 \n\t" // keep only the sign bit: 0x80000000 where m >= 0.0
+ "pxor %%xmm2, %%xmm1 \n\t" // xmm1 = a' (a negated where m >= 0.0)
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "pand %%xmm1, %%xmm3 \n\t" // xmm3 = a' where a >= 0.0, else 0
+ "pandn %%xmm1, %%xmm4 \n\t" // xmm4 = a' where the a >= 0.0 mask is clear, else 0
+ "addps %%xmm0, %%xmm3 \n\t" // new ang = m + ((a >= 0.0) & a')
+ "subps %%xmm4, %%xmm0 \n\t" // new mag = m - (~(a >= 0.0) & a')
+ "movaps %%xmm3, %1 \n\t"
+ "movaps %%xmm0, %0 \n\t"
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory" // the operands name one float each but 16 bytes are accessed; the memory clobber covers the rest
+ );
+ }
+}
+
#ifdef CONFIG_SNOW_ENCODER
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
@@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
#endif
+
+ if(mm_flags & MM_SSE2)
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
+ else if(mm_flags & MM_SSE)
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
}
#ifdef CONFIG_ENCODERS
diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index 9adec4bed7..cdf7cee579 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -929,6 +929,7 @@ static int vorbis_decode_init(AVCodecContext *avccontext) {
int i, j, hdr_type;
vc->avccontext = avccontext;
+ dsputil_init(&vc->dsp, avccontext);
if (!headers_len) {
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
@@ -1443,6 +1444,31 @@ static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fa
return 0;
}
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
+{ /* Plain C inverse channel coupling, applied in place to the first
+   * blocksize elements of mag[] and ang[]. dsputil_init() in this patch
+   * installs it as DSPContext.vorbis_inverse_coupling when
+   * CONFIG_VORBIS_DECODER is defined.
+   *
+   * For each index i, with m = mag[i] and a = ang[i] on entry:
+   *     m >  0, a >  0:  ang[i] = m - a,  mag[i] unchanged
+   *     m >  0, a <= 0:  ang[i] = m,      mag[i] = m + a
+   *     m <= 0, a >  0:  ang[i] = a + m,  mag[i] unchanged
+   *     m <= 0, a <= 0:  ang[i] = m,      mag[i] = m - a
+   */
+ int i;
+ for(i=0; i<blocksize; i++)
+ {
+ if (mag[i]>0.0) {
+ if (ang[i]>0.0) {
+ ang[i]=mag[i]-ang[i];
+ } else {
+ float temp=ang[i]; // save a: ang[i] is overwritten before mag[i] is updated
+ ang[i]=mag[i];
+ mag[i]+=temp;
+ }
+ } else {
+ if (ang[i]>0.0) {
+ ang[i]+=mag[i];
+ } else {
+ float temp=ang[i]; // save a: ang[i] is overwritten before mag[i] is updated
+ ang[i]=mag[i];
+ mag[i]-=temp;
+ }
+ }
+ }
+}
+
// Decode the audio packet using the functions above
#define BIAS 385
@@ -1541,26 +1567,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
- for(j=0;j<blocksize/2;++j) {
- float temp;
- if (mag[j]>0.0) {
- if (ang[j]>0.0) {
- ang[j]=mag[j]-ang[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]+=temp;
- }
- } else {
- if (ang[j]>0.0) {
- ang[j]+=mag[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]-=temp;
- }
- }
- }
+ vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
}
// Dotproduct
diff --git a/libavcodec/vorbis.h b/libavcodec/vorbis.h
index c818207d92..1274f1891f 100644
--- a/libavcodec/vorbis.h
+++ b/libavcodec/vorbis.h
@@ -87,6 +87,7 @@ typedef struct {
typedef struct vorbis_context_s {
AVCodecContext *avccontext;
GetBitContext gb;
+ DSPContext dsp;
MDCTContext mdct0;
MDCTContext mdct1;