author     Diego Biurrun <diego@biurrun.de>   2005-12-22 01:10:11 +0000
committer  Diego Biurrun <diego@biurrun.de>   2005-12-22 01:10:11 +0000
commit     bb270c0896b39e1ae9277355e3c120ed3feb64a3 (patch)
tree       fc2fc2b1216d19acb3879abb6ea5a3b400f43fe4 /libavcodec/sh4
parent     50827fcf44f34521df4708cdb633809b56fb9df3 (diff)
download   ffmpeg-bb270c0896b39e1ae9277355e3c120ed3feb64a3.tar.gz
COSMETICS: tabs --> spaces, some prettyprinting
Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/sh4')
-rw-r--r--   libavcodec/sh4/dsputil_align.c   442
-rw-r--r--   libavcodec/sh4/dsputil_sh4.c     130
-rw-r--r--   libavcodec/sh4/idct_sh4.c        584
-rw-r--r--   libavcodec/sh4/qpel.c            506

4 files changed, 831 insertions, 831 deletions
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 5ac0fc887f..6e907ec79d 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -23,249 +23,249 @@
 #include "../dsputil.h"

-#define LP(p) *(uint32_t*)(p)
+#define LP(p) *(uint32_t*)(p)

-#define UNPACK(ph,pl,tt0,tt1) do { \
- uint32_t t0,t1; t0=tt0;t1=tt1; \
- ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
- pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
+#define UNPACK(ph,pl,tt0,tt1) do { \
+ uint32_t t0,t1; t0=tt0;t1=tt1; \
+ ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
+ pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)

-#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
-#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))
+#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
+#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))

 /* little endian */
-#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
-#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
+#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
+#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
 /* big
-#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
-#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
+#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
+#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
 */

-#define put(d,s) d = s
-#define avg(d,s) d = rnd_avg32(s,d)
+#define put(d,s) d = s
+#define avg(d,s) d = rnd_avg32(s,d)

-#define OP_C4(ofs) \
- ref-=ofs; \
- do { \
- OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
- ref+=stride; \
- dest+=stride; \
- } while(--height)
+#define OP_C4(ofs) \
+ ref-=ofs; \
+ do { \
+ OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
+ ref+=stride; \
+ dest+=stride; \
+ } while(--height)

-#define OP_C40() \
- do { \
- OP(LP(dest),LP(ref)); \
- ref+=stride; \
- dest+=stride; \
- } while(--height)
+#define OP_C40() \
+ do { \
+ OP(LP(dest),LP(ref)); \
+ ref+=stride; \
+ dest+=stride; \
+ } while(--height)

-#define OP put
+#define OP put

 static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
 {
- switch((int)ref&3){
- case 0: OP_C40(); return;
- case 1: OP_C4(1); return;
- case 2: OP_C4(2); return;
- case 3: OP_C4(3); return;
- }
+ switch((int)ref&3){
+ case 0: OP_C40(); return;
+ case 1: OP_C4(1); return;
+ case 2: OP_C4(2); return;
+ case 3: OP_C4(3); return;
+ }
 }

-#undef OP
-#define OP avg
+#undef OP
+#define OP avg

 static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
 {
- switch((int)ref&3){
- case 0: OP_C40(); return;
- case 1: OP_C4(1); return;
- case 2: OP_C4(2); return;
- case 3: OP_C4(3); return;
- }
+ switch((int)ref&3){
+ case 0: OP_C40(); return;
+ case 1: OP_C4(1); return;
+ case 2: OP_C4(2); return;
+ case 3: OP_C4(3); return;
+ }
 }

-#undef OP
+#undef OP

-#define OP_C(ofs,sz,avg2) \
+#define OP_C(ofs,sz,avg2) \
 { \
- ref-=ofs; \
- do { \
- uint32_t t0,t1; \
- t0 = LP(ref+0); \
- t1 = LP(ref+4); \
- OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
- t0 = LP(ref+8); \
- OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
+ ref-=ofs; \
+ do { \
+ uint32_t t0,t1; \
+ t0 = LP(ref+0); \
+ t1 = LP(ref+4); \
+ OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
+ t0 = LP(ref+8); \
+ OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
 if (sz==16) { \
- t1 = LP(ref+12); \
- OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
- t0 = LP(ref+16); \
- OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
+ t1 = LP(ref+12); \
+ OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
+ t0 = LP(ref+16); \
+ OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
 } \
- ref+=stride; \
- dest+= stride; \
- } while(--height); \
+ ref+=stride; \
+ dest+= stride; \
+ } while(--height); \
 }

 /* aligned */
-#define OP_C0(sz,avg2) \
+#define OP_C0(sz,avg2) \
 { \
- do { \
- OP(LP(dest+0), LP(ref+0)); \
- OP(LP(dest+4), LP(ref+4)); \
+ do { \
+ OP(LP(dest+0), LP(ref+0)); \
+ OP(LP(dest+4), LP(ref+4)); \
 if (sz==16) { \
- OP(LP(dest+8), LP(ref+8)); \
- OP(LP(dest+12), LP(ref+12)); \
+ OP(LP(dest+8), LP(ref+8)); \
+ OP(LP(dest+12), LP(ref+12)); \
 } \
- ref+=stride; \
- dest+= stride; \
- } while(--height); \
+ ref+=stride; \
+ dest+= stride; \
+ } while(--height); \
 }

-#define OP_X(ofs,sz,avg2) \
+#define OP_X(ofs,sz,avg2) \
 { \
- ref-=ofs; \
- do { \
- uint32_t t0,t1; \
- t0 = LP(ref+0); \
- t1 = LP(ref+4); \
- OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
- t0 = LP(ref+8); \
- OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
+ ref-=ofs; \
+ do { \
+ uint32_t t0,t1; \
+ t0 = LP(ref+0); \
+ t1 = LP(ref+4); \
+ OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
+ t0 = LP(ref+8); \
+ OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
 if (sz==16) { \
- t1 = LP(ref+12); \
- OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
- t0 = LP(ref+16); \
- OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
+ t1 = LP(ref+12); \
+ OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
+ t0 = LP(ref+16); \
+ OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
 } \
- ref+=stride; \
- dest+= stride; \
- } while(--height); \
+ ref+=stride; \
+ dest+= stride; \
+ } while(--height); \
 }

 /* aligned */
-#define OP_Y0(sz,avg2) \
+#define OP_Y0(sz,avg2) \
 { \
- uint32_t t0,t1,t2,t3,t; \
+ uint32_t t0,t1,t2,t3,t; \
 \
- t0 = LP(ref+0); \
- t1 = LP(ref+4); \
+ t0 = LP(ref+0); \
+ t1 = LP(ref+4); \
 if (sz==16) { \
- t2 = LP(ref+8); \
- t3 = LP(ref+12); \
+ t2 = LP(ref+8); \
+ t3 = LP(ref+12); \
 } \
- do { \
- ref += stride; \
+ do { \
+ ref += stride; \
 \
- t = LP(ref+0); \
- OP(LP(dest+0), avg2(t0,t)); t0 = t; \
- t = LP(ref+4); \
- OP(LP(dest+4), avg2(t1,t)); t1 = t; \
+ t = LP(ref+0); \
+ OP(LP(dest+0), avg2(t0,t)); t0 = t; \
+ t = LP(ref+4); \
+ OP(LP(dest+4), avg2(t1,t)); t1 = t; \
 if (sz==16) { \
- t = LP(ref+8); \
- OP(LP(dest+8), avg2(t2,t)); t2 = t; \
- t = LP(ref+12); \
- OP(LP(dest+12), avg2(t3,t)); t3 = t; \
+ t = LP(ref+8); \
+ OP(LP(dest+8), avg2(t2,t)); t2 = t; \
+ t = LP(ref+12); \
+ OP(LP(dest+12), avg2(t3,t)); t3 = t; \
 } \
- dest+= stride; \
- } while(--height); \
+ dest+= stride; \
+ } while(--height); \
 }

-#define OP_Y(ofs,sz,avg2) \
+#define OP_Y(ofs,sz,avg2) \
 { \
- uint32_t t0,t1,t2,t3,t,w0,w1; \
+ uint32_t t0,t1,t2,t3,t,w0,w1; \
 \
- ref-=ofs; \
- w0 = LP(ref+0); \
- w1 = LP(ref+4); \
- t0 = MERGE1(w0,w1,ofs); \
- w0 = LP(ref+8); \
- t1 = MERGE1(w1,w0,ofs); \
+ ref-=ofs; \
+ w0 = LP(ref+0); \
+ w1 = LP(ref+4); \
+ t0 = MERGE1(w0,w1,ofs); \
+ w0 = LP(ref+8); \
+ t1 = MERGE1(w1,w0,ofs); \
 if (sz==16) { \
- w1 = LP(ref+12); \
- t2 = MERGE1(w0,w1,ofs); \
- w0 = LP(ref+16); \
- t3 = MERGE1(w1,w0,ofs); \
+ w1 = LP(ref+12); \
+ t2 = MERGE1(w0,w1,ofs); \
+ w0 = LP(ref+16); \
+ t3 = MERGE1(w1,w0,ofs); \
 } \
- do { \
- ref += stride; \
+ do { \
+ ref += stride; \
 \
- w0 = LP(ref+0); \
- w1 = LP(ref+4); \
- t = MERGE1(w0,w1,ofs); \
- OP(LP(dest+0), avg2(t0,t)); t0 = t; \
- w0 = LP(ref+8); \
- t = MERGE1(w1,w0,ofs); \
- OP(LP(dest+4), avg2(t1,t)); t1 = t; \
+ w0 = LP(ref+0); \
+ w1 = LP(ref+4); \
+ t = MERGE1(w0,w1,ofs); \
+ OP(LP(dest+0), avg2(t0,t)); t0 = t; \
+ w0 = LP(ref+8); \
+ t = MERGE1(w1,w0,ofs); \
+ OP(LP(dest+4), avg2(t1,t)); t1 = t; \
 if (sz==16) { \
- w1 = LP(ref+12); \
- t = MERGE1(w0,w1,ofs); \
- OP(LP(dest+8), avg2(t2,t)); t2 = t; \
- w0 = LP(ref+16); \
- t = MERGE1(w1,w0,ofs); \
- OP(LP(dest+12), avg2(t3,t)); t3 = t; \
+ w1 = LP(ref+12); \
+ t = MERGE1(w0,w1,ofs); \
+ OP(LP(dest+8), avg2(t2,t)); t2 = t; \
+ w0 = LP(ref+16); \
+ t = MERGE1(w1,w0,ofs); \
+ OP(LP(dest+12), avg2(t3,t)); t3 = t; \
 } \
- dest+=stride; \
- } while(--height); \
+ dest+=stride; \
+ } while(--height); \
 }

 #define OP_X0(sz,avg2) OP_X(0,sz,avg2)
 #define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)

-#define OP_XY(ofs,sz,PACK) \
+#define OP_XY(ofs,sz,PACK) \
 { \
- uint32_t t2,t3,w0,w1; \
- uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \
+ uint32_t t2,t3,w0,w1; \
+ uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \
 \
- ref -= ofs; \
- w0 = LP(ref+0); \
- w1 = LP(ref+4); \
- UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
- w0 = LP(ref+8); \
- UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+ ref -= ofs; \
+ w0 = LP(ref+0); \
+ w1 = LP(ref+4); \
+ UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+ w0 = LP(ref+8); \
+ UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
 if (sz==16) { \
- w1 = LP(ref+12); \
- UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
- w0 = LP(ref+16); \
- UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+ w1 = LP(ref+12); \
+ UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+ w0 = LP(ref+16); \
+ UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
 } \
- do { \
- ref+=stride; \
- w0 = LP(ref+0); \
- w1 = LP(ref+4); \
- UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
- OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
- a0 = t2; a1 = t3; \
- w0 = LP(ref+8); \
- UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
- OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
- a2 = t2; a3 = t3; \
+ do { \
+ ref+=stride; \
+ w0 = LP(ref+0); \
+ w1 = LP(ref+4); \
+ UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+ OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
+ a0 = t2; a1 = t3; \
+ w0 = LP(ref+8); \
+ UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+ OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
+ a2 = t2; a3 = t3; \
 if (sz==16) { \
- w1 = LP(ref+12); \
- UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
- OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
- a4 = t2; a5 = t3; \
- w0 = LP(ref+16); \
- UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
- OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
- a6 = t2; a7 = t3; \
+ w1 = LP(ref+12); \
+ UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+ OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
+ a4 = t2; a5 = t3; \
+ w0 = LP(ref+16); \
+ UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+ OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
+ a6 = t2; a7 = t3; \
 } \
- dest+=stride; \
- } while(--height); \
+ dest+=stride; \
+ } while(--height); \
 }

-#define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
-static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
- const int stride, int height) \
+#define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
+static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
+ const int stride, int height) \
 { \
- switch((int)ref&3) { \
- case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
- case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
- case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
- case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
- } \
+ switch((int)ref&3) { \
+ case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
+ case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
+ case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
+ case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
+ } \
 }

 #define OP put
@@ -305,21 +305,21 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
 #undef OP

-#define put_no_rnd_pixels8_o put_rnd_pixels8_o
-#define put_no_rnd_pixels16_o put_rnd_pixels16_o
-#define avg_no_rnd_pixels8_o avg_rnd_pixels8_o
-#define avg_no_rnd_pixels16_o avg_rnd_pixels16_o
+#define put_no_rnd_pixels8_o put_rnd_pixels8_o
+#define put_no_rnd_pixels16_o put_rnd_pixels16_o
+#define avg_no_rnd_pixels8_o avg_rnd_pixels8_o
+#define avg_no_rnd_pixels16_o avg_rnd_pixels16_o

-#define put_pixels8_c put_rnd_pixels8_o
-#define put_pixels16_c put_rnd_pixels16_o
-#define avg_pixels8_c avg_rnd_pixels8_o
-#define avg_pixels16_c avg_rnd_pixels16_o
-#define put_no_rnd_pixels8_c put_rnd_pixels8_o
-#define put_no_rnd_pixels16_c put_rnd_pixels16_o
-#define avg_no_rnd_pixels8_c avg_rnd_pixels8_o
-#define avg_no_rnd_pixels16_c avg_rnd_pixels16_o
+#define put_pixels8_c put_rnd_pixels8_o
+#define put_pixels16_c put_rnd_pixels16_o
+#define avg_pixels8_c avg_rnd_pixels8_o
+#define avg_pixels16_c avg_rnd_pixels16_o
+#define put_no_rnd_pixels8_c put_rnd_pixels8_o
+#define put_no_rnd_pixels16_c put_rnd_pixels16_o
+#define avg_no_rnd_pixels8_c avg_rnd_pixels8_o
+#define avg_no_rnd_pixels16_c avg_rnd_pixels16_o

-#define QPEL
+#define QPEL

 #ifdef QPEL
@@ -329,41 +329,41 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
 {
- c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
- c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
- c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
- c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
- c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
- c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
- c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
- c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;
-
- c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
- c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;
-
- c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
- c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
- c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
- c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
- c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
- c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
- c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
- c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;
-
- c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
- c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
- c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
- c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
- c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
- c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
- c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
- c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
+ c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
+ c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
+ c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
+ c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
+ c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
+ c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
+ c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
+ c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;
+
+ c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
+ c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;
+
+ c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
+ c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
+ c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
+ c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
+ c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
+ c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
+ c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
+ c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;
+
+ c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
+ c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
+ c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
+ c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
+ c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
+ c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
+ c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
+ c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;

 #ifdef QPEL
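The put_pixels4_c/avg_pixels4_c dispatch above (switch on (int)ref&3, back the pointer off to alignment, then MERGE1 two aligned words) is the standard shift-and-or substitute for unaligned 32-bit loads on CPUs that fault on them. A standalone little-endian sketch follows; LP and MERGE1 are copied from the diff, while the union buffer, the memcpy reference and main() are illustrative assumptions, not part of the commit:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define LP(p) (*(const uint32_t*)(p))
#define MERGE1(a,b,ofs) ((ofs)==0 ? (a) : (((a)>>(8*(ofs))) | ((b)<<(32-8*(ofs)))))

int main(void)
{
    union { uint32_t w[4]; uint8_t b[16]; } u; /* union keeps the byte buffer 4-byte aligned */
    int i, ofs;
    for (i = 0; i < 16; i++) u.b[i] = (uint8_t)(i * 17);
    for (ofs = 0; ofs < 4; ofs++) {
        const uint8_t *ref  = u.b + 4 + ofs;   /* pointer with alignment offset ofs */
        const uint8_t *base = ref - ofs;       /* aligned back-off, as OP_C4 does with ref-=ofs */
        uint32_t merged = MERGE1(LP(base), LP(base + 4), ofs);
        uint32_t expect;
        memcpy(&expect, ref, 4);               /* portable unaligned load for comparison */
        printf("ofs=%d merged=%08x expect=%08x\n", ofs, (unsigned)merged, (unsigned)expect);
    }
    return 0;
}

Every merged word matches the memcpy load; the real code pays for the ofs==0 test only once per call, because DEFFUNC expands a separate loop body for each of the four alignment cases.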
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index bd22638357..1e61429de9 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -24,95 +24,95 @@
 static void memzero_align8(void *dst,size_t size)
 {
 #if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
- (char*)dst+=size;
- size/=8*4;
- asm(
+ (char*)dst+=size;
+ size/=8*4;
+ asm(
 #if defined(__SH4__)
- " fschg\n" //single float mode
+ " fschg\n" //single float mode
 #endif
- " fldi0 fr0\n"
- " fldi0 fr1\n"
- " fschg\n" // double
- "1: \n" \
- " dt %1\n"
- " fmov dr0,@-%0\n"
- " fmov dr0,@-%0\n"
- " fmov dr0,@-%0\n"
- " bf.s 1b\n"
- " fmov dr0,@-%0\n"
+ " fldi0 fr0\n"
+ " fldi0 fr1\n"
+ " fschg\n" // double
+ "1: \n" \
+ " dt %1\n"
+ " fmov dr0,@-%0\n"
+ " fmov dr0,@-%0\n"
+ " fmov dr0,@-%0\n"
+ " bf.s 1b\n"
+ " fmov dr0,@-%0\n"
 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
- " fschg" //back to single
+ " fschg" //back to single
 #endif
- : : "r"(dst),"r"(size): "memory" );
+ : : "r"(dst),"r"(size): "memory" );
 #else
- double *d = dst;
- size/=8*4;
- do {
- d[0] = 0.0;
- d[1] = 0.0;
- d[2] = 0.0;
- d[3] = 0.0;
- d+=4;
- } while(--size);
+ double *d = dst;
+ size/=8*4;
+ do {
+ d[0] = 0.0;
+ d[1] = 0.0;
+ d[2] = 0.0;
+ d[3] = 0.0;
+ d+=4;
+ } while(--size);
 #endif
 }

 static void clear_blocks_sh4(DCTELEM *blocks)
 {
-// if (((int)blocks&7)==0)
- memzero_align8(blocks,sizeof(DCTELEM)*6*64);
+// if (((int)blocks&7)==0)
+ memzero_align8(blocks,sizeof(DCTELEM)*6*64);
 }

 extern void idct_sh4(DCTELEM *block);

 static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
- idct_sh4(block);
- int i;
- uint8_t *cm = cropTbl + MAX_NEG_CROP;
- for(i=0;i<8;i++) {
- dest[0] = cm[block[0]];
- dest[1] = cm[block[1]];
- dest[2] = cm[block[2]];
- dest[3] = cm[block[3]];
- dest[4] = cm[block[4]];
- dest[5] = cm[block[5]];
- dest[6] = cm[block[6]];
- dest[7] = cm[block[7]];
- dest+=line_size;
- block+=8;
- }
+ idct_sh4(block);
+ int i;
+ uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ for(i=0;i<8;i++) {
+ dest[0] = cm[block[0]];
+ dest[1] = cm[block[1]];
+ dest[2] = cm[block[2]];
+ dest[3] = cm[block[3]];
+ dest[4] = cm[block[4]];
+ dest[5] = cm[block[5]];
+ dest[6] = cm[block[6]];
+ dest[7] = cm[block[7]];
+ dest+=line_size;
+ block+=8;
+ }
 }

 static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
- idct_sh4(block);
- int i;
- uint8_t *cm = cropTbl + MAX_NEG_CROP;
- for(i=0;i<8;i++) {
- dest[0] = cm[dest[0]+block[0]];
- dest[1] = cm[dest[1]+block[1]];
- dest[2] = cm[dest[2]+block[2]];
- dest[3] = cm[dest[3]+block[3]];
- dest[4] = cm[dest[4]+block[4]];
- dest[5] = cm[dest[5]+block[5]];
- dest[6] = cm[dest[6]+block[6]];
- dest[7] = cm[dest[7]+block[7]];
- dest+=line_size;
- block+=8;
- }
+ idct_sh4(block);
+ int i;
+ uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ for(i=0;i<8;i++) {
+ dest[0] = cm[dest[0]+block[0]];
+ dest[1] = cm[dest[1]+block[1]];
+ dest[2] = cm[dest[2]+block[2]];
+ dest[3] = cm[dest[3]+block[3]];
+ dest[4] = cm[dest[4]+block[4]];
+ dest[5] = cm[dest[5]+block[5]];
+ dest[6] = cm[dest[6]+block[6]];
+ dest[7] = cm[dest[7]+block[7]];
+ dest+=line_size;
+ block+=8;
+ }
 }

 extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);

 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
 {
- const int idct_algo= avctx->idct_algo;
- dsputil_init_align(c,avctx);
+ const int idct_algo= avctx->idct_algo;
+ dsputil_init_align(c,avctx);

- c->clear_blocks = clear_blocks_sh4;
- if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
- c->idct_put = idct_put;
- c->idct_add = idct_add;
+ c->clear_blocks = clear_blocks_sh4;
+ if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
+ c->idct_put = idct_put;
+ c->idct_add = idct_add;
 c->idct = idct_sh4;
- c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
- }
+ c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
+ }
 }
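idct_put()/idct_add() above clamp each IDCT output with the cm[] lookup, a pointer into the middle of a crop table so that any index from -MAX_NEG_CROP to 255+MAX_NEG_CROP is legal. cropTbl and MAX_NEG_CROP come from dsputil and are not shown in this commit; the sketch below rebuilds the idea with an assumed headroom of 1024 on each side:

#include <stdint.h>
#include <stdio.h>

#define MAX_NEG_CROP 1024 /* assumption: headroom on each side of the 0..255 range */
static uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];

static void init_crop(void)
{
    int i;
    for (i = 0; i < 256 + 2 * MAX_NEG_CROP; i++) {
        int v = i - MAX_NEG_CROP;
        cropTbl[i] = v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
    }
}

int main(void)
{
    const uint8_t *cm;
    init_crop();
    cm = cropTbl + MAX_NEG_CROP;  /* same idiom as in idct_put/idct_add */
    printf("%d %d %d\n", cm[-5], cm[128], cm[300]); /* prints: 0 128 255 */
    return 0;
}

One table lookup replaces two compares per pixel, which is why the 8x8 store loops above index cm[] instead of branching.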
diff --git a/libavcodec/sh4/idct_sh4.c b/libavcodec/sh4/idct_sh4.c
index 9d115c8fe2..68c4e6edf8 100644
--- a/libavcodec/sh4/idct_sh4.c
+++ b/libavcodec/sh4/idct_sh4.c
@@ -19,65 +19,65 @@
 */
 #include "../dsputil.h"

-#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
-#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
-#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
-#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
-#define c5 0.78569495838710234903 /* sqrt(2)*cos(5*pi/16) */
-#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */
-#define c7 0.27589937928294311353 /* sqrt(2)*cos(7*pi/16) */
-
-const static float even_table[] __attribute__ ((aligned(8))) = {
- c4, c4, c4, c4,
- c2, c6,-c6,-c2,
- c4,-c4,-c4, c4,
- c6,-c2, c2,-c6
+#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
+#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
+#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
+#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
+#define c5 0.78569495838710234903 /* sqrt(2)*cos(5*pi/16) */
+#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */
+#define c7 0.27589937928294311353 /* sqrt(2)*cos(7*pi/16) */
+
+const static float even_table[] __attribute__ ((aligned(8))) = {
+ c4, c4, c4, c4,
+ c2, c6,-c6,-c2,
+ c4,-c4,-c4, c4,
+ c6,-c2, c2,-c6
 };

-const static float odd_table[] __attribute__ ((aligned(8))) = {
- c1, c3, c5, c7,
- c3,-c7,-c1,-c5,
- c5,-c1, c7, c3,
- c7,-c5, c3,-c1
+const static float odd_table[] __attribute__ ((aligned(8))) = {
+ c1, c3, c5, c7,
+ c3,-c7,-c1,-c5,
+ c5,-c1, c7, c3,
+ c7,-c5, c3,-c1
 };

-#undef c1
-#undef c2
-#undef c3
-#undef c4
-#undef c5
-#undef c6
-#undef c7
+#undef c1
+#undef c2
+#undef c3
+#undef c4
+#undef c5
+#undef c6
+#undef c7

 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)

-#define load_matrix(table) \
- __asm__ volatile( \
- " fschg\n" \
- " fmov @%0+,xd0\n" \
- " fmov @%0+,xd2\n" \
- " fmov @%0+,xd4\n" \
- " fmov @%0+,xd6\n" \
- " fmov @%0+,xd8\n" \
- " fmov @%0+,xd10\n" \
- " fmov @%0+,xd12\n" \
- " fmov @%0+,xd14\n" \
- " fschg\n" \
- :\
- : "r"(table)\
- : "0" \
- )
-
-#define ftrv() \
- __asm__ volatile("ftrv xmtrx,fv0" \
- : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
- : "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
-
-#define DEFREG \
- register float fr0 __asm__("fr0"); \
- register float fr1 __asm__("fr1"); \
- register float fr2 __asm__("fr2"); \
- register float fr3 __asm__("fr3")
+#define load_matrix(table) \
+ __asm__ volatile( \
+ " fschg\n" \
+ " fmov @%0+,xd0\n" \
+ " fmov @%0+,xd2\n" \
+ " fmov @%0+,xd4\n" \
+ " fmov @%0+,xd6\n" \
+ " fmov @%0+,xd8\n" \
+ " fmov @%0+,xd10\n" \
+ " fmov @%0+,xd12\n" \
+ " fmov @%0+,xd14\n" \
+ " fschg\n" \
+ :\
+ : "r"(table)\
+ : "0" \
+ )
+
+#define ftrv() \
+ __asm__ volatile("ftrv xmtrx,fv0" \
+ : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
+ : "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
+
+#define DEFREG \
+ register float fr0 __asm__("fr0"); \
+ register float fr1 __asm__("fr1"); \
+ register float fr2 __asm__("fr2"); \
+ register float fr3 __asm__("fr3")

 #else
@@ -85,40 +85,40 @@ const static float odd_table[] __attribute__ ((aligned(8))) = {

 static void ftrv_(const float xf[],float fv[])
 {
- float f0,f1,f2,f3;
- f0 = fv[0];
- f1 = fv[1];
- f2 = fv[2];
- f3 = fv[3];
- fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
- fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
- fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
- fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
+ float f0,f1,f2,f3;
+ f0 = fv[0];
+ f1 = fv[1];
+ f2 = fv[2];
+ f3 = fv[3];
+ fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
+ fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
+ fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
+ fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
 }

 static void load_matrix_(float xf[],const float table[])
 {
- int i;
- for(i=0;i<16;i++) xf[i]=table[i];
+ int i;
+ for(i=0;i<16;i++) xf[i]=table[i];
 }

-#define ftrv() ftrv_(xf,fv)
-#define load_matrix(table) load_matrix_(xf,table)
+#define ftrv() ftrv_(xf,fv)
+#define load_matrix(table) load_matrix_(xf,table)

-#define DEFREG \
- float fv[4],xf[16]
+#define DEFREG \
+ float fv[4],xf[16]

-#define fr0 fv[0]
-#define fr1 fv[1]
-#define fr2 fv[2]
-#define fr3 fv[3]
+#define fr0 fv[0]
+#define fr1 fv[1]
+#define fr2 fv[2]
+#define fr3 fv[3]

 #endif

 #if 1
-#define DESCALE(x,n) (x)*(1.0f/(1<<(n)))
+#define DESCALE(x,n) (x)*(1.0f/(1<<(n)))
 #else
-#define DESCALE(x,n) (((int)(x)+(1<<(n-1)))>>(n))
+#define DESCALE(x,n) (((int)(x)+(1<<(n-1)))>>(n))
 #endif

 /* this code work worse on gcc cvs. 3.2.3 work fine */
@@ -129,236 +129,236 @@
 void idct_sh4(DCTELEM *block)
 {
- DEFREG;
+ DEFREG;

- int i;
- float tblock[8*8],*fblock;
- int ofs1,ofs2,ofs3;
+ int i;
+ float tblock[8*8],*fblock;
+ int ofs1,ofs2,ofs3;

 #if defined(__SH4__)
-#error "FIXME!! change to single float"
+#error "FIXME!! change to single float"
 #endif

- /* row */
-
- /* even part */
- load_matrix(even_table);
-
- fblock = tblock+4;
- i = 8;
- do {
- fr0 = block[0];
- fr1 = block[2];
- fr2 = block[4];
- fr3 = block[6];
- block+=8;
- ftrv();
- *--fblock = fr3;
- *--fblock = fr2;
- *--fblock = fr1;
- *--fblock = fr0;
- fblock+=8+4;
- } while(--i);
- block-=8*8;
- fblock-=8*8+4;
-
- load_matrix(odd_table);
-
- i = 8;
-
-// ofs1 = sizeof(float)*1;
-// ofs2 = sizeof(float)*2;
-// ofs3 = sizeof(float)*3;
-
- do {
- float t0,t1,t2,t3;
- fr0 = block[1];
- fr1 = block[3];
- fr2 = block[5];
- fr3 = block[7];
- block+=8;
- ftrv();
- t0 = *fblock++;
- t1 = *fblock++;
- t2 = *fblock++;
- t3 = *fblock++;
- fblock+=4;
- *--fblock = t0 - fr0;
- *--fblock = t1 - fr1;
- *--fblock = t2 - fr2;
- *--fblock = t3 - fr3;
- *--fblock = t3 + fr3;
- *--fblock = t2 + fr2;
- *--fblock = t1 + fr1;
- *--fblock = t0 + fr0;
- fblock+=8;
- } while(--i);
- block-=8*8;
- fblock-=8*8;
-
- /* col */
-
- /* even part */
- load_matrix(even_table);
-
- ofs1 = sizeof(float)*2*8;
- ofs2 = sizeof(float)*4*8;
- ofs3 = sizeof(float)*6*8;
-
- i = 8;
-
-#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
-
- do {
- fr0 = OA(fblock, 0);
- fr1 = OA(fblock,ofs1);
- fr2 = OA(fblock,ofs2);
- fr3 = OA(fblock,ofs3);
- ftrv();
- OA(fblock,0 ) = fr0;
- OA(fblock,ofs1) = fr1;
- OA(fblock,ofs2) = fr2;
- OA(fblock,ofs3) = fr3;
- fblock++;
- } while(--i);
- fblock-=8;
-
- load_matrix(odd_table);
-
- i=8;
- do {
- float t0,t1,t2,t3;
- t0 = OA(fblock, 0); /* [8*0] */
- t1 = OA(fblock,ofs1); /* [8*2] */
- t2 = OA(fblock,ofs2); /* [8*4] */
- t3 = OA(fblock,ofs3); /* [8*6] */
- fblock+=8;
- fr0 = OA(fblock, 0); /* [8*1] */
- fr1 = OA(fblock,ofs1); /* [8*3] */
- fr2 = OA(fblock,ofs2); /* [8*5] */
- fr3 = OA(fblock,ofs3); /* [8*7] */
- fblock+=-8+1;
- ftrv();
- block[8*0] = DESCALE(t0 + fr0,3);
- block[8*7] = DESCALE(t0 - fr0,3);
- block[8*1] = DESCALE(t1 + fr1,3);
- block[8*6] = DESCALE(t1 - fr1,3);
- block[8*2] = DESCALE(t2 + fr2,3);
- block[8*5] = DESCALE(t2 - fr2,3);
- block[8*3] = DESCALE(t3 + fr3,3);
- block[8*4] = DESCALE(t3 - fr3,3);
- block++;
- } while(--i);
+ /* row */
+
+ /* even part */
+ load_matrix(even_table);
+
+ fblock = tblock+4;
+ i = 8;
+ do {
+ fr0 = block[0];
+ fr1 = block[2];
+ fr2 = block[4];
+ fr3 = block[6];
+ block+=8;
+ ftrv();
+ *--fblock = fr3;
+ *--fblock = fr2;
+ *--fblock = fr1;
+ *--fblock = fr0;
+ fblock+=8+4;
+ } while(--i);
+ block-=8*8;
+ fblock-=8*8+4;
+
+ load_matrix(odd_table);
+
+ i = 8;
+
+// ofs1 = sizeof(float)*1;
+// ofs2 = sizeof(float)*2;
+// ofs3 = sizeof(float)*3;
+
+ do {
+ float t0,t1,t2,t3;
+ fr0 = block[1];
+ fr1 = block[3];
+ fr2 = block[5];
+ fr3 = block[7];
+ block+=8;
+ ftrv();
+ t0 = *fblock++;
+ t1 = *fblock++;
+ t2 = *fblock++;
+ t3 = *fblock++;
+ fblock+=4;
+ *--fblock = t0 - fr0;
+ *--fblock = t1 - fr1;
+ *--fblock = t2 - fr2;
+ *--fblock = t3 - fr3;
+ *--fblock = t3 + fr3;
+ *--fblock = t2 + fr2;
+ *--fblock = t1 + fr1;
+ *--fblock = t0 + fr0;
+ fblock+=8;
+ } while(--i);
+ block-=8*8;
+ fblock-=8*8;
+
+ /* col */
+
+ /* even part */
+ load_matrix(even_table);
+
+ ofs1 = sizeof(float)*2*8;
+ ofs2 = sizeof(float)*4*8;
+ ofs3 = sizeof(float)*6*8;
+
+ i = 8;
+
+#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
+
+ do {
+ fr0 = OA(fblock, 0);
+ fr1 = OA(fblock,ofs1);
+ fr2 = OA(fblock,ofs2);
+ fr3 = OA(fblock,ofs3);
+ ftrv();
+ OA(fblock,0 ) = fr0;
+ OA(fblock,ofs1) = fr1;
+ OA(fblock,ofs2) = fr2;
+ OA(fblock,ofs3) = fr3;
+ fblock++;
+ } while(--i);
+ fblock-=8;
+
+ load_matrix(odd_table);
+
+ i=8;
+ do {
+ float t0,t1,t2,t3;
+ t0 = OA(fblock, 0); /* [8*0] */
+ t1 = OA(fblock,ofs1); /* [8*2] */
+ t2 = OA(fblock,ofs2); /* [8*4] */
+ t3 = OA(fblock,ofs3); /* [8*6] */
+ fblock+=8;
+ fr0 = OA(fblock, 0); /* [8*1] */
+ fr1 = OA(fblock,ofs1); /* [8*3] */
+ fr2 = OA(fblock,ofs2); /* [8*5] */
+ fr3 = OA(fblock,ofs3); /* [8*7] */
+ fblock+=-8+1;
+ ftrv();
+ block[8*0] = DESCALE(t0 + fr0,3);
+ block[8*7] = DESCALE(t0 - fr0,3);
+ block[8*1] = DESCALE(t1 + fr1,3);
+ block[8*6] = DESCALE(t1 - fr1,3);
+ block[8*2] = DESCALE(t2 + fr2,3);
+ block[8*5] = DESCALE(t2 - fr2,3);
+ block[8*3] = DESCALE(t3 + fr3,3);
+ block[8*4] = DESCALE(t3 - fr3,3);
+ block++;
+ } while(--i);

 #if defined(__SH4__)
-#error "FIXME!! change to double"
+#error "FIXME!! change to double"
 #endif
 }

 #else

 void idct_sh4(DCTELEM *block)
 {
- DEFREG;
-
- int i;
- float tblock[8*8],*fblock;
-
- /* row */
-
- /* even part */
- load_matrix(even_table);
-
- fblock = tblock;
- i = 8;
- do {
- fr0 = block[0];
- fr1 = block[2];
- fr2 = block[4];
- fr3 = block[6];
- block+=8;
- ftrv();
- fblock[0] = fr0;
- fblock[2] = fr1;
- fblock[4] = fr2;
- fblock[6] = fr3;
- fblock+=8;
- } while(--i);
- block-=8*8;
- fblock-=8*8;
-
- load_matrix(odd_table);
-
- i = 8;
-
- do {
- float t0,t1,t2,t3;
- fr0 = block[1];
- fr1 = block[3];
- fr2 = block[5];
- fr3 = block[7];
- block+=8;
- ftrv();
- t0 = fblock[0];
- t1 = fblock[2];
- t2 = fblock[4];
- t3 = fblock[6];
- fblock[0] = t0 + fr0;
- fblock[7] = t0 - fr0;
- fblock[1] = t1 + fr1;
- fblock[6] = t1 - fr1;
- fblock[2] = t2 + fr2;
- fblock[5] = t2 - fr2;
- fblock[3] = t3 + fr3;
- fblock[4] = t3 - fr3;
- fblock+=8;
- } while(--i);
- block-=8*8;
- fblock-=8*8;
-
- /* col */
-
- /* even part */
- load_matrix(even_table);
-
- i = 8;
-
- do {
- fr0 = fblock[8*0];
- fr1 = fblock[8*2];
- fr2 = fblock[8*4];
- fr3 = fblock[8*6];
- ftrv();
- fblock[8*0] = fr0;
- fblock[8*2] = fr1;
- fblock[8*4] = fr2;
- fblock[8*6] = fr3;
- fblock++;
- } while(--i);
- fblock-=8;
-
- load_matrix(odd_table);
-
- i=8;
- do {
- float t0,t1,t2,t3;
- fr0 = fblock[8*1];
- fr1 = fblock[8*3];
- fr2 = fblock[8*5];
- fr3 = fblock[8*7];
- ftrv();
- t0 = fblock[8*0];
- t1 = fblock[8*2];
- t2 = fblock[8*4];
- t3 = fblock[8*6];
- fblock++;
- block[8*0] = DESCALE(t0 + fr0,3);
- block[8*7] = DESCALE(t0 - fr0,3);
- block[8*1] = DESCALE(t1 + fr1,3);
- block[8*6] = DESCALE(t1 - fr1,3);
- block[8*2] = DESCALE(t2 + fr2,3);
- block[8*5] = DESCALE(t2 - fr2,3);
- block[8*3] = DESCALE(t3 + fr3,3);
- block[8*4] = DESCALE(t3 - fr3,3);
- block++;
- } while(--i);
+ DEFREG;
+
+ int i;
+ float tblock[8*8],*fblock;
+
+ /* row */
+
+ /* even part */
+ load_matrix(even_table);
+
+ fblock = tblock;
+ i = 8;
+ do {
+ fr0 = block[0];
+ fr1 = block[2];
+ fr2 = block[4];
+ fr3 = block[6];
+ block+=8;
+ ftrv();
+ fblock[0] = fr0;
+ fblock[2] = fr1;
+ fblock[4] = fr2;
+ fblock[6] = fr3;
+ fblock+=8;
+ } while(--i);
+ block-=8*8;
+ fblock-=8*8;
+
+ load_matrix(odd_table);
+
+ i = 8;
+
+ do {
+ float t0,t1,t2,t3;
+ fr0 = block[1];
+ fr1 = block[3];
+ fr2 = block[5];
+ fr3 = block[7];
+ block+=8;
+ ftrv();
+ t0 = fblock[0];
+ t1 = fblock[2];
+ t2 = fblock[4];
+ t3 = fblock[6];
+ fblock[0] = t0 + fr0;
+ fblock[7] = t0 - fr0;
+ fblock[1] = t1 + fr1;
+ fblock[6] = t1 - fr1;
+ fblock[2] = t2 + fr2;
+ fblock[5] = t2 - fr2;
+ fblock[3] = t3 + fr3;
+ fblock[4] = t3 - fr3;
+ fblock+=8;
+ } while(--i);
+ block-=8*8;
+ fblock-=8*8;
+
+ /* col */
+
+ /* even part */
+ load_matrix(even_table);
+
+ i = 8;
+
+ do {
+ fr0 = fblock[8*0];
+ fr1 = fblock[8*2];
+ fr2 = fblock[8*4];
+ fr3 = fblock[8*6];
+ ftrv();
+ fblock[8*0] = fr0;
+ fblock[8*2] = fr1;
+ fblock[8*4] = fr2;
+ fblock[8*6] = fr3;
+ fblock++;
+ } while(--i);
+ fblock-=8;
+
+ load_matrix(odd_table);
+
+ i=8;
+ do {
+ float t0,t1,t2,t3;
+ fr0 = fblock[8*1];
+ fr1 = fblock[8*3];
+ fr2 = fblock[8*5];
+ fr3 = fblock[8*7];
+ ftrv();
+ t0 = fblock[8*0];
+ t1 = fblock[8*2];
+ t2 = fblock[8*4];
+ t3 = fblock[8*6];
+ fblock++;
+ block[8*0] = DESCALE(t0 + fr0,3);
+ block[8*7] = DESCALE(t0 - fr0,3);
+ block[8*1] = DESCALE(t1 + fr1,3);
+ block[8*6] = DESCALE(t1 - fr1,3);
+ block[8*2] = DESCALE(t2 + fr2,3);
+ block[8*5] = DESCALE(t2 - fr2,3);
+ block[8*3] = DESCALE(t3 + fr3,3);
+ block[8*4] = DESCALE(t3 - fr3,3);
+ block++;
+ } while(--i);
 }

 #endif
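The C fallback ftrv_() above is a plain 4x4 matrix-vector product with the matrix stored column-major, mirroring what the SH4 ftrv instruction does with xmtrx; even_table/odd_table hold the sqrt(2)*cos(k*pi/16) factors of the split even/odd 8-point IDCT. A standalone sketch (ftrv_ and the even table copied from the diff; the DC-only test vector and main() are illustrative) shows the first step of the row pass, where a DC-only even input spreads c4*DC to all four outputs:

#include <stdio.h>

#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */

static const float even_table[16] = {
    c4, c4, c4, c4,
    c2, c6,-c6,-c2,
    c4,-c4,-c4, c4,
    c6,-c2, c2,-c6
};

static void ftrv_(const float xf[], float fv[]) /* fv = xf (column-major) * fv */
{
    float f0 = fv[0], f1 = fv[1], f2 = fv[2], f3 = fv[3];
    fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
    fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
    fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
    fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
}

int main(void)
{
    float fv[4] = { 64.0f, 0.0f, 0.0f, 0.0f }; /* even-index inputs of a DC-only row */
    ftrv_(even_table, fv);
    printf("%g %g %g %g\n", fv[0], fv[1], fv[2], fv[3]); /* prints: 64 64 64 64 */
    return 0;
}

Splitting the 8-point IDCT into a 4x4 even part and a 4x4 odd part is what lets each row or column pass run as two matrix-vector issues, one per table.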
diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c
index 52eefdbcd0..a54795c2ab 100644
--- a/libavcodec/sh4/qpel.c
+++ b/libavcodec/sh4/qpel.c
@@ -1,183 +1,183 @@
 /*
- this is optimized for sh, which have post increment addressing (*p++)
- some cpu may be index (p[n]) faster than post increment (*p++)
+ this is optimized for sh, which have post increment addressing (*p++)
+ some cpu may be index (p[n]) faster than post increment (*p++)
 */

-#define LD(adr) *(uint32_t*)(adr)
+#define LD(adr) *(uint32_t*)(adr)

 #define PIXOP2(OPNAME, OP) \
 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }*/\
 \
 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do { /* onlye src2 aligned */\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do { /* onlye src2 aligned */\
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
 {\
- do {\
- OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
+ do {\
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
+ src1+=src_stride1; \
+ src2+=src_stride2; \
+ dst+=dst_stride; \
+ } while(--h); \
 }\
 \
 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
@@ -193,163 +193,163 @@ static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s
 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
 \
 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LP(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LP(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LP(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LP(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
+ UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LP(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
- UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
- OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
- UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
- OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LP(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
+ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
+ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+ OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LP(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
- UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
- OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
- UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
- OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LP(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
+ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+ OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
+ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+ OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { /* src1 is unaligned */\
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
- UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
- OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
- UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
- OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { /* src1 is unaligned */\
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+ OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+ OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
- do { \
- uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
- UNPACK(a2,a3,LP(src3),LP(src4)); \
- OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
- UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
- OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
- UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
- OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
- UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
- OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
- src1+=src_stride1;\
- src2+=src_stride2;\
- src3+=src_stride3;\
- src4+=src_stride4;\
- dst+=dst_stride;\
- } while(--h); \
+ do { \
+ uint32_t a0,a1,a2,a3; \
+ UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a2,a3,LP(src3),LP(src4)); \
+ OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+ OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+ UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+ OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
+ UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+ UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+ OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
+ src1+=src_stride1;\
+ src2+=src_stride2;\
+ src3+=src_stride3;\
+ src4+=src_stride4;\
+ dst+=dst_stride;\
+ } while(--h); \
 } \
 \
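The l4 averaging functions above combine four source words with the UNPACK/rnd_PACK pair from dsputil_align.c: UNPACK splits every byte into a 6-bit high part (pre-divided by 4) and a 2-bit low part, so four values can be summed with no cross-byte carries. A standalone check follows, with UNPACK and rnd_PACK copied from this commit and BYTE_VEC32 assumed to be the usual byte-replication helper from dsputil.h:

#include <stdint.h>
#include <stdio.h>

#define BYTE_VEC32(c) ((uint32_t)(c) * 0x01010101u) /* assumption: replicate c into all 4 bytes */
#define UNPACK(ph,pl,tt0,tt1) do { \
    uint32_t t0,t1; t0=tt0;t1=tt1; \
    ph = ((t0 & ~BYTE_VEC32(0x03))>>2) + ((t1 & ~BYTE_VEC32(0x03))>>2); \
    pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
#define rnd_PACK(ph,pl,nph,npl) ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)))

int main(void)
{
    uint32_t a = 0x01FE80FFu, b = 0x00FF7F01u, c = 0x10203040u, d = 0xDEADBEEFu;
    uint32_t ph, pl, nph, npl, packed;
    int i;
    UNPACK(ph, pl, a, b);   /* high 6-bit parts summed, low 2-bit parts summed */
    UNPACK(nph, npl, c, d);
    packed = rnd_PACK(ph, pl, nph, npl);
    for (i = 0; i < 4; i++) {  /* compare every byte lane against a scalar reference */
        unsigned pa = (a>>(8*i))&0xFF, pb = (b>>(8*i))&0xFF;
        unsigned pc = (c>>(8*i))&0xFF, pd = (d>>(8*i))&0xFF;
        unsigned want = (pa + pb + pc + pd + 2) >> 2; /* rounded 4-way average */
        unsigned got  = (packed >> (8*i)) & 0xFF;
        printf("byte %d: got %3u want %3u\n", i, got, want);
    }
    return 0;
}

Every lane matches the scalar average. no_rnd_PACK differs only in adding BYTE_VEC32(0x01) instead of BYTE_VEC32(0x02), i.e. it uses the smaller rounding bias that the no_rnd variants require.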