diff options
author | Diego Biurrun <diego@biurrun.de> | 2005-12-22 01:10:11 +0000 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2005-12-22 01:10:11 +0000 |
commit | bb270c0896b39e1ae9277355e3c120ed3feb64a3 (patch) | |
tree | fc2fc2b1216d19acb3879abb6ea5a3b400f43fe4 /libavcodec/sh4/qpel.c | |
parent | 50827fcf44f34521df4708cdb633809b56fb9df3 (diff) | |
download | ffmpeg-bb270c0896b39e1ae9277355e3c120ed3feb64a3.tar.gz |
COSMETICS: tabs --> spaces, some prettyprinting
Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/sh4/qpel.c')
-rw-r--r-- | libavcodec/sh4/qpel.c | 506 |
1 files changed, 253 insertions, 253 deletions
diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c index 52eefdbcd0..a54795c2ab 100644 --- a/libavcodec/sh4/qpel.c +++ b/libavcodec/sh4/qpel.c @@ -1,183 +1,183 @@ /* - this is optimized for sh, which have post increment addressing (*p++) - some cpu may be index (p[n]) faster than post increment (*p++) + this is optimized for sh, which have post increment addressing (*p++) + some cpu may be index (p[n]) faster than post increment (*p++) */ -#define LD(adr) *(uint32_t*)(adr) +#define LD(adr) *(uint32_t*)(adr) #define PIXOP2(OPNAME, OP) \ /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ - OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ - OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }*/\ \ static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do { /* onlye src2 aligned */\ - OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do { /* onlye src2 aligned */\ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ - do {\ - OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \ - src1+=src_stride1; \ - src2+=src_stride2; \ - dst+=dst_stride; \ - } while(--h); \ + do {\ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \ + src1+=src_stride1; \ + src2+=src_stride2; \ + dst+=dst_stride; \ + } while(--h); \ }\ \ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ @@ -193,163 +193,163 @@ static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *s { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ \ static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LP(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LP(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LP(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LP(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ - UNPACK(a0,a1,LD32(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ + UNPACK(a0,a1,LD32(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LD32(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LD32(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LP(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ - UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ - UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ - OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LP(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ + UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ + OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ + UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ + OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LP(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ - UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ - OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ - UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ - OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LP(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \ + UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ + OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \ + UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ + OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { /* src1 is unaligned */\ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LD32(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ - UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ - OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ - UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ - OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { /* src1 is unaligned */\ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LD32(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ + UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ + OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ + UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ + OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - do { \ - uint32_t a0,a1,a2,a3; \ - UNPACK(a0,a1,LD32(src1),LP(src2)); \ - UNPACK(a2,a3,LP(src3),LP(src4)); \ - OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ - UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ - OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ - UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ - OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ - UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ - UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ - OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ - src1+=src_stride1;\ - src2+=src_stride2;\ - src3+=src_stride3;\ - src4+=src_stride4;\ - dst+=dst_stride;\ - } while(--h); \ + do { \ + uint32_t a0,a1,a2,a3; \ + UNPACK(a0,a1,LD32(src1),LP(src2)); \ + UNPACK(a2,a3,LP(src3),LP(src4)); \ + OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \ + UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \ + OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \ + UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \ + OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ + UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \ + UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \ + OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ + src1+=src_stride1;\ + src2+=src_stride2;\ + src3+=src_stride3;\ + src4+=src_stride4;\ + dst+=dst_stride;\ + } while(--h); \ } \ \ |