aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/i386
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2005-12-17 18:14:38 +0000
committerDiego Biurrun <diego@biurrun.de>2005-12-17 18:14:38 +0000
commit115329f16062074e11ccf3b89ead6176606c9696 (patch)
treee98aa993905a702688bf821737ab9a443969fc28 /libavcodec/i386
parentd76319b1ab716320f6e6a4d690b85fe4504ebd5b (diff)
downloadffmpeg-115329f16062074e11ccf3b89ead6176606c9696.tar.gz
COSMETICS: Remove all trailing whitespace.
Originally committed as revision 4749 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r--libavcodec/i386/cputest.c28
-rw-r--r--libavcodec/i386/dsputil_mmx.c228
-rw-r--r--libavcodec/i386/dsputil_mmx_avg.h22
-rw-r--r--libavcodec/i386/dsputil_mmx_rnd.h2
-rw-r--r--libavcodec/i386/fdct_mmx.c174
-rw-r--r--libavcodec/i386/fft_sse.c20
-rw-r--r--libavcodec/i386/h264dsp_mmx.c4
-rw-r--r--libavcodec/i386/idct_mmx_xvid.c12
-rw-r--r--libavcodec/i386/motion_est_mmx.c2
-rw-r--r--libavcodec/i386/mpegvideo_mmx.c32
-rw-r--r--libavcodec/i386/mpegvideo_mmx_template.c218
-rw-r--r--libavcodec/i386/simple_idct_mmx.c26
-rw-r--r--libavcodec/i386/vp3dsp_mmx.c2
-rw-r--r--libavcodec/i386/vp3dsp_sse2.c24
14 files changed, 397 insertions, 397 deletions
diff --git a/libavcodec/i386/cputest.c b/libavcodec/i386/cputest.c
index 593e0550db..f02c63d449 100644
--- a/libavcodec/i386/cputest.c
+++ b/libavcodec/i386/cputest.c
@@ -29,28 +29,28 @@ int mm_support(void)
int eax, ebx, ecx, edx;
int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
long a, c;
-
+
__asm__ __volatile__ (
/* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t"
"pop %0\n\t"
"mov %0, %1\n\t"
-
+
/* ... Toggle the ID bit in one copy and store */
/* to the EFLAGS reg */
"xor $0x200000, %0\n\t"
"push %0\n\t"
"popf\n\t"
-
+
/* ... Get the (hopefully modified) EFLAGS */
"pushf\n\t"
"pop %0\n\t"
: "=a" (a), "=c" (c)
:
- : "cc"
+ : "cc"
);
-
+
if (a == c)
return 0; /* CPUID not supported */
@@ -60,9 +60,9 @@ int mm_support(void)
cpuid(1, eax, ebx, ecx, std_caps);
if (std_caps & (1<<23))
rval |= MM_MMX;
- if (std_caps & (1<<25))
+ if (std_caps & (1<<25))
rval |= MM_MMXEXT | MM_SSE;
- if (std_caps & (1<<26))
+ if (std_caps & (1<<26))
rval |= MM_SSE2;
}
@@ -103,18 +103,18 @@ int mm_support(void)
According to the table, the only CPU which supports level
2 is also the only one which supports extended CPUID levels.
*/
- if (eax < 2)
+ if (eax < 2)
return rval;
if (ext_caps & (1<<24))
rval |= MM_MMXEXT;
}
#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
- (rval&MM_MMX) ? "MMX ":"",
- (rval&MM_MMXEXT) ? "MMX2 ":"",
- (rval&MM_SSE) ? "SSE ":"",
- (rval&MM_SSE2) ? "SSE2 ":"",
- (rval&MM_3DNOW) ? "3DNow ":"",
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+ (rval&MM_MMX) ? "MMX ":"",
+ (rval&MM_MMXEXT) ? "MMX2 ":"",
+ (rval&MM_SSE) ? "SSE ":"",
+ (rval&MM_SSE2) ? "SSE2 ":"",
+ (rval&MM_3DNOW) ? "3DNow ":"",
(rval&MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
return rval;
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index d8e655269b..7566b5d16a 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -602,9 +602,9 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
const int strength= ff_h263_loop_filter_strength[qscale];
asm volatile(
-
+
H263_LOOP_FILTER
-
+
"movq %%mm3, %1 \n\t"
"movq %%mm4, %2 \n\t"
"movq %%mm5, %0 \n\t"
@@ -634,7 +634,7 @@ static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int
"movd %%mm1, %2 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movd %%mm1, %3 \n\t"
-
+
: "=m" (*(uint32_t*)(dst + 0*dst_stride)),
"=m" (*(uint32_t*)(dst + 1*dst_stride)),
"=m" (*(uint32_t*)(dst + 2*dst_stride)),
@@ -650,14 +650,14 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
const int strength= ff_h263_loop_filter_strength[qscale];
uint64_t temp[4] __attribute__ ((aligned(8)));
uint8_t *btemp= (uint8_t*)temp;
-
+
src -= 2;
transpose4x4(btemp , src , 8, stride);
transpose4x4(btemp+4, src + 4*stride, 8, stride);
asm volatile(
H263_LOOP_FILTER // 5 3 4 6
-
+
: "+m" (temp[0]),
"+m" (temp[1]),
"+m" (temp[2]),
@@ -796,7 +796,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
"psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -856,7 +856,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
"psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -919,7 +919,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"psrldq $4, %%xmm7\n" /* shift hi dword to lo */
"paddd %%xmm1,%%xmm7\n"
"movd %%xmm7,%3\n"
- : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
: "r" ((long)line_size));
return tmp;
}
@@ -930,7 +930,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
-
+
"movq (%0),%%mm0\n"
"movq %%mm0, %%mm1\n"
"psllq $8, %%mm0\n"
@@ -944,9 +944,9 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"punpckhbw %%mm7,%%mm3\n"
"psubw %%mm1, %%mm0\n"
"psubw %%mm3, %%mm2\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq %%mm4, %%mm1\n"
"psllq $8, %%mm4\n"
@@ -968,14 +968,14 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
"add %2,%0\n"
"1:\n"
-
+
"movq (%0),%%mm0\n"
"movq %%mm0, %%mm1\n"
"psllq $8, %%mm0\n"
@@ -997,13 +997,13 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"pcmpgtw %%mm5, %%mm1\n\t"
"pxor %%mm3, %%mm4\n"
"pxor %%mm1, %%mm5\n"
- "psubw %%mm3, %%mm4\n"
+ "psubw %%mm3, %%mm4\n"
"psubw %%mm1, %%mm5\n"
"paddw %%mm4, %%mm5\n"
"paddw %%mm5, %%mm6\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq %%mm4, %%mm1\n"
"psllq $8, %%mm4\n"
@@ -1025,7 +1025,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
@@ -1038,12 +1038,12 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"punpcklwd %%mm7,%%mm0\n"
"punpckhwd %%mm7,%%mm6\n"
"paddd %%mm0, %%mm6\n"
-
+
"movq %%mm6,%%mm0\n"
"psrlq $32, %%mm6\n"
"paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix1), "=r"(tmp)
+ : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2)
: "%ecx");
return tmp;
@@ -1056,7 +1056,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
-
+
"movq (%0),%%mm0\n"
"movq 1(%0),%%mm1\n"
"movq %%mm0, %%mm2\n"
@@ -1067,9 +1067,9 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"punpckhbw %%mm7,%%mm3\n"
"psubw %%mm1, %%mm0\n"
"psubw %%mm3, %%mm2\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq 1(%0),%%mm1\n"
"movq %%mm4, %%mm5\n"
@@ -1088,14 +1088,14 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
"add %2,%0\n"
"1:\n"
-
+
"movq (%0),%%mm0\n"
"movq 1(%0),%%mm1\n"
"movq %%mm0, %%mm2\n"
@@ -1118,9 +1118,9 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"psubw %%mm1, %%mm5\n"
"paddw %%mm4, %%mm5\n"
"paddw %%mm5, %%mm6\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq 1(%0),%%mm1\n"
"movq %%mm4, %%mm5\n"
@@ -1139,7 +1139,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
@@ -1152,12 +1152,12 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"punpcklwd %%mm7,%%mm0\n"
"punpckhwd %%mm7,%%mm6\n"
"paddd %%mm0, %%mm6\n"
-
+
"movq %%mm6,%%mm0\n"
"psrlq $32, %%mm6\n"
"paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix1), "=r"(tmp)
+ : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2)
: "%ecx");
return tmp + hf_noise8_mmx(pix+8, line_size, h);
@@ -1186,10 +1186,10 @@ static int nsse8_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0), %%mm2\n"\
"movq 8(%0), %%mm3\n"\
@@ -1213,7 +1213,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
"paddw %%mm2, " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
-
+
asm volatile (
"movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n"
@@ -1224,11 +1224,11 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
"subl $2, %%ecx\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
@@ -1239,7 +1239,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
"psrlq $16, %%mm0\n"
"paddw %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix), "=r"(tmp)
+ : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp & 0xFFFF;
@@ -1248,10 +1248,10 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0), " #out0 "\n"\
"movq 8(%0), " #out1 "\n"\
@@ -1271,16 +1271,16 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
"subl $2, %%ecx\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
"movd %%mm6,%1\n"
- : "+r" (pix), "=r"(tmp)
+ : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -1289,11 +1289,11 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix1) & 7) == 0);
assert( (((int)pix2) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0),%%mm2\n"\
"movq (%1)," #out0 "\n"\
@@ -1324,7 +1324,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"paddw %%mm2, " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
-
+
asm volatile (
"movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n"
@@ -1344,11 +1344,11 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"pxor %%mm7, %%mm1\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
@@ -1359,7 +1359,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"psrlq $16, %%mm0\n"
"paddw %%mm6,%%mm0\n"
"movd %%mm0,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp & 0x7FFF;
@@ -1368,11 +1368,11 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix1) & 7) == 0);
assert( (((int)pix2) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0)," #out0 "\n"\
"movq (%1),%%mm2\n"\
@@ -1408,16 +1408,16 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
"pxor %%mm7, %%mm1\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
"movd %%mm6,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -1449,7 +1449,7 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
long i=0;
uint8_t l, lt;
-
+
asm volatile(
"1: \n\t"
"movq -1(%1, %0), %%mm0 \n\t" // LT
@@ -1462,7 +1462,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
"movq %%mm4, %%mm5 \n\t" // L
"pmaxub %%mm1, %%mm4 \n\t" // max(T, L)
"pminub %%mm5, %%mm1 \n\t" // min(T, L)
- "pminub %%mm2, %%mm4 \n\t"
+ "pminub %%mm2, %%mm4 \n\t"
"pmaxub %%mm1, %%mm4 \n\t"
"psubb %%mm4, %%mm3 \n\t" // dst - pred
"movq %%mm3, (%3, %0) \n\t"
@@ -1475,9 +1475,9 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
l= *left;
lt= *left_top;
-
+
dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
-
+
*left_top= src1[w-1];
*left = src2[w-1];
}
@@ -1521,7 +1521,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
"psubw " #a ", " #z " \n\t"\
"pmaxsw " #z ", " #a " \n\t"\
"paddusw " #a ", " #sum " \n\t"
-
+
#define SBUTTERFLY(a,b,t,n)\
"movq " #a ", " #t " \n\t" /* abcd */\
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
@@ -1548,7 +1548,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
-
+
assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1556,38 +1556,38 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
asm volatile(
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 112(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 112(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 120(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 120(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
"movq %%mm7, %%mm5 \n\t"//FIXME remove
"movq %%mm6, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-
+
LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, 64(%1) \n\t"
MMABS(%%mm0, %%mm7)
@@ -1600,10 +1600,10 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
"movq %%mm0, 64(%1) \n\t"
-
+
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, (%1) \n\t"
MMABS(%%mm0, %%mm7)
@@ -1617,7 +1617,7 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
MMABS_SUM(%%mm1, %%mm7, %%mm0)
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
-
+
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
@@ -1625,7 +1625,7 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
"psrlq $16, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
-
+
: "=r" (sum)
: "r"(temp)
);
@@ -1635,7 +1635,7 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
-
+
assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1643,38 +1643,38 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
asm volatile(
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 112(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 112(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 120(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 120(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
"movq %%mm7, %%mm5 \n\t"//FIXME remove
"movq %%mm6, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-
+
LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, 64(%1) \n\t"
MMABS_MMX2(%%mm0, %%mm7)
@@ -1687,10 +1687,10 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
"movq %%mm0, 64(%1) \n\t"
-
+
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, (%1) \n\t"
MMABS_MMX2(%%mm0, %%mm7)
@@ -1704,13 +1704,13 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
-
+
"pshufw $0x0E, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"pshufw $0x01, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
-
+
: "=r" (sum)
: "r"(temp)
);
@@ -2405,7 +2405,7 @@ static void just_return() { return; }
static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
long i=0;
-
+
assert(ABS(scale) < 256);
scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
@@ -2413,11 +2413,11 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6
"pcmpeqw %%mm6, %%mm6 \n\t" // -1w
"psrlw $15, %%mm6 \n\t" // 1w
"pxor %%mm7, %%mm7 \n\t"
- "movd %4, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
+ "movd %4, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
"1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
"movq 8(%1, %0), %%mm1 \n\t"
"pmulhw %%mm5, %%mm0 \n\t"
"pmulhw %%mm5, %%mm1 \n\t"
@@ -2444,7 +2444,7 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6
"paddd %%mm6, %%mm7 \n\t"
"psrld $2, %%mm7 \n\t"
"movd %%mm7, %0 \n\t"
-
+
: "+r" (i)
: "r"(basis), "r"(rem), "r"(weight), "g"(scale)
);
@@ -2453,21 +2453,21 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6
static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
long i=0;
-
+
if(ABS(scale) < 256){
scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
asm volatile(
"pcmpeqw %%mm6, %%mm6 \n\t" // -1w
"psrlw $15, %%mm6 \n\t" // 1w
- "movd %3, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
+ "movd %3, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
"1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
"movq 8(%1, %0), %%mm1 \n\t"
"pmulhw %%mm5, %%mm0 \n\t"
"pmulhw %%mm5, %%mm1 \n\t"
- "paddw %%mm6, %%mm0 \n\t"
+ "paddw %%mm6, %%mm0 \n\t"
"paddw %%mm6, %%mm1 \n\t"
"psraw $1, %%mm0 \n\t"
"psraw $1, %%mm1 \n\t"
@@ -2478,19 +2478,19 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
"add $16, %0 \n\t"
"cmp $128, %0 \n\t" //FIXME optimize & bench
" jb 1b \n\t"
-
+
: "+r" (i)
: "r"(basis), "r"(rem), "g"(scale)
);
}else{
for(i=0; i<8*8; i++){
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
- }
+ }
}
}
#include "h264dsp_mmx.c"
-
+
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
@@ -2563,7 +2563,7 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
add_pixels_clamped_mmx(block, dest, line_size);
}
#endif
-
+
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
mm_flags = mm_support();
@@ -2701,14 +2701,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
-
+
c->add_bytes= add_bytes_mmx;
#ifdef CONFIG_ENCODERS
c->diff_bytes= diff_bytes_mmx;
-
+
c->hadamard8_diff[0]= hadamard8_diff16_mmx;
c->hadamard8_diff[1]= hadamard8_diff_mmx;
-
+
c->pix_norm1 = pix_norm1_mmx;
c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
c->sse[1] = sse8_mmx;
@@ -2719,19 +2719,19 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->vsad[0] = vsad16_mmx;
}
-
+
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->try_8x8basis= try_8x8basis_mmx;
}
c->add_8x8basis= add_8x8basis_mmx;
-
+
#endif //CONFIG_ENCODERS
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
- c->h263_h_loop_filter= h263_h_loop_filter_mmx;
+ c->h263_h_loop_filter= h263_h_loop_filter_mmx;
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
-
+
if (mm_flags & MM_MMXEXT) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2945,7 +2945,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
}
}
-
+
#ifdef CONFIG_ENCODERS
dsputil_init_pix_mmx(c, avctx);
#endif //CONFIG_ENCODERS
diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h
index c708913048..434bc3a0e8 100644
--- a/libavcodec/i386/dsputil_mmx_avg.h
+++ b/libavcodec/i386/dsputil_mmx_avg.h
@@ -21,7 +21,7 @@
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
* and improved by Zdenek Kabelac <kabi@users.sf.net>
*/
-
+
/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
clobber bug - now it will work with 2.95.2 and also with -fPIC
*/
@@ -100,7 +100,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
@@ -147,7 +147,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -217,7 +217,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -272,7 +272,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
@@ -324,7 +324,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -412,7 +412,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -466,7 +466,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -539,13 +539,13 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
:"memory");*/
}
-
+
/* GL: this function does incorrect rounding if overflow */
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
@@ -746,7 +746,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
:"%"REG_a, "memory");
}
-// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
+// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
diff --git a/libavcodec/i386/dsputil_mmx_rnd.h b/libavcodec/i386/dsputil_mmx_rnd.h
index a56374b63e..6d93f9d55f 100644
--- a/libavcodec/i386/dsputil_mmx_rnd.h
+++ b/libavcodec/i386/dsputil_mmx_rnd.h
@@ -197,7 +197,7 @@ static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1,
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
diff --git a/libavcodec/i386/fdct_mmx.c b/libavcodec/i386/fdct_mmx.c
index 6a13090a13..f3023549a2 100644
--- a/libavcodec/i386/fdct_mmx.c
+++ b/libavcodec/i386/fdct_mmx.c
@@ -5,7 +5,7 @@
* SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
*
* from fdctam32.c - AP922 MMX(3D-Now) forward-DCT
- *
+ *
* Intel Application Note AP-922 - fast, precise implementation of DCT
* http://developer.intel.com/vtune/cbts/appnotes.htm
*
@@ -51,7 +51,7 @@ static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
-struct
+struct
{
const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
} fdct_r_row_sse2 ATTR_ALIGN(16)=
@@ -61,90 +61,90 @@ struct
//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table
- 16384, 16384, 22725, 19266,
- 16384, 16384, 12873, 4520,
- 21407, 8867, 19266, -4520,
- -8867, -21407, -22725, -12873,
- 16384, -16384, 12873, -22725,
- -16384, 16384, 4520, 19266,
- 8867, -21407, 4520, -12873,
- 21407, -8867, 19266, -22725,
-
- 22725, 22725, 31521, 26722,
- 22725, 22725, 17855, 6270,
- 29692, 12299, 26722, -6270,
- -12299, -29692, -31521, -17855,
- 22725, -22725, 17855, -31521,
- -22725, 22725, 6270, 26722,
- 12299, -29692, 6270, -17855,
- 29692, -12299, 26722, -31521,
-
- 21407, 21407, 29692, 25172,
- 21407, 21407, 16819, 5906,
- 27969, 11585, 25172, -5906,
- -11585, -27969, -29692, -16819,
- 21407, -21407, 16819, -29692,
- -21407, 21407, 5906, 25172,
- 11585, -27969, 5906, -16819,
- 27969, -11585, 25172, -29692,
-
- 19266, 19266, 26722, 22654,
- 19266, 19266, 15137, 5315,
- 25172, 10426, 22654, -5315,
- -10426, -25172, -26722, -15137,
- 19266, -19266, 15137, -26722,
- -19266, 19266, 5315, 22654,
- 10426, -25172, 5315, -15137,
- 25172, -10426, 22654, -26722,
-
- 16384, 16384, 22725, 19266,
- 16384, 16384, 12873, 4520,
- 21407, 8867, 19266, -4520,
- -8867, -21407, -22725, -12873,
- 16384, -16384, 12873, -22725,
- -16384, 16384, 4520, 19266,
- 8867, -21407, 4520, -12873,
- 21407, -8867, 19266, -22725,
-
- 19266, 19266, 26722, 22654,
- 19266, 19266, 15137, 5315,
- 25172, 10426, 22654, -5315,
- -10426, -25172, -26722, -15137,
- 19266, -19266, 15137, -26722,
- -19266, 19266, 5315, 22654,
- 10426, -25172, 5315, -15137,
- 25172, -10426, 22654, -26722,
-
- 21407, 21407, 29692, 25172,
- 21407, 21407, 16819, 5906,
- 27969, 11585, 25172, -5906,
- -11585, -27969, -29692, -16819,
- 21407, -21407, 16819, -29692,
- -21407, 21407, 5906, 25172,
- 11585, -27969, 5906, -16819,
- 27969, -11585, 25172, -29692,
-
- 22725, 22725, 31521, 26722,
- 22725, 22725, 17855, 6270,
- 29692, 12299, 26722, -6270,
- -12299, -29692, -31521, -17855,
- 22725, -22725, 17855, -31521,
- -22725, 22725, 6270, 26722,
- 12299, -29692, 6270, -17855,
- 29692, -12299, 26722, -31521,
+ 16384, 16384, 22725, 19266,
+ 16384, 16384, 12873, 4520,
+ 21407, 8867, 19266, -4520,
+ -8867, -21407, -22725, -12873,
+ 16384, -16384, 12873, -22725,
+ -16384, 16384, 4520, 19266,
+ 8867, -21407, 4520, -12873,
+ 21407, -8867, 19266, -22725,
+
+ 22725, 22725, 31521, 26722,
+ 22725, 22725, 17855, 6270,
+ 29692, 12299, 26722, -6270,
+ -12299, -29692, -31521, -17855,
+ 22725, -22725, 17855, -31521,
+ -22725, 22725, 6270, 26722,
+ 12299, -29692, 6270, -17855,
+ 29692, -12299, 26722, -31521,
+
+ 21407, 21407, 29692, 25172,
+ 21407, 21407, 16819, 5906,
+ 27969, 11585, 25172, -5906,
+ -11585, -27969, -29692, -16819,
+ 21407, -21407, 16819, -29692,
+ -21407, 21407, 5906, 25172,
+ 11585, -27969, 5906, -16819,
+ 27969, -11585, 25172, -29692,
+
+ 19266, 19266, 26722, 22654,
+ 19266, 19266, 15137, 5315,
+ 25172, 10426, 22654, -5315,
+ -10426, -25172, -26722, -15137,
+ 19266, -19266, 15137, -26722,
+ -19266, 19266, 5315, 22654,
+ 10426, -25172, 5315, -15137,
+ 25172, -10426, 22654, -26722,
+
+ 16384, 16384, 22725, 19266,
+ 16384, 16384, 12873, 4520,
+ 21407, 8867, 19266, -4520,
+ -8867, -21407, -22725, -12873,
+ 16384, -16384, 12873, -22725,
+ -16384, 16384, 4520, 19266,
+ 8867, -21407, 4520, -12873,
+ 21407, -8867, 19266, -22725,
+
+ 19266, 19266, 26722, 22654,
+ 19266, 19266, 15137, 5315,
+ 25172, 10426, 22654, -5315,
+ -10426, -25172, -26722, -15137,
+ 19266, -19266, 15137, -26722,
+ -19266, 19266, 5315, 22654,
+ 10426, -25172, 5315, -15137,
+ 25172, -10426, 22654, -26722,
+
+ 21407, 21407, 29692, 25172,
+ 21407, 21407, 16819, 5906,
+ 27969, 11585, 25172, -5906,
+ -11585, -27969, -29692, -16819,
+ 21407, -21407, 16819, -29692,
+ -21407, 21407, 5906, 25172,
+ 11585, -27969, 5906, -16819,
+ 27969, -11585, 25172, -29692,
+
+ 22725, 22725, 31521, 26722,
+ 22725, 22725, 17855, 6270,
+ 29692, 12299, 26722, -6270,
+ -12299, -29692, -31521, -17855,
+ 22725, -22725, 17855, -31521,
+ -22725, 22725, 6270, 26722,
+ 12299, -29692, 6270, -17855,
+ 29692, -12299, 26722, -31521,
};
-struct
+struct
{
const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
} tab_frw_01234567_sse2 ATTR_ALIGN(16) =
{{
-//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table
+//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table
#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
C4, C4, C5, C7, C2, C6, C3, -C7, \
-C4, C4, C7, C3, C6, -C2, C7, -C5, \
- C4, -C4, C5, -C1, C2, -C6, C3, -C1,
-// c1..c7 * cos(pi/4) * 2^15
+ C4, -C4, C5, -C1, C2, -C6, C3, -C1,
+// c1..c7 * cos(pi/4) * 2^15
#define C1 22725
#define C2 21407
#define C3 19266
@@ -355,17 +355,17 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
+ "movdqa \\t+48(%1), %%xmm7 \n\t"
"movdqa \\t(%1), %%xmm4 \n\t"
- "movdqa \\t+16(%1), %%xmm5 \n\t"
+ "movdqa \\t+16(%1), %%xmm5 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2_H2 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
+ "movdqa \\t+48(%1), %%xmm7 \n\t"
".endm \n\t"
- ".macro FDCT_ROW_SSE2 i \n\t"
+ ".macro FDCT_ROW_SSE2 i \n\t"
"movq %%xmm2, %%xmm1 \n\t"
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
"paddsw %%xmm0, %%xmm1 \n\t"
@@ -376,7 +376,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
"pmaddwd %%xmm1, %%xmm7 \n\t"
"pmaddwd %%xmm5, %%xmm2 \n\t"
"pmaddwd %%xmm4, %%xmm1 \n\t"
- "paddd %%xmm7, %%xmm3 \n\t"
+ "paddd %%xmm7, %%xmm3 \n\t"
"paddd %%xmm2, %%xmm1 \n\t"
"paddd %%xmm6, %%xmm3 \n\t"
"paddd %%xmm6, %%xmm1 \n\t"
@@ -384,8 +384,8 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
"psrad %3, %%xmm1 \n\t"
"packssdw %%xmm3, %%xmm1 \n\t"
"movdqa %%xmm1, \\i(%4) \n\t"
- ".endm \n\t"
- "movdqa (%2), %%xmm6 \n\t"
+ ".endm \n\t"
+ "movdqa (%2), %%xmm6 \n\t"
"FDCT_ROW_SSE2_H1 0 0 \n\t"
"FDCT_ROW_SSE2 0 \n\t"
"FDCT_ROW_SSE2_H2 64 0 \n\t"
@@ -411,7 +411,7 @@ static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
}
static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
-{
+{
pshufw_m2r(*(in + 4), mm5, 0x1B);
movq_m2r(*(in + 0), mm0);
movq_r2r(mm0, mm1);
@@ -454,7 +454,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
}
static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
-{
+{
//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
movd_m2r(*(in + 6), mm1);
punpcklwd_m2r(*(in + 4), mm1);
@@ -547,7 +547,7 @@ void ff_fdct_mmx2(int16_t *block)
}
}
-void ff_fdct_sse2(int16_t *block)
+void ff_fdct_sse2(int16_t *block)
{
int64_t align_tmp[16] ATTR_ALIGN(8);
int16_t * const block_tmp= (int16_t*)align_tmp;
diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c
index d07c943e91..f8be644a3b 100644
--- a/libavcodec/i386/fft_sse.c
+++ b/libavcodec/i386/fft_sse.c
@@ -23,13 +23,13 @@
#include <xmmintrin.h>
-static const float p1p1p1m1[4] __attribute__((aligned(16))) =
+static const float p1p1p1m1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, 1.0, -1.0 };
-static const float p1p1m1p1[4] __attribute__((aligned(16))) =
+static const float p1p1m1p1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, -1.0, 1.0 };
-static const float p1p1m1m1[4] __attribute__((aligned(16))) =
+static const float p1p1m1m1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, -1.0, -1.0 };
#if 0
@@ -107,27 +107,27 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
a = *(__m128 *)p;
b = *(__m128 *)q;
-
+
/* complex mul */
c = *(__m128 *)cptr;
/* cre*re cim*re */
- t1 = _mm_mul_ps(c,
- _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
+ t1 = _mm_mul_ps(c,
+ _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
c = *(__m128 *)(cptr + 2);
/* -cim*im cre*im */
t2 = _mm_mul_ps(c,
- _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
+ _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
b = _mm_add_ps(t1, t2);
-
+
/* butterfly */
*(__m128 *)p = _mm_add_ps(a, b);
*(__m128 *)q = _mm_sub_ps(a, b);
-
+
p += 2;
q += 2;
cptr += 4;
} while (--k);
-
+
p += nloops;
q += nloops;
} while (--j);
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c
index b5e9baa3a6..45a3c02f35 100644
--- a/libavcodec/i386/h264dsp_mmx.c
+++ b/libavcodec/i386/h264dsp_mmx.c
@@ -384,7 +384,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
"psraw $5, %%mm6 \n\t"\
"packuswb %%mm6, %%mm6 \n\t"\
OP(%%mm6, (%1), A, d)\
- "add %3, %1 \n\t"
+ "add %3, %1 \n\t"
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
"movd (%0), "#F" \n\t"\
@@ -399,7 +399,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"movq %%mm6, "#OF"(%1) \n\t"
-
+
#define QPEL_H264(OPNAME, OP, MMX)\
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=4;\
diff --git a/libavcodec/i386/idct_mmx_xvid.c b/libavcodec/i386/idct_mmx_xvid.c
index 943c50f92b..219260ed88 100644
--- a/libavcodec/i386/idct_mmx_xvid.c
+++ b/libavcodec/i386/idct_mmx_xvid.c
@@ -72,13 +72,13 @@
//-----------------------------------------------------------------------------
-static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
13036,13036,13036,13036, // tg * (2<<16) + 0.5
27146,27146,27146,27146, // tg * (2<<16) + 0.5
-21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5
23170,23170,23170,23170}; // cos * (2<<15) + 0.5
-static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
+static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
65536,65536,
3597,3597,
2260,2260,
@@ -148,7 +148,7 @@ static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8)))
//-----------------------------------------------------------------------------
// Table for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
16384,16384,16384,-16384, // movq-> w06 w04 w02 w00
21407,8867,8867,-21407, // w07 w05 w03 w01
16384,-16384,16384,16384, // w14 w12 w10 w08
@@ -190,7 +190,7 @@ static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8
//-----------------------------------------------------------------------------
// %3 for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
16384,21407,16384,8867, // movq-> w05 w04 w01 w00
16384,8867,-16384,-21407, // w07 w06 w03 w02
16384,-8867,16384,-21407, // w13 w12 w09 w08
@@ -501,7 +501,7 @@ asm volatile(
DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-
+
//# Process the columns (4 at a time)
DCT_8_INV_COL(0(%0), 0(%0))
DCT_8_INV_COL(8(%0), 8(%0))
@@ -524,7 +524,7 @@ asm volatile(
DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-
+
//# Process the columns (4 at a time)
DCT_8_INV_COL(0(%0), 0(%0))
DCT_8_INV_COL(8(%0), 8(%0))
diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c
index 1b90f8e40f..69e10f628b 100644
--- a/libavcodec/i386/motion_est_mmx.c
+++ b/libavcodec/i386/motion_est_mmx.c
@@ -393,7 +393,7 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
c->sad[0]= sad16_mmx2;
c->sad[1]= sad8_mmx2;
-
+
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->pix_abs[0][1] = sad16_x2_mmx2;
c->pix_abs[0][2] = sad16_y2_mmx2;
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c
index 70c81f6754..af799b6b00 100644
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -40,7 +40,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
qmul = qscale << 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
-
+
if (!s->h263_aic) {
if (n < 4)
level = block[0] * s->y_dc_scale;
@@ -116,7 +116,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
-
+
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
@@ -209,7 +209,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
- if (n < 4)
+ if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
block0 = block[0] * s->c_dc_scale;
@@ -263,7 +263,7 @@ asm volatile(
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
- );
+ );
block[0]= block0;
}
@@ -339,13 +339,13 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
long nCoeffs;
const uint16_t *quant_matrix;
int block0;
-
+
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
- if (n < 4)
+ if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
block0 = block[0] * s->c_dc_scale;
@@ -394,7 +394,7 @@ asm volatile(
"jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
- );
+ );
block[0]= block0;
//Note, we dont do mismatch control for intra as errors cannot accumulate
}
@@ -404,7 +404,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
{
long nCoeffs;
const uint16_t *quant_matrix;
-
+
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
@@ -470,13 +470,13 @@ asm volatile(
"psrlq $15, %%mm7 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"movd %%mm0, 124(%0, %3) \n\t"
-
+
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
: "%"REG_a, "memory"
);
}
-/* draw the edges of width 'w' of an image of size width, height
+/* draw the edges of width 'w' of an image of size width, height
this mmx version can only handle w==8 || w==16 */
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
{
@@ -491,7 +491,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
@@ -512,7 +512,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
@@ -525,12 +525,12 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
"movq %%mm1, 8(%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
- " jb 1b \n\t"
+ " jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
}
-
+
for(i=0;i<w;i+=4) {
/* top and bottom (and hopefully also the corners) */
ptr= buf - (i + 1) * wrap - w;
@@ -694,7 +694,7 @@ void MPV_common_init_mmx(MpegEncContext *s)
{
if (mm_flags & MM_MMX) {
const int dct_algo = s->avctx->dct_algo;
-
+
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
@@ -703,7 +703,7 @@ void MPV_common_init_mmx(MpegEncContext *s)
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
draw_edges = draw_edges_mmx;
-
+
if (mm_flags & MM_SSE2) {
s->denoise_dct= denoise_dct_sse2;
} else {
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index 93f156ee55..28afdeef0c 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -52,7 +52,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
int level=0, q; //=0 is cuz gcc says uninitalized ...
const uint16_t *qmat, *bias;
__align8 int16_t temp_block[64];
-
+
assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
//s->fdct (block);
@@ -88,7 +88,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
} else
/* For AIC we skip quant/dequant of INTRADC */
level = (block[0] + 4)>>3;
-
+
block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1;
@@ -101,7 +101,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
}
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
-
+
asm volatile(
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
SPREADW(%%mm3)
@@ -116,16 +116,16 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
- "pxor %%mm1, %%mm0 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
- "por %%mm0, %%mm4 \n\t"
- "pxor %%mm1, %%mm0 \n\t"
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
- "movq (%4, %%"REG_a"), %%mm1 \n\t"
+ "movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
@@ -142,7 +142,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
- "psubusw %%mm1, %%mm4 \n\t"
+ "psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
@@ -160,18 +160,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
- "pxor %%mm1, %%mm0 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
- "por %%mm0, %%mm4 \n\t"
- "pxor %%mm1, %%mm0 \n\t"
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
- "movq (%4, %%"REG_a"), %%mm1 \n\t"
+ "movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
@@ -188,7 +188,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
- "psubusw %%mm1, %%mm4 \n\t"
+ "psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
@@ -201,135 +201,135 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end;
- block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
- block[0x20] = temp_block[0x10];
+ block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
+ block[0x20] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
- block[0x09] = temp_block[0x03];
+ block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
+ block[0x09] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
- block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
+ block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
+ block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
- block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
- block[0x0C] = temp_block[0x05];
+ block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
+ block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
+ block[0x0C] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
- block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
- block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
- block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
+ block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
+ block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
+ block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
+ block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
- block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
- block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
- block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
+ block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
+ block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
+ block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
+ block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
- block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
- block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
- block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
+ block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
+ block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
+ block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
+ block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
- block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
- block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
- block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
+ block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
+ block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
+ block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
+ block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
- block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
- block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
+ block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
+ block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
- block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
- block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
+ block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
+ block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end;
- block[0x04] = temp_block[0x01];
- block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ block[0x04] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
- block[0x05] = temp_block[0x03];
+ block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+ block[0x05] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
- block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x1C] = temp_block[0x19];
- block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
- block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
+ block[0x1C] = temp_block[0x19];
+ block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+ block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
- block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
- block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
- block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
+ block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+ block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
- block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
- block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
- block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
+ block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+ block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+ block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
- block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
- block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
- block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
+ block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+ block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+ block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
- block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
- block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
- block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
+ block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+ block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
- block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
- block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
+ block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+ block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+ block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
- block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
- block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else{
if(last_non_zero_p1 <= 1) goto end;
- block[0x01] = temp_block[0x01];
- block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ block[0x01] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
- block[0x03] = temp_block[0x03];
+ block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
+ block[0x03] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
- block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x19] = temp_block[0x19];
- block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
- block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
+ block[0x19] = temp_block[0x19];
+ block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
+ block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
- block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
- block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
- block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
+ block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
+ block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
- block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
- block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
- block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
+ block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
+ block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
+ block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
- block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
- block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
- block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
+ block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
+ block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
+ block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
- block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
- block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
- block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
+ block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
+ block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
- block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
- block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
+ block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
+ block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
- block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
- block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
+ block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}
end:
diff --git a/libavcodec/i386/simple_idct_mmx.c b/libavcodec/i386/simple_idct_mmx.c
index 92a366f217..7b5084c7eb 100644
--- a/libavcodec/i386/simple_idct_mmx.c
+++ b/libavcodec/i386/simple_idct_mmx.c
@@ -60,19 +60,19 @@ static const int16_t __attribute__((aligned(8))) coeffs[]= {
C4, C4, C4, C4,
C4, -C4, C4, -C4,
-
+
C2, C6, C2, C6,
C6, -C2, C6, -C2,
-
+
C1, C3, C1, C3,
C5, C7, C5, C7,
-
+
C3, -C7, C3, -C7,
-C1, -C5, -C1, -C5,
-
+
C5, -C1, C5, -C1,
C7, C3, C7, C3,
-
+
C7, -C5, C7, -C5,
C3, -C1, C3, -C1
};
@@ -357,7 +357,7 @@ static inline void idct(int16_t *block)
"movd %%mm4, 64+" #dst " \n\t"\
"movd %%mm5, 80+" #dst " \n\t"\
-
+
#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
"movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
@@ -857,7 +857,7 @@ Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm6, 48+" #dst " \n\t"\
"movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
+ "movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -924,7 +924,7 @@ Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm6, 48+" #dst " \n\t"\
"movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
+ "movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
@@ -1137,8 +1137,8 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"packssdw %%mm1, %%mm6 \n\t" /* A3+B3 a3+b3 */\
"movq %%mm6, 48+" #dst " \n\t"\
"movq %%mm6, 64+" #dst " \n\t"\
- "movq %%mm5, 80+" #dst " \n\t"
-
+ "movq %%mm5, 80+" #dst " \n\t"
+
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1214,7 +1214,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm4, 64+" #dst " \n\t"\
"movd %%mm5, 80+" #dst " \n\t"
-
+
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1256,7 +1256,7 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
"movq %%mm0, 32+" #dst " \n\t"\
"movq %%mm4, 48+" #dst " \n\t"\
"movq %%mm4, 64+" #dst " \n\t"\
- "movq %%mm0, 80+" #dst " \n\t"
+ "movq %%mm0, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1277,7 +1277,7 @@ Input
12 32 16 36 52 72 56 76
05 45 07 47 25 65 27 67
15 35 17 37 55 75 57 77
-
+
Temp
00 04 10 14 20 24 30 34
40 44 50 54 60 64 70 74
diff --git a/libavcodec/i386/vp3dsp_mmx.c b/libavcodec/i386/vp3dsp_mmx.c
index 3d220c1d45..4aa1a5f403 100644
--- a/libavcodec/i386/vp3dsp_mmx.c
+++ b/libavcodec/i386/vp3dsp_mmx.c
@@ -208,7 +208,7 @@ static const uint16_t idct_cosine_table[7] = {
I(1) = d1 c1 b1 a1
I(2) = d2 c2 b2 a2
I(3) = d3 c3 b3 a3
-
+
J(4) = h0 g0 f0 e0
J(5) = h1 g1 f1 e1
J(6) = h2 g2 f2 e2
diff --git a/libavcodec/i386/vp3dsp_sse2.c b/libavcodec/i386/vp3dsp_sse2.c
index ed17891bfa..fcc511b651 100644
--- a/libavcodec/i386/vp3dsp_sse2.c
+++ b/libavcodec/i386/vp3dsp_sse2.c
@@ -36,21 +36,21 @@ static const unsigned short __align16 SSE2_dequant_const[] =
};
static const unsigned int __align16 eight_data[] =
-{
- 0x00080008,
+{
+ 0x00080008,
+ 0x00080008,
0x00080008,
- 0x00080008,
- 0x00080008
-};
+ 0x00080008
+};
static const unsigned short __align16 SSE2_idct_data[7 * 8] =
{
- 64277,64277,64277,64277,64277,64277,64277,64277,
- 60547,60547,60547,60547,60547,60547,60547,60547,
- 54491,54491,54491,54491,54491,54491,54491,54491,
- 46341,46341,46341,46341,46341,46341,46341,46341,
- 36410,36410,36410,36410,36410,36410,36410,36410,
- 25080,25080,25080,25080,25080,25080,25080,25080,
+ 64277,64277,64277,64277,64277,64277,64277,64277,
+ 60547,60547,60547,60547,60547,60547,60547,60547,
+ 54491,54491,54491,54491,54491,54491,54491,54491,
+ 46341,46341,46341,46341,46341,46341,46341,46341,
+ 36410,36410,36410,36410,36410,36410,36410,36410,
+ 25080,25080,25080,25080,25080,25080,25080,25080,
12785,12785,12785,12785,12785,12785,12785,12785
};
@@ -820,6 +820,6 @@ void ff_vp3_idct_sse2(int16_t *input_data)
SSE2_Row_IDCT();
SSE2_Transpose();
-
+
SSE2_Column_IDCT();
}