diff options
author | Mans Rullgard <mans@mansr.com> | 2012-08-13 00:53:05 +0100 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2012-08-13 14:51:52 +0100 |
commit | 90540c2d5ace46a1e9789c75fde0b1f7dbb12a9b (patch) | |
tree | f812df2891dd9e30c18e2f8dc491b9618437bf3f /libswscale | |
parent | 10b83cb653e6effc80f3f0ddaaf39aea87546a6d (diff) | |
download | ffmpeg-90540c2d5ace46a1e9789c75fde0b1f7dbb12a9b.tar.gz |
x86: swscale: fix fragile memory accesses
To access data at multiple fixed offsets from a base address, this
code uses a single "m" operand and code of the form "32%0", relying on
the memory operand instantiation having no displacement, giving a final
result of the form "32(%rax)". If the compiler uses a register and
displacement, e.g. "64(%rax)", the end result becomes "3264(%rax)",
which obviously does not work.
Replacing the "m" operands with "r" operands allows safe addition of a
displacement. In theory, multiple memory operands could use a shared
base register with different index registers, "(%rax,%rbx)", potentially
making more efficient use of registers. In the cases at hand, no such
sharing is possible since the addresses involved are entirely unrelated.
After this change, the code somewhat rudely accesses memory without
using a corresponding memory operand, which in some cases can lead to
unwanted "optimisations" of surrounding code. However, the original
code also accesses memory not covered by a memory operand, so this is
not adding any defect not already present. It is also hightly unlikely
that any such optimisations could be performed here since the memory
locations in questions are not accessed elsewhere in the same functions.
This fixes crashes with suncc.
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/x86/rgb2rgb_template.c | 284 |
1 files changed, 137 insertions, 147 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 0f2bfd0581..7a641e1814 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "punpckldq 3%1, %%mm0 \n\t" - "movd 6%1, %%mm1 \n\t" - "punpckldq 9%1, %%mm1 \n\t" - "movd 12%1, %%mm2 \n\t" - "punpckldq 15%1, %%mm2 \n\t" - "movd 18%1, %%mm3 \n\t" - "punpckldq 21%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "punpckldq 3(%1), %%mm0 \n\t" + "movd 6(%1), %%mm1 \n\t" + "punpckldq 9(%1), %%mm1 \n\t" + "movd 12(%1), %%mm2 \n\t" + "punpckldq 15(%1), %%mm2 \n\t" + "movd 18(%1), %%mm3 \n\t" + "punpckldq 21(%1), %%mm3 \n\t" "por %%mm7, %%mm0 \n\t" "por %%mm7, %%mm1 \n\t" "por %%mm7, %%mm2 \n\t" "por %%mm7, %%mm3 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm1, 8(%0) \n\t" + MOVNTQ" %%mm2, 16(%0) \n\t" + MOVNTQ" %%mm3, 24(%0)" + :: "r"(dest), "r"(s) :"memory"); dest += 32; s += 24; @@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ "por %%mm5, %%mm4 \n\t" \ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm1, 8%0 \n\t" \ - MOVNTQ" %%mm4, 16%0" + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm1, 8(%0) \n\t" \ + MOVNTQ" %%mm4, 16(%0)" static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) @@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 31; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm5 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 16(%1), %%mm4 \n\t" + "movq 24(%1), %%mm5 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm4, %%mm6 \n\t" "movq %%mm5, %%mm7 \n\t" STORE_BGR24_MMX - :"=m"(*dest) - :"m"(*s) + :: "r"(dest), "r"(s) :"memory"); dest += 24; s += 32; @@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm4, %%mm2 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $1, %%mm0 \n\t" @@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s "pand %%mm6, %%mm3 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + :: "r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 16; } @@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 16; } @@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm3 \n\t" \ "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm3, 8%0 \n\t" \ + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm3, 8(%0) \n\t" \ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) { @@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "psrlq $2, %%mm1 \n\t" "psrlq $7, %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) :"memory"); d += 16; s += 4; @@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "psrlq $3, %%mm1 \n\t" "psrlq $8, %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r) :"memory"); d += 16; s += 4; @@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, int srcStride1, int srcStride2, int dstStride1, int dstStride2) { - x86_reg y; - int x,w,h; + x86_reg x, y; + int w,h; w=width/2; h=height/2; __asm__ volatile( PREFETCH" %0 \n\t" @@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s1[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s1), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; @@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s2[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s2), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |