aboutsummaryrefslogtreecommitdiffstats
path: root/libswscale/x86/rgb2rgb_template.c
diff options
context:
space:
mode:
author Mans Rullgard <mans@mansr.com> 2012-08-13 00:53:05 +0100
committer Mans Rullgard <mans@mansr.com> 2012-08-13 14:51:52 +0100
commit 90540c2d5ace46a1e9789c75fde0b1f7dbb12a9b (patch)
tree f812df2891dd9e30c18e2f8dc491b9618437bf3f /libswscale/x86/rgb2rgb_template.c
parent 10b83cb653e6effc80f3f0ddaaf39aea87546a6d (diff)
download ffmpeg-90540c2d5ace46a1e9789c75fde0b1f7dbb12a9b.tar.gz
x86: swscale: fix fragile memory accesses
To access data at multiple fixed offsets from a base address, this code uses a single "m" operand and code of the form "32%0", relying on the memory operand instantiation having no displacement, giving a final result of the form "32(%rax)". If the compiler uses a register and displacement, e.g. "64(%rax)", the end result becomes "3264(%rax)", which obviously does not work. Replacing the "m" operands with "r" operands allows safe addition of a displacement. In theory, multiple memory operands could use a shared base register with different index registers, "(%rax,%rbx)", potentially making more efficient use of registers. In the cases at hand, no such sharing is possible since the addresses involved are entirely unrelated. After this change, the code somewhat rudely accesses memory without using a corresponding memory operand, which in some cases can lead to unwanted "optimisations" of surrounding code. However, the original code also accesses memory not covered by a memory operand, so this is not adding any defect not already present. It is also highly unlikely that any such optimisations could be performed here since the memory locations in question are not accessed elsewhere in the same functions. This fixes crashes with suncc. Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libswscale/x86/rgb2rgb_template.c')
-rw-r--r--libswscale/x86/rgb2rgb_template.c284
1 files changed, 137 insertions, 147 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 0f2bfd0581..7a641e1814 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "punpckldq 3%1, %%mm0 \n\t"
- "movd 6%1, %%mm1 \n\t"
- "punpckldq 9%1, %%mm1 \n\t"
- "movd 12%1, %%mm2 \n\t"
- "punpckldq 15%1, %%mm2 \n\t"
- "movd 18%1, %%mm3 \n\t"
- "punpckldq 21%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "punpckldq 3(%1), %%mm0 \n\t"
+ "movd 6(%1), %%mm1 \n\t"
+ "punpckldq 9(%1), %%mm1 \n\t"
+ "movd 12(%1), %%mm2 \n\t"
+ "punpckldq 15(%1), %%mm2 \n\t"
+ "movd 18(%1), %%mm3 \n\t"
+ "punpckldq 21(%1), %%mm3 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm2 \n\t"
"por %%mm7, %%mm3 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0"
- :"=m"(*dest)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm1, 8(%0) \n\t"
+ MOVNTQ" %%mm2, 16(%0) \n\t"
+ MOVNTQ" %%mm3, 24(%0)"
+ :: "r"(dest), "r"(s)
:"memory");
dest += 32;
s += 24;
@@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm1, 8%0 \n\t" \
- MOVNTQ" %%mm4, 16%0"
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm1, 8(%0) \n\t" \
+ MOVNTQ" %%mm4, 16(%0)"
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 31;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm5 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 16(%1), %%mm4 \n\t"
+ "movq 24(%1), %%mm5 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"movq %%mm4, %%mm6 \n\t"
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
- :"=m"(*dest)
- :"m"(*s)
+ :: "r"(dest), "r"(s)
:"memory");
dest += 24;
s += 32;
@@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $1, %%mm0 \n\t"
@@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
"pand %%mm6, %%mm3 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 16;
}
@@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
@@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm3, 8%0 \n\t" \
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm3, 8(%0) \n\t" \
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
@@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $2, %%mm1 \n\t"
"psrlq $7, %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
:"memory");
d += 16;
s += 4;
@@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $3, %%mm1 \n\t"
"psrlq $8, %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
:"memory");
d += 16;
s += 4;
@@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
int srcStride1, int srcStride2,
int dstStride1, int dstStride2)
{
- x86_reg y;
- int x,w,h;
+ x86_reg x, y;
+ int w,h;
w=width/2; h=height/2;
__asm__ volatile(
PREFETCH" %0 \n\t"
@@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s1[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s1), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s2[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s2), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];