about | summary | refs | log | tree | commit | diff | stats
path: root/libswscale/x86
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-08-14 15:31:43 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2012-08-14 15:34:39 +0200
commit 7427d1ca4ab202def24fc3cefc4401a351d7248c (patch)
tree be2cd06e1e5457daad74966a0155e86baa5e9086 /libswscale/x86
parent 0e05908c954ff64ef2fcb2a97ed083bc285282c1 (diff)
parent 0d230e9312a676266bd6fa3478032db4860221a7 (diff)
download ffmpeg-7427d1ca4ab202def24fc3cefc4401a351d7248c.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  g723.1: simplify scale_vector()
  g723.1: simplify normalize_bits()
  vda: cosmetics: fix Doxygen comment formatting
  vda: better frame allocation
  vda: Merge implementation into one file
  vda: support synchronous decoding
  vda: Reuse the bitstream buffer and reallocate it only if needed
  build: Factor out mpegvideo encoding dependencies to CONFIG_MPEGVIDEOENC
  avprobe: Include libm.h for the log2 fallback
  proresenc: use the edge emulation buffer
  rtmp: handle bytes read reports
  configure: Fix typo in mpeg2video/svq1 decoder dependency declaration
  Use log2(x) instead of log(x) / log(2)
  x86: swscale: fix fragile memory accesses
  x86: swscale: remove disabled code
  x86: yadif: fix asm with suncc
  x86: cabac: allow building with suncc
  x86: mlpdsp: avoid taking address of void
  ARM: intmath: use native-size return types for clipping functions

Conflicts:
  configure
  ffprobe.c
  libavcodec/Makefile
  libavcodec/g723_1.c
  libavcodec/v210dec.h
  libavcodec/vda.h
  libavcodec/vda_h264.c
  libavcodec/x86/cabac.h
  libavfilter/x86/yadif_template.c
  libswscale/x86/rgb2rgb_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86')
-rw-r--r-- libswscale/x86/rgb2rgb_template.c 368
1 files changed, 137 insertions, 231 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 594524d9ed..7e5ffdf8d1 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "punpckldq 3%1, %%mm0 \n\t"
- "movd 6%1, %%mm1 \n\t"
- "punpckldq 9%1, %%mm1 \n\t"
- "movd 12%1, %%mm2 \n\t"
- "punpckldq 15%1, %%mm2 \n\t"
- "movd 18%1, %%mm3 \n\t"
- "punpckldq 21%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "punpckldq 3(%1), %%mm0 \n\t"
+ "movd 6(%1), %%mm1 \n\t"
+ "punpckldq 9(%1), %%mm1 \n\t"
+ "movd 12(%1), %%mm2 \n\t"
+ "punpckldq 15(%1), %%mm2 \n\t"
+ "movd 18(%1), %%mm3 \n\t"
+ "punpckldq 21(%1), %%mm3 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm2 \n\t"
"por %%mm7, %%mm3 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0"
- :"=m"(*dest)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm1, 8(%0) \n\t"
+ MOVNTQ" %%mm2, 16(%0) \n\t"
+ MOVNTQ" %%mm3, 24(%0)"
+ :: "r"(dest), "r"(s)
:"memory");
dest += 32;
s += 24;
@@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm1, 8%0 \n\t" \
- MOVNTQ" %%mm4, 16%0"
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm1, 8(%0) \n\t" \
+ MOVNTQ" %%mm4, 16(%0)"
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 31;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm5 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 16(%1), %%mm4 \n\t"
+ "movq 24(%1), %%mm5 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"movq %%mm4, %%mm6 \n\t"
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
- :"=m"(*dest)
- :"m"(*s)
+ :: "r"(dest), "r"(s)
:"memory");
dest += 24;
s += 32;
@@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $1, %%mm0 \n\t"
@@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
"pand %%mm6, %%mm3 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm3, %%mm2 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm2, 8%0"
- :"=m"(*d)
- :"m"(*s)
+ MOVNTQ" %%mm0, (%0) \n\t"
+ MOVNTQ" %%mm2, 8(%0)"
+ :: "r"(d), "r"(s)
);
d+=16;
s+=16;
@@ -287,7 +283,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
__asm__ volatile(
"movq %3, %%mm5 \n\t"
"movq %4, %%mm6 \n\t"
@@ -322,47 +317,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
: "+r" (d), "+r"(s)
: "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
);
-#else
- __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
- __asm__ volatile(
- "movq %0, %%mm7 \n\t"
- "movq %1, %%mm6 \n\t"
- ::"m"(red_16mask),"m"(green_16mask));
- while (s < mm_end) {
- __asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "movq %%mm3, %%mm5 \n\t"
- "psrlq $3, %%mm0 \n\t"
- "psrlq $3, %%mm3 \n\t"
- "pand %2, %%mm0 \n\t"
- "pand %2, %%mm3 \n\t"
- "psrlq $5, %%mm1 \n\t"
- "psrlq $5, %%mm4 \n\t"
- "pand %%mm6, %%mm1 \n\t"
- "pand %%mm6, %%mm4 \n\t"
- "psrlq $8, %%mm2 \n\t"
- "psrlq $8, %%mm5 \n\t"
- "pand %%mm7, %%mm2 \n\t"
- "pand %%mm7, %%mm5 \n\t"
- "por %%mm1, %%mm0 \n\t"
- "por %%mm4, %%mm3 \n\t"
- "por %%mm2, %%mm0 \n\t"
- "por %%mm5, %%mm3 \n\t"
- "psllq $16, %%mm3 \n\t"
- "por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
- d += 4;
- s += 16;
- }
-#endif
__asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory");
while (s < end) {
@@ -386,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -413,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 16;
}
@@ -434,7 +388,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
__asm__ volatile(
"movq %3, %%mm5 \n\t"
"movq %4, %%mm6 \n\t"
@@ -469,47 +422,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
: "+r" (d), "+r"(s)
: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
);
-#else
- __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
- __asm__ volatile(
- "movq %0, %%mm7 \n\t"
- "movq %1, %%mm6 \n\t"
- ::"m"(red_15mask),"m"(green_15mask));
- while (s < mm_end) {
- __asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "movq %%mm3, %%mm5 \n\t"
- "psrlq $3, %%mm0 \n\t"
- "psrlq $3, %%mm3 \n\t"
- "pand %2, %%mm0 \n\t"
- "pand %2, %%mm3 \n\t"
- "psrlq $6, %%mm1 \n\t"
- "psrlq $6, %%mm4 \n\t"
- "pand %%mm6, %%mm1 \n\t"
- "pand %%mm6, %%mm4 \n\t"
- "psrlq $9, %%mm2 \n\t"
- "psrlq $9, %%mm5 \n\t"
- "pand %%mm7, %%mm2 \n\t"
- "pand %%mm7, %%mm5 \n\t"
- "por %%mm1, %%mm0 \n\t"
- "por %%mm4, %%mm3 \n\t"
- "por %%mm2, %%mm0 \n\t"
- "por %%mm5, %%mm3 \n\t"
- "psllq $16, %%mm3 \n\t"
- "por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
- d += 4;
- s += 16;
- }
-#endif
__asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory");
while (s < end) {
@@ -533,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 4%1, %%mm3 \n\t"
- "punpckldq 8%1, %%mm0 \n\t"
- "punpckldq 12%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 4(%1), %%mm3 \n\t"
+ "punpckldq 8(%1), %%mm0 \n\t"
+ "punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -560,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
@@ -588,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -615,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -645,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -672,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@@ -702,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -729,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -759,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movd %1, %%mm0 \n\t"
- "movd 3%1, %%mm3 \n\t"
- "punpckldq 6%1, %%mm0 \n\t"
- "punpckldq 9%1, %%mm3 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 3(%1), %%mm3 \n\t"
+ "punpckldq 6(%1), %%mm0 \n\t"
+ "punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@@ -786,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+ MOVNTQ" %%mm0, (%0) \n\t"
+ ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@@ -812,10 +724,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -844,9 +756,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -873,7 +785,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null)
+ :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -889,8 +801,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -917,10 +828,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -950,9 +861,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- "movq 8%1, %%mm2 \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -979,7 +890,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -995,8 +906,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
- :"=m"(*d)
- :"m"(*s)
+ :: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@@ -1028,8 +938,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
- MOVNTQ" %%mm0, %0 \n\t" \
- MOVNTQ" %%mm3, 8%0 \n\t" \
+ MOVNTQ" %%mm0, (%0) \n\t" \
+ MOVNTQ" %%mm3, 8(%0) \n\t" \
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
@@ -1044,10 +954,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -1056,8 +966,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw %5, %%mm1 \n\t"
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
:"memory");
d += 16;
s += 4;
@@ -1087,10 +996,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %1, %%mm2 \n\t"
+ PREFETCH" 32(%1) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@@ -1100,8 +1009,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- :"=m"(*d)
- :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
:"memory");
d += 16;
s += 4;
@@ -2029,8 +1937,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
int srcStride1, int srcStride2,
int dstStride1, int dstStride2)
{
- x86_reg y;
- int x,w,h;
+ x86_reg x, y;
+ int w,h;
w=width/2; h=height/2;
__asm__ volatile(
PREFETCH" %0 \n\t"
@@ -2042,11 +1950,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -2059,16 +1967,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s1[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s1), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@@ -2079,11 +1986,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
- PREFETCH" 32%1 \n\t"
- "movq %1, %%mm0 \n\t"
- "movq 8%1, %%mm2 \n\t"
- "movq 16%1, %%mm4 \n\t"
- "movq 24%1, %%mm6 \n\t"
+ PREFETCH" 32(%1,%2) \n\t"
+ "movq (%1,%2), %%mm0 \n\t"
+ "movq 8(%1,%2), %%mm2 \n\t"
+ "movq 16(%1,%2), %%mm4 \n\t"
+ "movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@@ -2096,16 +2003,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, %0 \n\t"
- MOVNTQ" %%mm1, 8%0 \n\t"
- MOVNTQ" %%mm2, 16%0 \n\t"
- MOVNTQ" %%mm3, 24%0 \n\t"
- MOVNTQ" %%mm4, 32%0 \n\t"
- MOVNTQ" %%mm5, 40%0 \n\t"
- MOVNTQ" %%mm6, 48%0 \n\t"
- MOVNTQ" %%mm7, 56%0"
- :"=m"(d[2*x])
- :"m"(s2[x])
+ MOVNTQ" %%mm0, (%0,%2,2) \n\t"
+ MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
+ MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
+ MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
+ MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
+ MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
+ MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
+ MOVNTQ" %%mm7, 56(%0,%2,2)"
+ :: "r"(d), "r"(s2), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];