aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Oliver <protogonoi@gmail.com>2014-03-18 15:53:26 +1100
committerMichael Niedermayer <michaelni@gmx.at>2014-03-18 23:39:30 +0100
commit823674751196e382c1d6334b8c92839f95d0ba9e (patch)
treed3b52ee9bfc94f3e4e8bd9ce646c29d6a2f41726
parentb2d3a45598ef8f8aaee489541c6914f960e53db4 (diff)
downloadffmpeg-823674751196e382c1d6334b8c92839f95d0ba9e.tar.gz
Automatically change MANGLE() into named inline asm operands when direct symbol reference in inline asm are not supported.
This is part of the patch-set for intel C inline asm on windows support Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-xconfigure3
-rw-r--r--libavcodec/x86/cabac.h3
-rw-r--r--libavcodec/x86/cavsdsp.c2
-rw-r--r--libavcodec/x86/dsputil_mmx.c1
-rw-r--r--libavcodec/x86/h264_i386.h2
-rw-r--r--libavcodec/x86/idct_sse2_xvid.c2
-rw-r--r--libavcodec/x86/lpc.c3
-rw-r--r--libavcodec/x86/motion_est.c3
-rw-r--r--libavcodec/x86/simple_idct.c1
-rw-r--r--libavcodec/x86/vc1dsp_mmx.c6
-rw-r--r--libavutil/x86/asm.h36
-rw-r--r--libpostproc/postprocess_template.c7
-rw-r--r--libswresample/x86/resample_mmx.h2
-rw-r--r--libswscale/x86/rgb2rgb_template.c11
-rw-r--r--libswscale/x86/swscale_template.c12
-rw-r--r--libswscale/x86/yuv2rgb_template.c9
16 files changed, 99 insertions, 4 deletions
diff --git a/configure b/configure
index 3939cfa63e..b41e0dc8d5 100755
--- a/configure
+++ b/configure
@@ -1691,6 +1691,7 @@ TOOLCHAIN_FEATURES="
ibm_asm
inline_asm_labels
inline_asm_nonlocal_labels
+ inline_asm_direct_symbol_refs
pragma_deprecated
rsync_contimeout
symver_asm_label
@@ -4306,6 +4307,8 @@ EOF
# check whether xmm clobbers are supported
check_inline_asm xmm_clobbers '"":::"%xmm0"'
+ check_inline_asm inline_asm_direct_symbol_refs '"movl test, %eax"'
+
# check whether binutils is new enough to compile SSSE3/MMXEXT
enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"'
enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"'
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 3a82c1e4a4..0a68b7b7ef 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -110,7 +110,7 @@
"2: \n\t"
#else /* BROKEN_RELOCATIONS */
-#define TABLES_ARG
+#define TABLES_ARG NAMED_CONSTRAINTS_ADD(ff_h264_cabac_tables)
#define RIP_ARG
#if HAVE_FAST_CMOV
@@ -184,6 +184,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
__asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables)
+ : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
);
#endif
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index aaa09d1784..805e120db3 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -309,6 +309,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
\
: "+a"(src), "+c"(dst)\
: "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
+ NAMED_CONSTRAINTS_ADD(MUL2)\
: "memory"\
);\
if(h==16){\
@@ -324,6 +325,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
\
: "+a"(src), "+c"(dst)\
: "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
+ NAMED_CONSTRAINTS_ADD(MUL2)\
: "memory"\
);\
}\
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 47835acf2b..420a4ef8b5 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -123,6 +123,7 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
put_signed_pixels_clamped_mmx_half(64)
: "+&r" (pixels), "=&r" (line_skip3)
: "r" (block), "r" (line_skip)
+ NAMED_CONSTRAINTS_ADD(ff_pb_80)
: "memory");
}
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 0dc0a7cb0f..9d811f07b7 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -55,6 +55,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
__asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables)
+ : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
);
#endif
@@ -130,6 +131,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
__asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables)
+ : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
);
#endif
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index d9bc841825..e1878fa93d 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -381,7 +381,7 @@ inline void ff_idct_xvid_sse2(short *block)
iLLM_PASS("%0")
"6: \n\t"
: "+r"(block)
- :
+ : NAMED_CONSTRAINTS(m127,iTab1,walkenIdctRounders,iTab2,iTab3,iTab4,tan3,tan1,tan2,sqrt2)
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
"%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
#if ARCH_X86_64
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index 9682733096..bff26359f4 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -72,6 +72,7 @@ static void lpc_apply_welch_window_sse2(const int32_t *data, int len,
"3: \n\t"
:"+&r"(i), "+&r"(j)
:"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len)
+ NAMED_CONSTRAINTS_ADD(pd_1,pd_2)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm5", "%xmm6", "%xmm7")
);
@@ -116,6 +117,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
"movsd %%xmm2, 16(%1) \n\t"
:"+&r"(i)
:"r"(autoc+j), "r"(data+len), "r"(data+len-j)
+ NAMED_CONSTRAINTS_ADD(pd_1)
:"memory"
);
} else {
@@ -139,6 +141,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
"movsd %%xmm1, %2 \n\t"
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
:"r"(data+len), "r"(data+len-j)
+ NAMED_CONSTRAINTS_ADD(pd_1)
);
}
}
diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c
index 2b11cf95d3..3df471ed78 100644
--- a/libavcodec/x86/motion_est.c
+++ b/libavcodec/x86/motion_est.c
@@ -193,7 +193,8 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
"sub $2, %0 \n\t"
" jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2)
- : "r" ((x86_reg) stride));
+ : "r" ((x86_reg) stride)
+ NAMED_CONSTRAINTS_ADD(bone));
}
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index e10dc7ba09..3ae30f3488 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -1143,6 +1143,7 @@ Temp
"9: \n\t"
:: "r" (block), "r" (temp), "r" (coeffs)
+ NAMED_CONSTRAINTS_ADD(wm1010,d40000)
: "%eax"
);
}
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 5ceacd348e..942aa42b45 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -110,6 +110,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
: "+r"(src), "+r"(dst)
: "r"(stride), "r"(-2*stride),
"m"(shift), "m"(rnd), "r"(9*stride-4)
+ NAMED_CONSTRAINTS_ADD(ff_pw_9)
: "%"REG_c, "memory"
);
}
@@ -154,6 +155,7 @@ static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
"jnz 1b \n\t"\
: "+r"(h), "+r" (src), "+r" (dst)\
: "r"(stride), "m"(rnd)\
+ NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
: "memory"\
);\
}
@@ -212,6 +214,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
: "+r"(src), "+r"(dst)\
: "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
"g"(stride-offset)\
+ NAMED_CONSTRAINTS_ADD(ff_pw_9)\
: "%"REG_c, "memory"\
);\
}
@@ -314,6 +317,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
: "+r"(h), "+r" (src), "+r" (dst) \
: "r"(src_stride), "r"(3*src_stride), \
"m"(rnd), "m"(shift) \
+ NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18) \
: "memory" \
); \
}
@@ -351,6 +355,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
"jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \
: "r"(stride), "m"(rnd) \
+ NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128) \
: "memory" \
); \
}
@@ -386,6 +391,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
"jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \
: "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
+ NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3) \
: "memory" \
); \
}
diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h
index 70ccac7885..5328e2bfc9 100644
--- a/libavutil/x86/asm.h
+++ b/libavutil/x86/asm.h
@@ -107,6 +107,40 @@ typedef int x86_reg;
# define LOCAL_MANGLE(a) #a
#endif
-#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
+# define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+# define NAMED_CONSTRAINTS_ADD(...)
+# define NAMED_CONSTRAINTS(...)
+#else
+ /* When direct symbol references are used in code passed to a compiler that does not support them
+ * then these references need to be converted to named asm constraints instead.
+ * Instead of returning a direct symbol MANGLE now returns a named constraint for that specific symbol.
+ * In order for this to work there must also be a corresponding entry in the asm-interface. To add this
+ * entry use the macro NAMED_CONSTRAINTS() and pass in a list of each symbol reference used in the
+ * corresponding block of code. (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc. ).
+ * If there are already existing constraints then use NAMED_CONSTRAINTS_ADD to add to the existing constraint list.
+ */
+# define MANGLE(a) "%["#a"]"
+ // Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work
+# define FE_0(P,X) P(X)
+# define FE_1(P,X,X1) P(X), FE_0(P,X1)
+# define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2)
+# define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3)
+# define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4)
+# define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5)
+# define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6)
+# define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7)
+# define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8)
+# define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9)
+# define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
+# define GET_FE(A) GET_FE_IMPL A
+# define GET_FE_GLUE(x, y) x y
+# define FOR_EACH_VA(P,...) GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__))
+# define NAME_CONSTRAINT(x) [x] "m"(x)
+ // Parameters are a list of each symbol reference required
+# define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
+ // Same but without comma for when there are no previously defined constraints
+# define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
+#endif
#endif /* AVUTIL_X86_ASM_H */
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 74b0ab4c46..3991ae9472 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -490,6 +490,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
:
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
+ NAMED_CONSTRAINTS_ADD(b01)
: "%"REG_a, "%"REG_c
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -755,6 +756,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
:
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
+ NAMED_CONSTRAINTS_ADD(b80,b00,b01)
: "%"REG_a, "%"REG_c
);
@@ -1042,6 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
: "+r" (src)
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
+ NAMED_CONSTRAINTS_ADD(w05,w20)
: "%"REG_a
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -1313,6 +1316,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
"1: \n\t"
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
+ NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
: "%"REG_a, "%"REG_d, "%"REG_SP
);
#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
@@ -2446,6 +2450,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
"4: \n\t"
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
+ NAMED_CONSTRAINTS_ADD(b80)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -2790,6 +2795,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
: "+&r"(src)
: "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
+ NAMED_CONSTRAINTS_ADD(w04)
);
src+= step; // src points to begin of the 8x8 Block
@@ -3061,6 +3067,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
: "+r" (temp_src)
: "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
+ NAMED_CONSTRAINTS_ADD(w05,w20)
: "%"REG_a
);
}
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
index fab52f704a..f366cc7f59 100644
--- a/libswresample/x86/resample_mmx.h
+++ b/libswresample/x86/resample_mmx.h
@@ -46,6 +46,7 @@ __asm__ volatile(\
: "r" (((uint8_t*)(src+sample_index))-len),\
"r" (((uint8_t*)filter)-len),\
"r" (dst+dst_index)\
+ NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
);
#define COMMON_CORE_INT16_SSE2 \
@@ -69,4 +70,5 @@ __asm__ volatile(\
: "r" (((uint8_t*)(src+sample_index))-len),\
"r" (((uint8_t*)filter)-len),\
"r" (dst+dst_index)\
+ NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
);
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 455e7c25a8..b68824dcbe 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -163,6 +163,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
:: "r"(dest), "r"(s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
dest += 24;
s += 32;
@@ -785,6 +786,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
:"=m"(*d)
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -801,6 +803,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -890,6 +893,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -906,6 +910,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -966,6 +971,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -1009,6 +1015,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -1133,6 +1140,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"2: \n\t"
: "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size)
+ NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
);
__asm__ volatile(SFENCE:::"memory");
@@ -1468,6 +1476,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
"g" (-mmxSize)
+ NAMED_CONSTRAINTS_ADD(mmx_ff)
: "%"REG_a
);
@@ -1689,6 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
: "%"REG_a, "%"REG_d
);
ydst += lumStride;
@@ -1837,6 +1847,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
: "%"REG_a, "%"REG_d
);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index c7a1bb46d9..71a60bc738 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -172,6 +172,7 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \
+ NAMED_CONSTRAINTS_ADD(bF8,bFC) \
: "%"REG_a, "%"REG_d, "%"REG_S \
);
@@ -680,6 +681,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
@@ -704,6 +706,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
@@ -931,6 +934,7 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
@@ -960,6 +964,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
@@ -989,6 +994,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
@@ -1262,6 +1268,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1276,6 +1283,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
}
@@ -1307,6 +1315,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1327,6 +1336,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
}
@@ -1358,6 +1368,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1378,6 +1389,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
}
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index c879102cc4..d29e3a424d 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -134,10 +134,18 @@
"add $4, %0\n\t" \
"js 1b\n\t" \
+#if COMPILE_TEMPLATE_MMXEXT
+#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
+#else
+#define RGB_PACK24_B_OPERANDS
+#endif
+
#define YUV2RGB_OPERANDS \
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index) \
+ NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
+ RGB_PACK24_B_OPERANDS \
: "memory" \
); \
} \
@@ -146,6 +154,7 @@
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index), "r" (pa - 2*index) \
+ NAMED_CONSTRAINTS_ADD(mmx_00ffw) \
: "memory" \
); \
} \