aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vc1dsp_mmx.c
diff options
context:
space:
mode:
author Timothy Gu <timothygu99@gmail.com> 2016-02-14 04:22:48 +0000
committer Timothy Gu <timothygu99@gmail.com> 2016-02-14 11:11:02 -0800
commit bcc223523e68a52050dc3f7d0e6a07c82f6f2bff (patch)
tree 8d24f5be0b00e0399530226265844a0a811c50aa /libavcodec/x86/vc1dsp_mmx.c
parent ebf648d490448d511b5fe970d76040169e65ef74 (diff)
download ffmpeg-bcc223523e68a52050dc3f7d0e6a07c82f6f2bff.tar.gz
x86/vc1dsp: Port vc1_*_hor_16b_shift2 to NASM format
Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Diffstat (limited to 'libavcodec/x86/vc1dsp_mmx.c')
-rw-r--r--libavcodec/x86/vc1dsp_mmx.c61
1 files changed, 8 insertions, 53 deletions
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index ff13d9b119..832564837b 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -38,6 +38,10 @@
void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst,
const uint8_t *src, x86_reg stride,
int rnd, int64_t shift);
+void ff_vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
+ const int16_t *src, int rnd);
+void ff_vc1_avg_hor_16b_shift2_mmxext(uint8_t *dst, x86_reg stride,
+ const int16_t *src, int rnd);
#define OP_PUT(S,D)
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
@@ -71,55 +75,6 @@ void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst,
"punpckldq %%mm7, %%mm7 \n\t"
/**
- * Data is already unpacked, so some operations can directly be made from
- * memory.
- */
-#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\
-static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
- const int16_t *src, int rnd)\
-{\
- int h = 8;\
-\
- src -= 1;\
- rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */\
- __asm__ volatile(\
- LOAD_ROUNDER_MMX("%4")\
- "movq "MANGLE(ff_pw_128)", %%mm6\n\t"\
- "movq "MANGLE(ff_pw_9)", %%mm5 \n\t"\
- "1: \n\t"\
- "movq 2*0+0(%1), %%mm1 \n\t"\
- "movq 2*0+8(%1), %%mm2 \n\t"\
- "movq 2*1+0(%1), %%mm3 \n\t"\
- "movq 2*1+8(%1), %%mm4 \n\t"\
- "paddw 2*3+0(%1), %%mm1 \n\t"\
- "paddw 2*3+8(%1), %%mm2 \n\t"\
- "paddw 2*2+0(%1), %%mm3 \n\t"\
- "paddw 2*2+8(%1), %%mm4 \n\t"\
- "pmullw %%mm5, %%mm3 \n\t"\
- "pmullw %%mm5, %%mm4 \n\t"\
- "psubw %%mm1, %%mm3 \n\t"\
- "psubw %%mm2, %%mm4 \n\t"\
- NORMALIZE_MMX("$7")\
- /* Remove bias */\
- "paddw %%mm6, %%mm3 \n\t"\
- "paddw %%mm6, %%mm4 \n\t"\
- TRANSFER_DO_PACK(OP)\
- "add $24, %1 \n\t"\
- "add %3, %2 \n\t"\
- "decl %0 \n\t"\
- "jnz 1b \n\t"\
- : "+r"(h), "+r" (src), "+r" (dst)\
- : "r"(stride), "m"(rnd)\
- NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
- : "memory"\
- );\
-}
-
-VC1_HOR_16b_SHIFT2(OP_PUT, put_)
-VC1_HOR_16b_SHIFT2(OP_AVG, avg_)
-
-
-/**
* Purely vertical or horizontal 1/2 shift interpolation.
* Sacrifice mm6 for the *9 factor.
*/
@@ -380,14 +335,14 @@ typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_
* @param vmode Vertical filter.
* @param rnd Rounding bias.
*/
-#define VC1_MSPEL_MC(OP)\
+#define VC1_MSPEL_MC(OP, INSTR)\
static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
int hmode, int vmode, int rnd)\
{\
static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
{ NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
- { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\
+ { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\
static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
{ NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
\
@@ -428,8 +383,8 @@ static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
}
-VC1_MSPEL_MC(put_)
-VC1_MSPEL_MC(avg_)
+VC1_MSPEL_MC(put_, mmx)
+VC1_MSPEL_MC(avg_, mmxext)
/** Macro to ease bicubic filter interpolation functions declarations */
#define DECLARE_FUNCTION(a, b) \