diff options
author | Timothy Gu <timothygu99@gmail.com> | 2016-02-14 04:22:48 +0000 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2016-02-14 11:11:02 -0800 |
commit | bcc223523e68a52050dc3f7d0e6a07c82f6f2bff (patch) | |
tree | 8d24f5be0b00e0399530226265844a0a811c50aa /libavcodec/x86/vc1dsp_mmx.c | |
parent | ebf648d490448d511b5fe970d76040169e65ef74 (diff) | |
download | ffmpeg-bcc223523e68a52050dc3f7d0e6a07c82f6f2bff.tar.gz |
x86/vc1dsp: Port vc1_*_hor_16b_shift2 to NASM format
Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Diffstat (limited to 'libavcodec/x86/vc1dsp_mmx.c')
-rw-r--r-- | libavcodec/x86/vc1dsp_mmx.c | 61 |
1 files changed, 8 insertions, 53 deletions
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index ff13d9b119..832564837b 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -38,6 +38,10 @@ void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift); +void ff_vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, + const int16_t *src, int rnd); +void ff_vc1_avg_hor_16b_shift2_mmxext(uint8_t *dst, x86_reg stride, + const int16_t *src, int rnd); #define OP_PUT(S,D) #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" @@ -71,55 +75,6 @@ void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst, "punpckldq %%mm7, %%mm7 \n\t" /** - * Data is already unpacked, so some operations can directly be made from - * memory. - */ -#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\ -static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\ - const int16_t *src, int rnd)\ -{\ - int h = 8;\ -\ - src -= 1;\ - rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */\ - __asm__ volatile(\ - LOAD_ROUNDER_MMX("%4")\ - "movq "MANGLE(ff_pw_128)", %%mm6\n\t"\ - "movq "MANGLE(ff_pw_9)", %%mm5 \n\t"\ - "1: \n\t"\ - "movq 2*0+0(%1), %%mm1 \n\t"\ - "movq 2*0+8(%1), %%mm2 \n\t"\ - "movq 2*1+0(%1), %%mm3 \n\t"\ - "movq 2*1+8(%1), %%mm4 \n\t"\ - "paddw 2*3+0(%1), %%mm1 \n\t"\ - "paddw 2*3+8(%1), %%mm2 \n\t"\ - "paddw 2*2+0(%1), %%mm3 \n\t"\ - "paddw 2*2+8(%1), %%mm4 \n\t"\ - "pmullw %%mm5, %%mm3 \n\t"\ - "pmullw %%mm5, %%mm4 \n\t"\ - "psubw %%mm1, %%mm3 \n\t"\ - "psubw %%mm2, %%mm4 \n\t"\ - NORMALIZE_MMX("$7")\ - /* Remove bias */\ - "paddw %%mm6, %%mm3 \n\t"\ - "paddw %%mm6, %%mm4 \n\t"\ - TRANSFER_DO_PACK(OP)\ - "add $24, %1 \n\t"\ - "add %3, %2 \n\t"\ - "decl %0 \n\t"\ - "jnz 1b \n\t"\ - : "+r"(h), "+r" (src), "+r" (dst)\ - : "r"(stride), "m"(rnd)\ - NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\ - : "memory"\ - );\ -} - -VC1_HOR_16b_SHIFT2(OP_PUT, put_) -VC1_HOR_16b_SHIFT2(OP_AVG, avg_) - - -/** * Purely vertical or horizontal 1/2 shift interpolation. 
* Sacrifice mm6 for *9 factor. */ @@ -380,14 +335,14 @@ typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_ * @param vmode Vertical filter. * @param rnd Rounding bias. */ -#define VC1_MSPEL_MC(OP)\ +#define VC1_MSPEL_MC(OP, INSTR)\ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ int hmode, int vmode, int rnd)\ {\ static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ { NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\ static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ - { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\ + { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\ static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\ { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\ \ @@ -428,8 +383,8 @@ static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ } -VC1_MSPEL_MC(put_) -VC1_MSPEL_MC(avg_) +VC1_MSPEL_MC(put_, mmx) +VC1_MSPEL_MC(avg_, mmxext) /** Macro to ease bicubic filter interpolation functions declarations */ #define DECLARE_FUNCTION(a, b) \ |