diff options
author | James Almer <jamrial@gmail.com> | 2014-07-05 16:40:29 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-05 21:43:40 +0200 |
commit | dad31083aec7f0fb7bf39406dc6cbf11c4834b02 (patch) | |
tree | 53568a6a7f9cb063a487f4df5d7e2c3240cda31d /libavcodec/x86/svq1enc.asm | |
parent | 19b79c1429c4234b6ca60ca21e0ed8a152db1343 (diff) | |
download | ffmpeg-dad31083aec7f0fb7bf39406dc6cbf11c4834b02.tar.gz |
x86/svq1enc: port ssd_int8_vs_int16 to yasm
Also add an SSE2 version
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/svq1enc.asm')
-rw-r--r-- | libavcodec/x86/svq1enc.asm | 61 |
1 files changed, 61 insertions, 0 deletions
;******************************************************************************
;* SIMD-optimized SVQ1 encoder functions
;* Copyright (c) 2007 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_TEXT

;------------------------------------------------------------------------------
; ssd_int8_vs_int16(pix1, pix2, size)
;
; Sum of squared differences between a buffer of signed 8-bit values (pix1)
; and a buffer of signed 16-bit values (pix2):
;
;     sum over i in [0, size) of (pix2[i] - pix1[i])^2
;
; NOTE(review): the matching C prototype is presumably
;     int ff_ssd_int8_vs_int16_<opt>(const int8_t *pix1,
;                                    const int16_t *pix2, intptr_t size);
; confirm against the declaration on the C side.
;
; In:   pix1q = base of the 8-bit buffer
;       pix2q = base of the 16-bit buffer; read with mova, i.e. an aligned
;               load of mmsize bytes
;       sizeq = element count. The loop consumes 8 elements per iteration,
;               walks from the end of the buffers towards the start and
;               always executes at least once, so size has to be a positive
;               multiple of 8.
; Out:  eax   = low 32 bits of the accumulated sum
;
; The differences are formed with psubw and therefore wrap at 16 bits.
; NOTE(review): nothing here executes emms for the MMX variant; confirm the
; caller takes care of the MMX/x87 state.
;------------------------------------------------------------------------------
%macro SSD_INT8_VS_INT16 0
; NOTE(review): the MMX branch also touches m3 and m4 although only three
; vector registers are declared; presumably that count only matters for XMM
; builds - confirm against x86inc.asm.
cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
    pxor      m0, m0                        ; m0 = dword accumulators, start at 0
.loop:
    sub       sizeq, 8                      ; step back 8 elements; the flags set
                                            ; here are the ones "jg .loop" tests
                                            ; (no instruction in between writes
                                            ; EFLAGS)
    movq      m1, [pix1q + sizeq]           ; 8 x int8
    mova      m2, [pix2q + sizeq*2]         ; int16 data, 2 bytes per element
%if mmsize == 8
    ; 64-bit registers hold 4 words each, so the 8 elements are handled as
    ; two halves: m1/m2 = elements 0..3, m4/m3 = elements 4..7.
    movq      m3, [pix2q + sizeq*2 + mmsize]
    punpckhbw m4, m1                        ; high byte of each word = pix1 byte;
                                            ; the low byte is whatever m4 held
                                            ; and is discarded by the shift below
    punpcklbw m1, m1
    psraw     m4, 8                         ; arithmetic shift = sign-extend
    psraw     m1, 8                         ; int8 -> int16
    psubw     m3, m4                        ; pix2 - pix1
    psubw     m2, m1
    pmaddwd   m3, m3                        ; square each word, add adjacent pairs
    pmaddwd   m2, m2                        ; into dwords
    paddd     m0, m3
    paddd     m0, m2
%else
    ; A 128-bit register holds all 8 words at once.
    punpcklbw m1, m1                        ; duplicate each byte into a word ...
    psraw     m1, 8                         ; ... then sign-extend int8 -> int16
    psubw     m2, m1                        ; pix2 - pix1
    pmaddwd   m2, m2                        ; square each word, add adjacent pairs
    paddd     m0, m2
%endif
    jg .loop                                ; continue while sizeq > 0
    HADDD     m0, m1                        ; x86util helper: fold the dword lanes
                                            ; of m0 together (m1 is scratch)
    movd      eax, m0                       ; return the low dword
    RET
%endmacro

INIT_MMX mmx
SSD_INT8_VS_INT16
INIT_XMM sse2
SSD_INT8_VS_INT16