aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/dsputilenc.asm
diff options
context:
space:
mode:
authorTimothy Gu <timothygu99@gmail.com>2014-05-29 20:56:03 -0700
committerMichael Niedermayer <michaelni@gmx.at>2014-05-30 16:57:52 +0200
commit154cee9292840295bff07e31ebd92c8296d8aa2d (patch)
tree99f3d50f859d6086c38e9b899818b383f1c09938 /libavcodec/x86/dsputilenc.asm
parent0b6292b7b85ccc1437bddbd939b7cf1f8a964b17 (diff)
downloadffmpeg-154cee9292840295bff07e31ebd92c8296d8aa2d.tar.gz
x86: dsputilenc: convert ff_sse{8, 16}_mmx() to yasm
Signed-off-by: Timothy Gu <timothygu99@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/dsputilenc.asm')
-rw-r--r--libavcodec/x86/dsputilenc.asm48
1 files changed, 35 insertions, 13 deletions
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index de36b95a30..0628550145 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -278,19 +278,27 @@ INIT_XMM ssse3
%define ABS_SUM_8x8 ABS_SUM_8x8_64
HADAMARD8_DIFF 9
-INIT_XMM sse2
-; int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-; int line_size, int h);
-cglobal sse16, 5, 5, 8
- shr r4d, 1
+; int ff_sse*_*(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+; int line_size, int h)
+
+%macro SUM_SQUARED_ERRORS 1
+cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
+%if %1 == mmsize
+ shr hd, 1
+%endif
pxor m0, m0 ; mm0 = 0
pxor m7, m7 ; mm7 holds the sum
.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
- movu m1, [r1 ] ; mm1 = pix1[0][0-15]
- movu m2, [r2 ] ; mm2 = pix2[0][0-15]
- movu m3, [r1+r3] ; mm3 = pix1[1][0-15]
- movu m4, [r2+r3] ; mm4 = pix2[1][0-15]
+ movu m1, [pix1q] ; m1 = pix1[0][0-15], [0-7] for mmx
+ movu m2, [pix2q] ; m2 = pix2[0][0-15], [0-7] for mmx
+%if %1 == mmsize
+ movu m3, [pix1q+lsizeq] ; m3 = pix1[1][0-15], [0-7] for mmx
+ movu m4, [pix2q+lsizeq] ; m4 = pix2[1][0-15], [0-7] for mmx
+%else ; %1 / 2 == mmsize; mmx only
+ mova m3, [pix1q+8] ; m3 = pix1[0][8-15]
+ mova m4, [pix2q+8] ; m4 = pix2[0][8-15]
+%endif
; todo: mm1-mm2, mm3-mm4
; algo: subtract mm1 from mm2 with saturation and vice versa
@@ -319,20 +327,34 @@ cglobal sse16, 5, 5, 8
pmaddwd m1, m1
pmaddwd m3, m3
- lea r1, [r1+r3*2] ; pix1 += 2*line_size
- lea r2, [r2+r3*2] ; pix2 += 2*line_size
-
paddd m1, m2
paddd m3, m4
paddd m7, m1
paddd m7, m3
- dec r4
+%if %1 == mmsize
+ lea pix1q, [pix1q + 2*lsizeq]
+ lea pix2q, [pix2q + 2*lsizeq]
+%else
+ add pix1q, lsizeq
+ add pix2q, lsizeq
+%endif
+ dec hd
jnz .next2lines
HADDD m7, m1
movd eax, m7 ; return value
RET
+%endmacro
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 8
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 16
+
+INIT_XMM sse2
+SUM_SQUARED_ERRORS 16
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)