aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/hpeldsp.asm
diff options
context:
space:
mode:
authorrogerdpack <rogerpack2005@gmail.com>2013-01-15 19:09:15 -0700
committerrogerdpack <rogerpack2005@gmail.com>2013-01-15 19:09:15 -0700
commitc540312ac3b58e0bbd751844fc2c47c6e3713cf5 (patch)
treefcf92b1c0f1772b379828125c2555a47d1c81c6b /libavcodec/x86/hpeldsp.asm
parent47e88486b4b3b3de992b07f89dfaedf410a8bd5e (diff)
parent2b20397e1fbe52db800ef5deb810f7bc2602f248 (diff)
downloadffmpeg-c540312ac3b58e0bbd751844fc2c47c6e3713cf5.tar.gz
Merge remote-tracking branch 'origin/master' into combined
Diffstat (limited to 'libavcodec/x86/hpeldsp.asm')
-rw-r--r--libavcodec/x86/hpeldsp.asm44
1 files changed, 44 insertions, 0 deletions
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 72bc111baa..7f0c285fa3 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -4,6 +4,7 @@
;* Copyright (c) Nick Kurshev <nickols_k@mail.ru>
;* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2002 Zdenek Kabelac <kabi@informatics.muni.cz>
+;* Copyright (c) 2013 Daniel Kang
;*
;* MMX optimized hpel functions
;*
@@ -469,3 +470,46 @@ INIT_MMX mmxext
AVG_PIXELS8_XY2
INIT_MMX 3dnow
AVG_PIXELS8_XY2
+
+INIT_XMM sse2
+; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+cglobal put_pixels16, 4,5,4          ; copies 16-byte-wide rows pixels -> block; uses r0-r4 and m0-m3 (r0=block, r1=pixels, r2=line_size, r3=h per the prototype above)
+ movsxdifnidn r2, r2d               ; widen the int line_size for use in addressing (x86inc helper -- presumably a sign-extend that is skipped where not needed)
+ lea r4, [r2*3]                     ; r4 = 3 * line_size, offset of the 4th row in each group
+.loop:                              ; each iteration handles 4 rows
+ movu m0, [r1]                      ; load source rows 0..3; movu is used for the source, so presumably no alignment is assumed for pixels
+ movu m1, [r1+r2]
+ movu m2, [r1+r2*2]
+ movu m3, [r1+r4]
+ lea r1, [r1+r2*4]                  ; pixels += 4 * line_size
+ mova [r0], m0                      ; store to destination rows 0..3; mova presumably requires block rows to be 16-byte aligned -- verify against callers
+ mova [r0+r2], m1
+ mova [r0+r2*2], m2
+ mova [r0+r4], m3
+ sub r3d, 4                         ; h -= 4; sets the flags consumed by jnz below
+ lea r0, [r0+r2*4]                  ; block += 4 * line_size; lea leaves the flags from sub untouched
+ jnz .loop                          ; repeat until h reaches 0 -- assumes h is a nonzero multiple of 4
+ REP_RET                            ; return (x86inc return macro)
+
+; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+cglobal avg_pixels16, 4,5,4          ; averages 16-byte-wide rows of pixels into block; uses r0-r4 and m0-m3 (r0=block, r1=pixels, r2=line_size, r3=h per the prototype above)
+ movsxdifnidn r2, r2d               ; widen the int line_size for use in addressing (x86inc helper -- presumably a sign-extend that is skipped where not needed)
+ lea r4, [r2*3]                     ; r4 = 3 * line_size, offset of the 4th row in each group
+.loop:                              ; each iteration handles 4 rows
+ movu m0, [r1]                      ; load source rows 0..3; movu is used for the source, so presumably no alignment is assumed for pixels
+ movu m1, [r1+r2]
+ movu m2, [r1+r2*2]
+ movu m3, [r1+r4]
+ lea r1, [r1+r2*4]                  ; pixels += 4 * line_size
+ pavgb m0, [r0]                     ; per-byte unsigned rounding average of source row with the existing destination row
+ pavgb m1, [r0+r2]                  ; memory operand is the destination; presumably block rows must be 16-byte aligned -- verify against callers
+ pavgb m2, [r0+r2*2]
+ pavgb m3, [r0+r4]
+ mova [r0], m0                      ; write the averaged rows back to the destination
+ mova [r0+r2], m1
+ mova [r0+r2*2], m2
+ mova [r0+r4], m3
+ sub r3d, 4                         ; h -= 4; sets the flags consumed by jnz below
+ lea r0, [r0+r2*4]                  ; block += 4 * line_size; lea leaves the flags from sub untouched
+ jnz .loop                          ; repeat until h reaches 0 -- assumes h is a nonzero multiple of 4
+ REP_RET                            ; return (x86inc return macro)