diff options
author | rogerdpack <rogerpack2005@gmail.com> | 2013-01-15 19:09:15 -0700 |
---|---|---|
committer | rogerdpack <rogerpack2005@gmail.com> | 2013-01-15 19:09:15 -0700 |
commit | c540312ac3b58e0bbd751844fc2c47c6e3713cf5 (patch) | |
tree | fcf92b1c0f1772b379828125c2555a47d1c81c6b /libavcodec/x86/hpeldsp.asm | |
parent | 47e88486b4b3b3de992b07f89dfaedf410a8bd5e (diff) | |
parent | 2b20397e1fbe52db800ef5deb810f7bc2602f248 (diff) | |
download | ffmpeg-c540312ac3b58e0bbd751844fc2c47c6e3713cf5.tar.gz |
Merge remote-tracking branch 'origin/master' into combined
Diffstat (limited to 'libavcodec/x86/hpeldsp.asm')
-rw-r--r-- | libavcodec/x86/hpeldsp.asm | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 72bc111baa..7f0c285fa3 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -4,6 +4,7 @@
 ;* Copyright (c) Nick Kurshev <nickols_k@mail.ru>
 ;* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
 ;* Copyright (c) 2002 Zdenek Kabelac <kabi@informatics.muni.cz>
+;* Copyright (c) 2013 Daniel Kang
 ;*
 ;* MMX optimized hpel functions
 ;*
@@ -469,3 +470,46 @@ INIT_MMX mmxext
 AVG_PIXELS8_XY2
 INIT_MMX 3dnow
 AVG_PIXELS8_XY2
+
+INIT_XMM sse2                       ; NOTE(review): macro defined outside this view; presumably selects 16-byte m# registers and the _sse2 name suffix - confirm
+; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+cglobal put_pixels16, 4,5,4         ; body uses r0-r4 and m0-m3; presumably r0=block, r1=pixels, r2=line_size, r3=h (x86inc arg order) - confirm
+    movsxdifnidn r2, r2d            ; presumably sign-extends the int in r2d to the full register where needed - macro not visible here
+    lea          r4, [r2*3]         ; r4 = 3 * r2, the offset of the fourth row
+.loop:                              ; each pass copies four rows from [r1...] to [r0...]
+    movu         m0, [r1]           ; row 0 (movu: presumably the unaligned load - confirm)
+    movu         m1, [r1+r2]        ; row 1
+    movu         m2, [r1+r2*2]      ; row 2
+    movu         m3, [r1+r4]        ; row 3
+    lea          r1, [r1+r2*4]      ; advance the read pointer by four rows
+    mova         [r0], m0           ; mova: presumably the aligned move, so r0 rows would need 16-byte alignment - TODO confirm
+    mova         [r0+r2], m1
+    mova         [r0+r2*2], m2
+    mova         [r0+r4], m3
+    sub          r3d, 4             ; four rows done; sets ZF for the jnz below
+    lea          r0, [r0+r2*4]      ; advance the write pointer; lea leaves the flags from sub untouched
+    jnz .loop                       ; exits only when r3d reaches exactly 0
+    REP_RET                         ; return macro, defined outside this view
+
+; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+cglobal avg_pixels16, 4,5,4         ; same register usage as put_pixels16 above: r0-r4 and m0-m3
+    movsxdifnidn r2, r2d            ; presumably sign-extends the int in r2d to the full register where needed - macro not visible here
+    lea          r4, [r2*3]         ; r4 = 3 * r2, the offset of the fourth row
+.loop:                              ; each pass averages four rows from [r1...] into [r0...]
+    movu         m0, [r1]           ; row 0 (movu: presumably the unaligned load - confirm)
+    movu         m1, [r1+r2]        ; row 1
+    movu         m2, [r1+r2*2]      ; row 2
+    movu         m3, [r1+r4]        ; row 3
+    lea          r1, [r1+r2*4]      ; advance the read pointer by four rows
+    pavgb        m0, [r0]           ; per-byte unsigned rounding average of the loaded row with the bytes already at [r0]
+    pavgb        m1, [r0+r2]        ; NOTE(review): a memory operand here presumably requires 16-byte-aligned r0 rows - confirm
+    pavgb        m2, [r0+r2*2]
+    pavgb        m3, [r0+r4]
+    mova         [r0], m0           ; write the averaged rows back over the rows at r0
+    mova         [r0+r2], m1
+    mova         [r0+r2*2], m2
+    mova         [r0+r4], m3
+    sub          r3d, 4             ; four rows done; sets ZF for the jnz below
+    lea          r0, [r0+r2*4]      ; advance the write pointer; lea leaves the flags from sub untouched
+    jnz .loop                       ; exits only when r3d reaches exactly 0
+    REP_RET |