diff options
author | James Almer <jamrial@gmail.com> | 2015-12-23 20:37:37 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2015-12-24 13:05:18 -0300 |
commit | ce4c85de6a402cdc8d5320184eb3694346f58909 (patch) | |
tree | a066b52cc2452d4d90de6a919c06450f3c6f1167 | |
parent | 470749703e3cec3a86bc556648a20f0ec1701954 (diff) | |
download | ffmpeg-ce4c85de6a402cdc8d5320184eb3694346f58909.tar.gz |
x86/vf_maskedmerge: make ff_maskedmerge8_sse2 work on x86_32
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavfilter/x86/vf_maskedmerge.asm | 29 | ||||
-rw-r--r-- | libavfilter/x86/vf_maskedmerge_init.c | 2 |
2 files changed, 19 insertions, 12 deletions
diff --git a/libavfilter/x86/vf_maskedmerge.asm b/libavfilter/x86/vf_maskedmerge.asm
index 1970b0c6d8..7e61935b97 100644
--- a/libavfilter/x86/vf_maskedmerge.asm
+++ b/libavfilter/x86/vf_maskedmerge.asm
@@ -22,7 +22,6 @@
 %include "libavutil/x86/x86util.asm"
-%if ARCH_X86_64
 SECTION_RODATA
 pw_128: times 8 dw 128
@@ -31,24 +30,33 @@ pw_256: times 8 dw 256
 SECTION .text
 INIT_XMM sse2
-cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h
+%if ARCH_X86_64
+cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
+ mov wd, dword wm
+ mov hd, dword hm
+%else
+cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
+ mov wd, r8m
+%define olinesizeq r5mp
+%define mlinesizeq r6mp
+%define dlinesizeq r7mp
+%define hd r9mp
+%endif
 mova m4, [pw_256]
 mova m5, [pw_128]
 pxor m6, m6
- movsxdifnidn wq, wd
 add bsrcq, wq
 add osrcq, wq
 add msrcq, wq
 add dstq, wq
 neg wq
- %define x r10q
 .nextrow:
- mov x, wq
+ mov xq, wq
 .loop:
- movh m0, [bsrcq + x]
- movh m1, [osrcq + x]
- movh m3, [msrcq + x]
+ movh m0, [bsrcq + xq]
+ movh m1, [osrcq + xq]
+ movh m3, [msrcq + xq]
 mova m2, m4
 punpcklbw m0, m6
 punpcklbw m1, m6
@@ -60,8 +68,8 @@ cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize,
 paddw m1, m5
 psrlw m1, 8
 packuswb m1, m1
- movh [dstq + x], m1
- add r10q, mmsize / 2
+ movh [dstq + xq], m1
+ add xq, mmsize / 2
 jl .loop
 add bsrcq, blinesizeq
@@ -71,4 +79,3 @@ cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize,
 sub hd, 1
 jg .nextrow
 REP_RET
-%endif
diff --git a/libavfilter/x86/vf_maskedmerge_init.c b/libavfilter/x86/vf_maskedmerge_init.c
index 443570047b..73ab888083 100644
--- a/libavfilter/x86/vf_maskedmerge_init.c
+++ b/libavfilter/x86/vf_maskedmerge_init.c
@@ -34,7 +34,7 @@ av_cold void ff_maskedmerge_init_x86(MaskedMergeContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
- if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && s->depth == 8) {
+ if (EXTERNAL_SSE2(cpu_flags) && s->depth == 8) {
 s->maskedmerge = ff_maskedmerge8_sse2;
 }
 } |