diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2018-03-17 19:37:06 +0100 |
---|---|---|
committer | Martin Vignali <martin.vignali@gmail.com> | 2018-04-05 21:46:16 +0200 |
commit | f3df42e81d367547756e7955e36c8af7c9c18db2 (patch) | |
tree | f0c23de031d57c28ad7fc87516a7d763ba020d03 /libavfilter/x86/vf_blend.asm | |
parent | 8eb0bb11083320cc12bcc23104a384984c4a9d64 (diff) | |
download | ffmpeg-f3df42e81d367547756e7955e36c8af7c9c18db2.tar.gz |
avfilter/x86/vf_blend: add SIMD for the 16-bit versions of:
- grainextract
- grainmerge
- average
- extremity
- negation
Diffstat (limited to 'libavfilter/x86/vf_blend.asm')
-rw-r--r-- | libavfilter/x86/vf_blend.asm | 168 |
1 files changed, 108 insertions, 60 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm index 9cd5ee7acb..251bbb5a12 100644 --- a/libavfilter/x86/vf_blend.asm +++ b/libavfilter/x86/vf_blend.asm @@ -27,6 +27,8 @@ SECTION_RODATA ps_255: times 4 dd 255.0 +pd_32768 : times 4 dd 32768 +pd_65535 : times 4 dd 65535 pw_1: times 8 dw 1 pw_128: times 8 dw 128 pw_255: times 8 dw 255 @@ -79,26 +81,33 @@ BLEND_INIT %1, 2, %3 BLEND_END %endmacro -%macro GRAINEXTRACT 0 -BLEND_INIT grainextract, 6 +; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit) +%macro GRAINEXTRACT 3-4 +BLEND_INIT %1, 6, %4 pxor m4, m4 +%if %0 == 4 ; 16 bit + VBROADCASTI128 m5, [pd_32768] +%else VBROADCASTI128 m5, [pw_128] +%endif .nextrow: mov xq, widthq .loop: movu m1, [topq + xq] movu m3, [bottomq + xq] - punpcklbw m0, m1, m4 - punpckhbw m1, m4 - punpcklbw m2, m3, m4 - punpckhbw m3, m4 - paddw m0, m5 - paddw m1, m5 - psubw m0, m2 - psubw m1, m3 + punpckl%2%3 m0, m1, m4 + punpckh%2%3 m1, m4 + punpckl%2%3 m2, m3, m4 + punpckh%2%3 m3, m4 + + padd%3 m0, m5 + padd%3 m1, m5 + psub%3 m0, m2 + psub%3 m1, m3 + + packus%3%2 m0, m1 - packuswb m0, m1 mova [dstq + xq], m0 add xq, mmsize jl .loop @@ -172,8 +181,9 @@ BLEND_INIT screen, 7 BLEND_END %endmacro -%macro AVERAGE 0 -BLEND_INIT average, 3 +;%1 name, %2 (b or w), %3 (set if 16 bit) +%macro AVERAGE 2-3 +BLEND_INIT %1, 3, %3 pcmpeqb m2, m2 .nextrow: @@ -184,7 +194,7 @@ BLEND_INIT average, 3 movu m1, [bottomq + xq] pxor m0, m2 pxor m1, m2 - pavgb m0, m1 + pavg%2 m0, m1 pxor m0, m2 mova [dstq + xq], m0 add xq, mmsize @@ -192,29 +202,34 @@ BLEND_INIT average, 3 BLEND_END %endmacro - -%macro GRAINMERGE 0 -BLEND_INIT grainmerge, 6 +; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit) +%macro GRAINMERGE 3-4 +BLEND_INIT %1, 6, %4 pxor m4, m4 - +%if %0 == 4 ; 16 bit + VBROADCASTI128 m5, [pd_32768] +%else VBROADCASTI128 m5, [pw_128] +%endif .nextrow: mov xq, widthq .loop: movu m1, [topq + xq] movu m3, [bottomq + xq] - punpcklbw 
m0, m1, m4 - punpckhbw m1, m4 - punpcklbw m2, m3, m4 - punpckhbw m3, m4 - paddw m0, m2 - paddw m1, m3 - psubw m0, m5 - psubw m1, m5 + punpckl%2%3 m0, m1, m4 + punpckh%2%3 m1, m4 + punpckl%2%3 m2, m3, m4 + punpckh%2%3 m3, m4 + + padd%3 m0, m2 + padd%3 m1, m3 + psub%3 m0, m5 + psub%3 m1, m5 + + packus%3%2 m0, m1 - packuswb m0, m1 mova [dstq + xq], m0 add xq, mmsize jl .loop @@ -324,52 +339,73 @@ BLEND_INIT %1, 5, %4 BLEND_END %endmacro -%macro BLEND_ABS 0 -BLEND_INIT extremity, 8 +; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit) +%macro EXTREMITY 3-4 +BLEND_INIT %1, 8, %4 pxor m2, m2 +%if %0 == 4; 16 bit + VBROADCASTI128 m4, [pd_65535] +%else VBROADCASTI128 m4, [pw_255] +%endif .nextrow: mov xq, widthq .loop: movu m0, [topq + xq] movu m1, [bottomq + xq] - punpckhbw m5, m0, m2 - punpcklbw m0, m2 - punpckhbw m6, m1, m2 - punpcklbw m1, m2 - psubw m3, m4, m0 - psubw m7, m4, m5 - psubw m3, m1 - psubw m7, m6 + punpckh%2%3 m5, m0, m2 + punpckl%2%3 m0, m2 + punpckh%2%3 m6, m1, m2 + punpckl%2%3 m1, m2 + psub%3 m3, m4, m0 + psub%3 m7, m4, m5 + psub%3 m3, m1 + psub%3 m7, m6 +%if %0 == 4; 16 bit + pabsd m3, m3 + pabsd m7, m7 +%else ABS2 m3, m7, m1, m6 - packuswb m3, m7 +%endif + packus%3%2 m3, m7 mova [dstq + xq], m3 add xq, mmsize jl .loop BLEND_END +%endmacro -BLEND_INIT negation, 8 +%macro NEGATION 3-4 +BLEND_INIT %1, 8, %4 pxor m2, m2 +%if %0 == 4; 16 bit + VBROADCASTI128 m4, [pd_65535] +%else VBROADCASTI128 m4, [pw_255] +%endif .nextrow: mov xq, widthq .loop: movu m0, [topq + xq] movu m1, [bottomq + xq] - punpckhbw m5, m0, m2 - punpcklbw m0, m2 - punpckhbw m6, m1, m2 - punpcklbw m1, m2 - psubw m3, m4, m0 - psubw m7, m4, m5 - psubw m3, m1 - psubw m7, m6 + punpckh%2%3 m5, m0, m2 + punpckl%2%3 m0, m2 + punpckh%2%3 m6, m1, m2 + punpckl%2%3 m1, m2 + psub%3 m3, m4, m0 + psub%3 m7, m4, m5 + psub%3 m3, m1 + psub%3 m7, m6 +%if %0 == 4; 16 bit + pabsd m3, m3 + pabsd m7, m7 +%else ABS2 m3, m7, m1, m6 - psubw m0, m4, m3 - psubw m1, m4, m7 - packuswb m0, 
m1 +%endif + psub%3 m0, m4, m3 + psub%3 m1, m4, m7 + packus%3%2 m0, m1 mova [dstq + xq], m0 add xq, mmsize jl .loop @@ -384,17 +420,17 @@ BLEND_SIMPLE addition, addusb BLEND_SIMPLE subtract, subusb BLEND_SIMPLE darken, minub BLEND_SIMPLE lighten, maxub -GRAINEXTRACT +GRAINEXTRACT grainextract, b, w BLEND_MULTIPLY BLEND_SCREEN -AVERAGE -GRAINMERGE +AVERAGE average, b +GRAINMERGE grainmerge, b, w HARDMIX PHOENIX phoenix, b DIFFERENCE difference, b, w DIVIDE - -BLEND_ABS +EXTREMITY extremity, b, w +NEGATION negation, b, w %if ARCH_X86_64 BLEND_SIMPLE addition_16, addusw, 1 @@ -402,18 +438,24 @@ BLEND_SIMPLE and_16, and, 1 BLEND_SIMPLE or_16, or, 1 BLEND_SIMPLE subtract_16, subusw, 1 BLEND_SIMPLE xor_16, xor, 1 +AVERAGE average_16, w, 1 %endif INIT_XMM ssse3 DIFFERENCE difference, b, w -BLEND_ABS +EXTREMITY extremity, b, w +NEGATION negation, b, w INIT_XMM sse4 %if ARCH_X86_64 BLEND_SIMPLE darken_16, minuw, 1 BLEND_SIMPLE lighten_16, maxuw, 1 +GRAINEXTRACT grainextract_16, w, d, 1 +GRAINMERGE grainmerge_16, w, d, 1 PHOENIX phoenix_16, w, 1 DIFFERENCE difference_16, w, d, 1 +EXTREMITY extremity_16, w, d, 1 +NEGATION negation_16, w, d, 1 %endif %if HAVE_AVX2_EXTERNAL @@ -425,16 +467,17 @@ BLEND_SIMPLE addition, addusb BLEND_SIMPLE subtract, subusb BLEND_SIMPLE darken, minub BLEND_SIMPLE lighten, maxub -GRAINEXTRACT +GRAINEXTRACT grainextract, b, w BLEND_MULTIPLY BLEND_SCREEN -AVERAGE -GRAINMERGE +AVERAGE average, b +GRAINMERGE grainmerge, b, w HARDMIX PHOENIX phoenix, b DIFFERENCE difference, b, w -BLEND_ABS +EXTREMITY extremity, b, w +NEGATION negation, b, w %if ARCH_X86_64 BLEND_SIMPLE addition_16, addusw, 1 @@ -444,7 +487,12 @@ BLEND_SIMPLE lighten_16, maxuw, 1 BLEND_SIMPLE or_16, or, 1 BLEND_SIMPLE subtract_16, subusw, 1 BLEND_SIMPLE xor_16, xor, 1 +GRAINEXTRACT grainextract_16, w, d, 1 +AVERAGE average_16, w, 1 +GRAINMERGE grainmerge_16, w, d, 1 PHOENIX phoenix_16, w, 1 DIFFERENCE difference_16, w, d, 1 +EXTREMITY extremity_16, w, d, 1 +NEGATION negation_16, w, d, 1 
%endif %endif |