aboutsummaryrefslogtreecommitdiffstats
path: root/libavfilter
diff options
context:
space:
mode:
author: Timothy Gu <timothygu99@gmail.com>, 2016-02-09 02:20:56 +0000
committer: Timothy Gu <timothygu99@gmail.com>, 2016-02-10 11:26:04 -0800
commit: 74f8d9aaef9150ead953e156e1c366d2a933ec1c (patch)
tree: 2746edd94703387a78c221dcf7397598131de370 /libavfilter
parent: c8b1612af03b6ad1e6bcf56fad73544ba2a2893c (diff)
download: ffmpeg-74f8d9aaef9150ead953e156e1c366d2a933ec1c.tar.gz
x86/vf_blend: Add SSE2 optimization for screen
10x faster than C.
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/vf_blend.asm | 29
-rw-r--r-- libavfilter/x86/vf_blend_init.c | 2
2 files changed, 31 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 50b5f8a197..a5ea74c5bc 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -111,6 +111,13 @@ BLEND_END
psrlw %1, 8 ; 00xx00xx a * b / 255
%endmacro
+%macro SCREEN 4 ; a, b, pw_1, pw_255 -- result is left in %1 (a); %2 (b) is overwritten
+ pxor %1, %4 ; 00xx00xx 255 - a (xor with 255 inverts an 8-bit value held in a word)
+ pxor %2, %4 ; 00xx00xx 255 - b
+ MULTIPLY %1, %2, %3 ; 00xx00xx (255 - a) * (255 - b) / 255
+ pxor %1, %4 ; 00xx00xx 255 - (255 - a) * (255 - b) / 255
+%endmacro
+
BLEND_INIT multiply, 4
pxor m2, m2
mova m3, [pw_1]
@@ -134,6 +141,28 @@ BLEND_INIT multiply, 4
jl .loop
BLEND_END
+BLEND_INIT screen, 5 ; screen blend; 5 is presumably the vector register count (m0-m4 are used) -- BLEND_INIT is defined outside this hunk, confirm
+ pxor m2, m2 ; m2 = 0, used below to widen bytes to words
+ mova m3, [pw_1] ; constant handed to SCREEN as its pw_1 argument
+ mova m4, [pw_255] ; constant handed to SCREEN as its pw_255 argument
+.nextrow: ; NOTE(review): no branch to .nextrow is visible here; presumably BLEND_END jumps back per row -- confirm
+ mov xq, widthq ; restart the column offset for this row
+
+ .loop:
+ movh m0, [topq + xq] ; 0000xxxx top pixels, low half of the register
+ movh m1, [bottomq + xq] ; 0000xxxx bottom pixels
+ punpcklbw m0, m2 ; 00xx00xx interleave with zero: bytes -> words
+ punpcklbw m1, m2 ; 00xx00xx
+
+ SCREEN m0, m1, m3, m4 ; m0 = 255 - (255 - top) * (255 - bottom) / 255; m1 is clobbered
+
+ packuswb m0, m0 ; 0000xxxx words back to bytes with unsigned saturation
+ movh [dstq + xq], m0 ; store the low half of m0 to dst
+ add xq, mmsize / 2 ; advance by the number of pixels handled; sets the flags tested by jl
+
+ jl .loop ; repeat while xq is still negative -- NOTE(review): implies widthq is a negative offset prepared by BLEND_INIT; confirm
+BLEND_END
+
BLEND_INIT average, 3
pxor m2, m2
.nextrow:
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 8ac526aacd..a6baf94f42 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -37,6 +37,7 @@ BLEND_FUNC(and, sse2)
BLEND_FUNC(darken, sse2)
BLEND_FUNC(difference128, sse2)
BLEND_FUNC(multiply, sse2)
+BLEND_FUNC(screen, sse2)
BLEND_FUNC(hardmix, sse2)
BLEND_FUNC(lighten, sse2)
BLEND_FUNC(or, sse2)
@@ -65,6 +66,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
+ case BLEND_SCREEN: param->blend = ff_blend_screen_sse2; break;
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;