aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Timothy Gu <timothygu99@gmail.com>, 2016-02-14 02:15:18 +0000
committer: Timothy Gu <timothygu99@gmail.com>, 2016-02-14 09:12:37 -0800
commit: a678d667816a86b7e5f8c4b558f8ed333bb98841 (patch)
tree: aca9df8d14fb93c57296886b990e2c1975605902
parent: 4b750104ea2b8dec4e055216a047092650a3ba25 (diff)
download: ffmpeg-a678d667816a86b7e5f8c4b558f8ed333bb98841.tar.gz
vf_blend: Use integers for divide mode
This is 2.5x faster for 8-bit mode when GCC autovectorization is disabled, but 2x slower when it is enabled on x86. However, since the platforms on which we enable GCC autovectorization most probably have support for the SSE2 optimization (added in the subsequent commit), this commit should be beneficial in general.
-rw-r--r-- libavfilter/vf_blend.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index 4b4d4350c4..61aa17ebeb 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -247,7 +247,7 @@ DEFINE_BLEND8(hardlight, (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
DEFINE_BLEND8(hardmix, (A < (255 - B)) ? 0: 255)
DEFINE_BLEND8(darken, FFMIN(A, B))
DEFINE_BLEND8(lighten, FFMAX(A, B))
-DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255)))
+DEFINE_BLEND8(divide, av_clip_uint8(B == 0 ? 255 : 255 * A / B))
DEFINE_BLEND8(dodge, DODGE(A, B))
DEFINE_BLEND8(burn, BURN(A, B))
DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
@@ -287,7 +287,7 @@ DEFINE_BLEND16(hardlight, (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
DEFINE_BLEND16(hardmix, (A < (65535 - B)) ? 0: 65535)
DEFINE_BLEND16(darken, FFMIN(A, B))
DEFINE_BLEND16(lighten, FFMAX(A, B))
-DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535)))
+DEFINE_BLEND16(divide, av_clip_uint16(B == 0 ? 65535 : 65535 * A / B))
DEFINE_BLEND16(dodge, DODGE(A, B))
DEFINE_BLEND16(burn, BURN(A, B))
DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))