about | summary | refs | log | tree | commit | diff | stats
path: root/libavfilter/x86
diff options
context:
space:
mode:
author: Paul B Mahol <onemda@gmail.com> 2015-10-07 10:12:26 +0200
committer: Paul B Mahol <onemda@gmail.com> 2015-10-07 22:50:15 +0200
commit: 0948ba320496d02ad185487c18b249610de1a184 (patch)
tree: 11694f840cbe4272e79c8895e46ad10f2b50d295 /libavfilter/x86
parent: 8a9fa46e87daf09fca14c903e6ee0c13fd453998 (diff)
download: ffmpeg-0948ba320496d02ad185487c18b249610de1a184.tar.gz
avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_blend.asm 64
-rw-r--r-- libavfilter/x86/vf_blend_init.c 14
2 files changed, 78 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 167e72b22d..54b5430a90 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -27,6 +27,8 @@ SECTION_RODATA
pw_128: times 8 dw 128
pw_255: times 8 dw 255
+pb_128: times 16 db 128
+pb_255: times 16 db 255
SECTION .text
@@ -273,6 +275,37 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
jg .nextrow
REP_RET
+cglobal blend_hardmix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end ; hardmix: each dst byte = 255 if top >= 255 - bottom, else 0. 11 GPRs are declared because the x counter lives in r10, the 11th register.
+ add topq, widthq ; point each row pointer just past the row; x below is a negative offset
+ add bottomq, widthq
+ add dstq, widthq
+ sub endq, startq ; end = number of rows (end - start)
+ mova m2, [pb_255] ; all-ones bytes: xor with it yields 255 - v and inverts compare masks
+ mova m3, [pb_128] ; sign-bit bias so the signed pcmpgtb orders bytes as unsigned
+ neg widthq
+.nextrow:
+ mov r10q, widthq ; x = -width
+ %define x r10q
+
+ .loop: ; NOTE(review): handles mmsize bytes per pass, so a width that is not a multiple of mmsize touches bytes past the row - confirm buffer padding
+ movu m0, [topq + x]
+ movu m1, [bottomq + x]
+ pxor m1, m2 ; m1 = 255 - bottom
+ pxor m0, m3 ; bias top for the signed compare
+ pxor m1, m3 ; bias (255 - bottom) for the signed compare
+ pcmpgtb m1, m0 ; m1 = 0xFF where (255 - bottom) > top
+ pxor m1, m2 ; invert: 0xFF where top >= 255 - bottom
+ mova [dstq + x], m1 ; NOTE(review): aligned store - assumes dst + x is mmsize-aligned; confirm with caller
+ add x, mmsize
+ jl .loop ; keep going while x is still negative
+
+ add topq, top_linesizeq ; advance all three pointers to the next row
+ add bottomq, bottom_linesizeq
+ add dstq, dst_linesizeq
+ sub endd, 1
+ jg .nextrow ; loop while rows remain
+REP_RET
+
cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq
add bottomq, widthq
@@ -298,6 +331,37 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
jg .nextrow
REP_RET
+cglobal blend_phoenix, 9, 11, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end ; phoenix: each dst byte = min(top, bottom) + 255 - max(top, bottom). 11 GPRs are declared because the x counter lives in r10, the 11th register.
+ add topq, widthq ; point each row pointer just past the row; x below is a negative offset
+ add bottomq, widthq
+ add dstq, widthq
+ sub endq, startq ; end = number of rows (end - start)
+ mova m3, [pb_255] ; constant 255 in every byte
+ neg widthq
+.nextrow:
+ mov r10q, widthq ; x = -width
+ %define x r10q
+
+ .loop: ; NOTE(review): handles mmsize bytes per pass, so a width that is not a multiple of mmsize touches bytes past the row - confirm buffer padding
+ movu m0, [topq + x]
+ movu m1, [bottomq + x]
+ mova m2, m0 ; keep a copy of top, since pminub overwrites m0
+ pminub m0, m1 ; m0 = min(top, bottom)
+ pmaxub m1, m2 ; m1 = max(top, bottom)
+ mova m2, m3
+ psubusb m2, m1 ; m2 = 255 - max
+ paddusb m2, m0 ; m2 += min; the sum cannot exceed 255 because min <= max
+ mova [dstq + x], m2 ; NOTE(review): aligned store - assumes dst + x is mmsize-aligned; confirm with caller
+ add x, mmsize
+ jl .loop ; keep going while x is still negative
+
+ add topq, top_linesizeq ; advance all three pointers to the next row
+ add bottomq, bottom_linesizeq
+ add dstq, dst_linesizeq
+ sub endd, 1
+ jg .nextrow ; loop while rows remain
+REP_RET
+
INIT_XMM ssse3
cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 61e90f8d37..454d03030d 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
+void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize, /* Prototype for the hardmix blend routine; NOTE(review): presumably resolves to `cglobal blend_hardmix` in vf_blend.asm - confirm symbol naming. */
+ const uint8_t *bottom, ptrdiff_t bottom_linesize, /* top and bottom are read-only inputs, each with its own line stride */
+ uint8_t *dst, ptrdiff_t dst_linesize, /* dst is the only writable buffer */
+ ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+ struct FilterParams *param, double *values);
+
void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
@@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
+void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize, /* Prototype for the phoenix blend routine; NOTE(review): presumably resolves to `cglobal blend_phoenix` in vf_blend.asm - confirm symbol naming. */
+ const uint8_t *bottom, ptrdiff_t bottom_linesize, /* top and bottom are read-only inputs, each with its own line stride */
+ uint8_t *dst, ptrdiff_t dst_linesize, /* dst is the only writable buffer */
+ ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+ struct FilterParams *param, double *values);
+
void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
@@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break;
case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break;
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+ case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
+ case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
}