diff options
author | Paul B Mahol <onemda@gmail.com> | 2019-09-15 18:13:26 +0200 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2019-09-16 10:21:16 +0200 |
commit | 921eb21b1d1b0468dc9e15418a498ea692312ff6 (patch) | |
tree | 4ac33b5f48f6b826173b41e3d18c157f60b59ea4 /libavfilter/x86 | |
parent | 8e8fd25272c5c270243674184662ff6957e70484 (diff) | |
download | ffmpeg-921eb21b1d1b0468dc9e15418a498ea692312ff6.tar.gz |
avfilter/x86/vf_360: add most of >8 depth asm
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- | libavfilter/x86/vf_v360.asm | 67 | ||||
-rw-r--r-- | libavfilter/x86/vf_v360_init.c | 12 |
2 files changed, 79 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
index a0936eb6dc..5b241220d8 100644
--- a/libavfilter/x86/vf_v360.asm
+++ b/libavfilter/x86/vf_v360.asm
@@ -26,7 +26,9 @@
 SECTION_RODATA
 
 pb_mask: db 0,4,8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+pw_mask: db 0,1,4, 5, 8, 9,12,13,-1,-1,-1,-1,-1,-1,-1,-1
 pd_255: times 4 dd 255
+pd_65535: times 4 dd 65535
 
 SECTION .text
 
@@ -61,6 +63,34 @@ cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
     RET
 
 INIT_YMM avx2
+cglobal remap1_16bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
+    movsxdifnidn widthq, widthd
+    xor xq, xq
+    movd xm0, in_linesized
+    pcmpeqw m4, m4
+    VBROADCASTI128 m3, [pw_mask]
+    vpbroadcastd m0, xm0
+
+    .loop:
+        pmovsxwd m1, [vq + xq * 2]
+        pmovsxwd m2, [uq + xq * 2]
+
+        pslld m2, 0x1
+        pmulld m1, m0
+        paddd m1, m2
+        mova m2, m4
+        vpgatherdd m5, [srcq + m1], m2
+        pshufb m1, m5, m3
+        vextracti128 xm2, m1, 1
+        movq [dstq+xq*2], xm1
+        movq [dstq+xq*2+8], xm2
+
+        add xq, mmsize / 4
+        cmp xq, widthq
+        jl .loop
+    RET
+
+INIT_YMM avx2
 cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
     movsxdifnidn widthq, widthd
     movd xm0, in_linesized
@@ -96,6 +126,43 @@ DEFINE_ARGS dst, width, src, x, u, v, ker
         jl .loop
     RET
 
+INIT_YMM avx2
+cglobal remap2_16bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
+    movsxdifnidn widthq, widthd
+    movd xm0, in_linesized
+%if ARCH_X86_32
+DEFINE_ARGS dst, width, src, x, u, v, ker
+%endif
+    xor xq, xq
+    pcmpeqw m7, m7
+    vpbroadcastd m0, xm0
+    vpbroadcastd m6, [pd_65535]
+
+    .loop:
+        pmovsxwd m1, [kerq + xq * 8]
+        pmovsxwd m2, [vq + xq * 8]
+        pmovsxwd m3, [uq + xq * 8]
+
+        pslld m3, 0x1
+        pmulld m4, m2, m0
+        paddd m4, m3
+        mova m3, m7
+        vpgatherdd m2, [srcq + m4], m3
+        pand m2, m6
+        pmulld m2, m1
+        phaddd m2, m2
+        phaddd m1, m2, m2
+        psrld m1, m1, 0xe
+        vextracti128 xm2, m1, 1
+
+        pextrw [dstq+xq*2], xm1, 0
+        pextrw [dstq+xq*2+2], xm2, 0
+
+        add xq, mmsize / 16
+        cmp xq, widthq
+        jl .loop
+    RET
+
 %if ARCH_X86_64
 
 INIT_YMM avx2
diff --git a/libavfilter/x86/vf_v360_init.c b/libavfilter/x86/vf_v360_init.c
index 8c1a10c705..c7f4a3dd6d 100644
--- a/libavfilter/x86/vf_v360_init.c
+++ b/libavfilter/x86/vf_v360_init.c
@@ -32,6 +32,12 @@ void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdi
 void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
 
+void ff_remap1_16bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
+
+void ff_remap2_16bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
+
 av_cold void ff_v360_init_x86(V360Context *s, int depth)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -42,6 +48,12 @@ av_cold void ff_v360_init_x86(V360Context *s, int depth)
     if (EXTERNAL_AVX2_FAST(cpu_flags) && s->interp == BILINEAR && depth <= 8)
         s->remap_line = ff_remap2_8bit_line_avx2;
 
+    if (EXTERNAL_AVX2_FAST(cpu_flags) && s->interp == NEAREST && depth > 8)
+        s->remap_line = ff_remap1_16bit_line_avx2;
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags) && s->interp == BILINEAR && depth > 8)
+        s->remap_line = ff_remap2_16bit_line_avx2;
+
 #if ARCH_X86_64
     if (EXTERNAL_AVX2_FAST(cpu_flags) && (s->interp == BICUBIC ||
                                           s->interp == LANCZOS) && depth <= 8)