diff options
| | | |
|---|---|---|
| author | Paul B Mahol <onemda@gmail.com> | 2020-03-31 14:08:20 +0200 |
| committer | Paul B Mahol <onemda@gmail.com> | 2020-04-02 12:25:37 +0200 |
| commit | e4809e12ea57b5551830bc52b37f00caae6352a9 (patch) | |
| tree | 8123154bd42835c7e6b940e924e22bc6daf48416 /libavfilter/x86 | |
| parent | b00b935d994d8f0a57427c30d6b779e0837dff43 (diff) | |
| download | ffmpeg-e4809e12ea57b5551830bc52b37f00caae6352a9.tar.gz | |
avfilter/vf_v360: add SIMD for lagrange9 interpolation
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- | libavfilter/x86/vf_v360.asm | 43 |
-rw-r--r-- | libavfilter/x86/vf_v360_init.c | 6 |
2 files changed, 49 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm index 5b241220d8..8e7e4591b4 100644 --- a/libavfilter/x86/vf_v360.asm +++ b/libavfilter/x86/vf_v360.asm @@ -166,6 +166,49 @@ DEFINE_ARGS dst, width, src, x, u, v, ker %if ARCH_X86_64 INIT_YMM avx2 +cglobal remap3_8bit_line, 7, 11, 8, dst, width, src, in_linesize, u, v, ker, x, y, tmp, z + movsxdifnidn widthq, widthd + xor zq, zq + xor yq, yq + xor xq, xq + movd xm0, in_linesized + pcmpeqw m7, m7 + vpbroadcastd m0, xm0 + vpbroadcastd m6, [pd_255] + + .loop: + pmovsxwd m1, [kerq + yq] + pmovsxwd m2, [vq + yq] + pmovsxwd m3, [uq + yq] + + pmulld m4, m2, m0 + paddd m4, m3 + mova m3, m7 + vpgatherdd m2, [srcq + m4], m3 + pand m2, m6 + pmulld m2, m1 + HADDD m2, m1 + movzx tmpq, word [vq + yq + 16] + imul tmpq, in_linesizeq + movzx zq, word [uq + yq + 16] + add tmpq, zq + movzx zq, byte [srcq + tmpq] + movzx tmpq, word [kerq + yq + 16] + imul zd, tmpd + movd xm1, zd + paddd m2, m1 + psrld m2, m2, 0xe + + packuswb m2, m2 + pextrb [dstq+xq], xm2, 0 + + add xq, 1 + add yq, 18 + cmp xq, widthq + jl .loop + RET + +INIT_YMM avx2 cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y movsxdifnidn widthq, widthd xor yq, yq diff --git a/libavfilter/x86/vf_v360_init.c b/libavfilter/x86/vf_v360_init.c index babc6c426a..5b1decd777 100644 --- a/libavfilter/x86/vf_v360_init.c +++ b/libavfilter/x86/vf_v360_init.c @@ -29,6 +29,9 @@ void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdi void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize, const int16_t *const u, const int16_t *const v, const int16_t *const ker); +void ff_remap3_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize, + const int16_t *const u, const int16_t *const v, const int16_t *const ker); + void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize, const int16_t *const u, const int16_t 
*const v, const int16_t *const ker); @@ -55,6 +58,9 @@ av_cold void ff_v360_init_x86(V360Context *s, int depth) s->remap_line = ff_remap2_16bit_line_avx2; #if ARCH_X86_64 + if (EXTERNAL_AVX2_FAST(cpu_flags) && s->interp == LAGRANGE9 && depth <= 8) + s->remap_line = ff_remap3_8bit_line_avx2; + if (EXTERNAL_AVX2_FAST(cpu_flags) && (s->interp == BICUBIC || s->interp == LANCZOS) && depth <= 8) s->remap_line = ff_remap4_8bit_line_avx2; |