diff options
author | Wu Jianhua <jianhua.wu@intel.com> | 2022-03-11 15:52:11 +0800 |
---|---|---|
committer | Haihao Xiang <haihao.xiang@intel.com> | 2022-04-24 14:47:01 +0800 |
commit | 6fbb8cc8ad8c406cec0d0eb0bb3078c2929109c8 (patch) | |
tree | 96935f195a37f06201369c007d4f445f7c301670 | |
parent | c1790b60d643100266192c2bbaefb2c76eba6e5a (diff) | |
download | ffmpeg-6fbb8cc8ad8c406cec0d0eb0bb3078c2929109c8.tar.gz |
avcodec/x86/hevc_mc: add qpel_h4_8_avx512icl
ff_hevc_put_hevc_qpel_h4_8_sse4 993694
ff_hevc_put_hevc_qpel_h4_8_avx512icl 686647
Reviewed-by: Henrik Gramner <henrik@gramner.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
-rw-r--r-- | libavcodec/x86/hevc_mc.asm | 9 |
-rw-r--r-- | libavcodec/x86/hevcdsp.h | 1 |
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 1 |
3 files changed, 11 insertions, 0 deletions
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 4e39cdd7fe..ff59ae3509 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -87,6 +87,7 @@ QPEL_TABLE 12, 4, w, sse4
 QPEL_TABLE 8,16, b, avx2
 QPEL_TABLE 10, 8, w, avx2
+QPEL_TABLE 4, 1, b, avx512icl_h
 QPEL_TABLE 8, 1, b, avx512icl_h
 QPEL_TABLE 8, 1, d, avx512icl_v
 QPEL_TABLE 16, 1, b, avx512icl_h
@@ -1754,7 +1755,12 @@ cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 8, dst, src, srcstride, height, mx, tmp
 QPEL_LOAD_SHUF 2, 3
 .loop:
 QPEL_H_LOAD_COMPUTE 6, src
+%if %1 == 4
+ vpmovdw xm6, m6
+ movq [dstq], xm6
+%else
 vpmovdw [dstq], m6
+%endif
 LOOP_END dst, src, srcstride
 RET
 %endmacro
@@ -1822,6 +1828,9 @@ cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, m
 %if ARCH_X86_64
 %if HAVE_AVX512ICL_EXTERNAL
+INIT_XMM avx512icl
+HEVC_PUT_HEVC_QPEL_AVX512ICL 4, 8
+
 INIT_YMM avx512icl
 HEVC_PUT_HEVC_QPEL_AVX512ICL 8, 8
 HEVC_PUT_HEVC_QPEL_HV_AVX512ICL 8, 8
diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h
index 6e3fc01ad0..51ffdc9628 100644
--- a/libavcodec/x86/hevcdsp.h
+++ b/libavcodec/x86/hevcdsp.h
@@ -233,6 +233,7 @@ WEIGHTING_PROTOTYPES(8, sse4);
 WEIGHTING_PROTOTYPES(10, sse4);
 WEIGHTING_PROTOTYPES(12, sse4);
+void ff_hevc_put_hevc_qpel_h4_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
 void ff_hevc_put_hevc_qpel_h8_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
 void ff_hevc_put_hevc_qpel_h16_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
 void ff_hevc_put_hevc_qpel_hv8_8_avx512icl(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width);
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index a73c2e2bd9..58b91459ed 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -879,6 +879,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
 c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
 }
 if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) {
+ c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl;
 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl;
 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl;
 c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl;