diff options
author | James Darnley <jdarnley@obe.tv> | 2017-02-15 14:54:11 +0100 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2017-02-27 13:22:06 +0100 |
commit | 88307b3eec018b86f6adfddeb6e9b384c95b7ca6 (patch) | |
tree | 78d8a2e53d61b4719ee5ca1b5ec6f9e1799eb03d /libavcodec | |
parent | ac096fc82df6aa8190f29a51ee5b589f318cfaa8 (diff) | |
download | ffmpeg-88307b3eec018b86f6adfddeb6e9b384c95b7ca6.tar.gz |
avcodec/h264: add avx 8-bit 4:2:2 chroma h deblock/loop filter
~1.21x faster than the mmxext function (56 vs. 68 cycles)
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/h264_deblock.asm | 27 | ||||
-rw-r--r-- | libavcodec/x86/h264dsp_init.c | 2 |
2 files changed, 29 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 0465c9f13b..e2eb002f71 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -1163,6 +1163,33 @@ cglobal deblock_h_chroma_8, 5, 7, 8, 0-16, pix_, stride_, alpha_, beta_, tc0_
     STORE_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
 RET
 
+cglobal deblock_h_chroma422_8, 5, 7, 8, 0-16, pix_, stride_, alpha_, beta_, tc0_,
+    CHROMA_H_START_XMM r5, r6
+    LOAD_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
+    TRANSPOSE_8x4B_XMM
+    movq [rsp], m0
+    movq [rsp + 8], m3
+    CHROMA_INTER_BODY_XMM 2
+    movq m0, [rsp]
+    movq m3, [rsp + 8]
+    TRANSPOSE_4x8B_XMM
+    STORE_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
+
+    lea pix_q, [pix_q + 8*stride_q]
+    lea r5, [r5 + 8*stride_q]
+    add tc0_q, 2
+
+    LOAD_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
+    TRANSPOSE_8x4B_XMM
+    movq [rsp], m0
+    movq [rsp + 8], m3
+    CHROMA_INTER_BODY_XMM 2
+    movq m0, [rsp]
+    movq m3, [rsp + 8]
+    TRANSPOSE_4x8B_XMM
+    STORE_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
+RET
+
 %endmacro ; DEBLOCK_CHROMA_XMM
 
 DEBLOCK_CHROMA_XMM avx
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 0b15471675..6073932b25 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -321,6 +321,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
             if (chroma_format_idc <= 1) {
                 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_avx;
+            } else {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx;
             }
         }
     } else if (bit_depth == 10) {
```