diff options
author | James Darnley <jdarnley@obe.tv> | 2017-04-05 21:07:47 +0200 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2017-05-15 15:00:20 +0200 |
commit | 7aa90b4e94147d0512ab28535f6863767b888f19 (patch) | |
tree | 0409c2abfa881d4268752a22ecde6353b1895ea6 /libavcodec/x86/h264dsp_init.c | |
parent | 27460dfebc296636dec2584e0d74aaa4d48da0b9 (diff) | |
download | ffmpeg-7aa90b4e94147d0512ab28535f6863767b888f19.tar.gz |
avcodec/h264: add sse2 versions of previous idct functions
Kaby Lake Pentium:
- ff_h264_idct_add_8_sse2: ~1.18x faster than mmxext
- ff_h264_idct_dc_add_8_sse2: ~1.07x faster than mmxext
Diffstat (limited to 'libavcodec/x86/h264dsp_init.c')
-rw-r--r-- | libavcodec/x86/h264dsp_init.c | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index bf74937b3f..ce7179f5be 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -32,9 +32,11 @@ void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \
                                                        int stride);
 
 IDCT_ADD_FUNC(, 8, mmx)
+IDCT_ADD_FUNC(, 8, sse2)
 IDCT_ADD_FUNC(, 8, avx)
 IDCT_ADD_FUNC(, 10, sse2)
 IDCT_ADD_FUNC(_dc, 8, mmxext)
+IDCT_ADD_FUNC(_dc, 8, sse2)
 IDCT_ADD_FUNC(_dc, 8, avx)
 IDCT_ADD_FUNC(_dc, 10, mmxext)
 IDCT_ADD_FUNC(8_dc, 8, mmxext)
@@ -316,6 +318,9 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2;
             c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
         }
+
+        c->h264_idct_add = ff_h264_idct_add_8_sse2;
+        c->h264_idct_dc_add = ff_h264_idct_dc_add_8_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
         c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
```