diff options
author | James Darnley <jdarnley@obe.tv> | 2016-11-28 18:22:17 +0100 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2016-11-30 22:58:27 +0100 |
commit | 1dae7ffa0b11293fa3294fc25cd38055915fcf19 (patch) | |
tree | 0c4c9e76e73f82e9b3bf4d5b77ec971bda678960 /libavcodec/x86/h264dsp_init.c | |
parent | 815ea8c6ccf7a1a8154a829f948d7c98120ad084 (diff) | |
download | ffmpeg-1dae7ffa0b11293fa3294fc25cd38055915fcf19.tar.gz |
avcodec/h264: mmx 4:2:2 idct add8 function
2.87 times faster (1830 vs. 638 cycles)
Diffstat (limited to 'libavcodec/x86/h264dsp_init.c')
-rw-r--r-- | libavcodec/x86/h264dsp_init.c | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 027c1ae0b3..ed52c4d8ef 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -78,6 +78,8 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
 
+IDCT_ADD_REP_FUNC2(, 8_422, 8, mmx)
+
 void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul);
 void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul);
 
@@ -228,8 +230,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 
             c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
             c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
-            if (chroma_format_idc <= 1)
+            if (chroma_format_idc <= 1) {
                 c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
+            } else {
+                c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
+            }
             c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
             if (cpu_flags & AV_CPU_FLAG_CMOV)
                 c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
```