diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-10-05 22:06:18 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-10-05 22:06:18 +0000 |
commit | dd68d4db430089bff70f62a38cb84e07d183a435 (patch) | |
tree | 80651f97cce9f9c3073a9fb8cf0695849c8abf7b /libavcodec/x86/h264_intrapred_init.c | |
parent | 2f412421e8bb1927fcd866777dfedf223c9dfdfb (diff) | |
download | ffmpeg-dd68d4db430089bff70f62a38cb84e07d183a435.tar.gz |
MMX, MMX2, SSE2 and SSSE3 optimizations for pred16x16/8x8_plane H264 intra
prediction (plus some with different rounding for svq3/rv40). Speedup (for
SSSE3) is about 6-fold; 3.6% faster overall with the cathedral sample.
Originally committed as revision 25361 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/h264_intrapred_init.c')
-rw-r--r-- | libavcodec/x86/h264_intrapred_init.c | 52 |
1 file changed, 52 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 4471453d63..54b81426ce 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -29,6 +29,18 @@ void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride); void ff_pred16x16_dc_mmxext (uint8_t *src, int stride); void ff_pred16x16_dc_sse2 (uint8_t *src, int stride); void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride); +void ff_pred16x16_plane_h264_mmx (uint8_t *src, int stride); +void ff_pred16x16_plane_h264_mmx2 (uint8_t *src, int stride); +void ff_pred16x16_plane_h264_sse2 (uint8_t *src, int stride); +void ff_pred16x16_plane_h264_ssse3 (uint8_t *src, int stride); +void ff_pred16x16_plane_rv40_mmx (uint8_t *src, int stride); +void ff_pred16x16_plane_rv40_mmx2 (uint8_t *src, int stride); +void ff_pred16x16_plane_rv40_sse2 (uint8_t *src, int stride); +void ff_pred16x16_plane_rv40_ssse3 (uint8_t *src, int stride); +void ff_pred16x16_plane_svq3_mmx (uint8_t *src, int stride); +void ff_pred16x16_plane_svq3_mmx2 (uint8_t *src, int stride); +void ff_pred16x16_plane_svq3_sse2 (uint8_t *src, int stride); +void ff_pred16x16_plane_svq3_ssse3 (uint8_t *src, int stride); void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride); void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride); void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride); @@ -37,6 +49,10 @@ void ff_pred8x8_vertical_mmx (uint8_t *src, int stride); void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride); void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride); void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride); +void ff_pred8x8_plane_mmx (uint8_t *src, int stride); +void ff_pred8x8_plane_mmx2 (uint8_t *src, int stride); +void ff_pred8x8_plane_sse2 (uint8_t *src, int stride); +void ff_pred8x8_plane_ssse3 (uint8_t *src, int stride); void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride); void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride); void 
ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride); @@ -61,6 +77,15 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; + } else { + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx; + } } } @@ -75,6 +100,15 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; + } else { + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx2; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx2; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx2; + } } } @@ -87,6 +121,15 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) if (codec_id == CODEC_ID_VP8) { h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2; + } else { + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2; + } } } @@ -97,6 +140,15 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) if (codec_id == CODEC_ID_VP8) { h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; h->pred4x4 [TM_VP8_PRED ] = 
ff_pred4x4_tm_vp8_ssse3; + } else { + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3; + } } } #endif |