diff options
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 98 | ||||
-rw-r--r-- | libavcodec/x86/h264chroma_init.c | 116 |
3 files changed, 118 insertions, 99 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 23348a6b3d..111fdedf47 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -51,7 +51,8 @@ YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o x86/dwt_yasm.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o YASM-OBJS-$(CONFIG_FFT) += x86/fft.o -YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ +YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o \ + x86/h264_chromamc.o \ x86/h264_chromamc_10bit.o YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ x86/h264_deblock_10bit.o \ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 75edf4a7fe..b528d4ba95 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1527,49 +1527,6 @@ void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); - -void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); - -void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); - -void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); - -void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); -void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); - -#define CHROMA_MC(OP, NUM, DEPTH, OPT) \ -void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ - (uint8_t *dst, uint8_t *src, \ - int stride, int h, int x, int y); - -CHROMA_MC(put, 2, 10, mmxext) -CHROMA_MC(avg, 2, 10, mmxext) -CHROMA_MC(put, 4, 10, mmxext) -CHROMA_MC(avg, 4, 10, mmxext) -CHROMA_MC(put, 8, 10, sse2) -CHROMA_MC(avg, 8, 10, sse2) -CHROMA_MC(put, 8, 10, avx) -CHROMA_MC(avg, 8, 10, avx) - #if HAVE_INLINE_ASM /* CAVS-specific */ @@ -1859,11 +1816,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_INLINE_ASM */ #if HAVE_YASM - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx; - c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; - } - c->vector_clip_int32 = ff_vector_clip_int32_mmx; #endif @@ -1920,19 +1872,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; } - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext; - c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext; - c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext; - } - if (bit_depth == 10 && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; - c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; - c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; - } - /* slower than cmov version on AMD */ if (!(mm_flags & AV_CPU_FLAG_3DNOW)) c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext; @@ -1985,11 +1924,6 @@ static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; } - - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; - } #endif /* HAVE_YASM */ } @@ -2042,13 +1976,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, } } - if (bit_depth == 10) { - if (CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; - } - } - c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; if (mm_flags & AV_CPU_FLAG_ATOM) { @@ -2069,14 +1996,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, int mm_flags) { #if HAVE_SSSE3_EXTERNAL - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3; - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3; - c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3; - } c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; @@ -2099,20 +2018,6 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSE4_EXTERNAL */ } -static av_cold void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) -{ -#if HAVE_AVX_EXTERNAL - const int bit_depth = avctx->bits_per_raw_sample; - - if (bit_depth == 10) { - if (CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; - } - } -#endif /* HAVE_AVX_EXTERNAL */ -} - av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); @@ -2185,9 +2090,6 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_SSE4) dsputil_init_sse4(c, avctx, mm_flags); - if (mm_flags & AV_CPU_FLAG_AVX) - dsputil_init_avx(c, avctx, mm_flags); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx); } diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c new file mode 100644 index 0000000000..b9783d6693 --- /dev/null +++ b/libavcodec/x86/h264chroma_init.c @@ -0,0 +1,116 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/h264chroma.h" + +void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +#define CHROMA_MC(OP, NUM, DEPTH, OPT) \ +void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ + (uint8_t *dst, uint8_t *src, \ + int stride, int h, int x, int y); + +CHROMA_MC(put, 2, 10, mmxext) +CHROMA_MC(avg, 2, 10, mmxext) +CHROMA_MC(put, 4, 10, mmxext) +CHROMA_MC(avg, 4, 10, mmxext) +CHROMA_MC(put, 8, 10, sse2) +CHROMA_MC(avg, 8, 10, sse2) +CHROMA_MC(put, 8, 10, avx) +CHROMA_MC(avg, 8, 10, avx) + +void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth) +{ + int high_bit_depth = bit_depth > 8; + int mm_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; + } + + if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) { + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; + } + + if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) { + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext; + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext; + } + + if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) { + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; + } + + if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; + } + + if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3; + } + + if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) { + // AVX implies !cache64. + // TODO: Port cache(32|64) detection from x264. + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; + } +} |