diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-10-23 01:49:32 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-10-23 05:13:56 +0200 |
commit | f97faf67510d92c2a18180a6ec80435d5dd1da0b (patch) | |
tree | b63cb9b8a5fc5bafa6963c07c712dbbdf419a175 /libswscale/x86/swscale_mmx.c | |
parent | 9e8dff90efa3faae5ac07fa5fae7e154367e24fd (diff) | |
parent | e71ebb19722bd2b46831d0ec311b757a56340617 (diff) | |
download | ffmpeg-f97faf67510d92c2a18180a6ec80435d5dd1da0b.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
id3v2: fix doxy comment - 'machine byte order' makes no sense on char arrays
VC1: restore mistakenly removed code
twinvq: check output buffer size before decoding
twinvq: return an error when the packet size is too small
lavf: export some forgotten symbols with non-av prefixes.
swscale: update altivec yuv2planeX asm to new per-plane API.
swscale: make yuv2yuvX_10_sse2/avx 8/9/16-bits aware.
yuv2planeX10 SIMD
swscale: decide whether to use yuv2plane1/X on a per-plane basis.
swscale: reintroduce full precision in 16-bit output.
Split up yuv2yuvX functions
Split out yuv2yuv1 luma and chroma in order to make them generic DSP functions
lavc: replace references to deprecated AVCodecContext.error_recognition to use AVCodecContext.err_recognition
lavc: translate non-flag-based er options into flag-based ef options at codec open
add -err_filter AVOptions to access flag-based error recognition
h264_weight: initialize "height" function argument properly.
presets: spelling error in libvpx 1080p50_60
avplay: fix fullscreen behaviour with SDL 1.2.14 on Mac OS X
Conflicts:
ffplay.c
libavformat/libavformat.v
libswscale/swscale.c
libswscale/x86/swscale_template.c
tests/ref/lavfi/pixfmts_scale
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86/swscale_mmx.c')
-rw-r--r-- | libswscale/x86/swscale_mmx.c | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 815055ebc9..4c12276848 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -213,6 +213,23 @@ SCALE_FUNCS_SSE(sse2); SCALE_FUNCS_SSE(ssse3); SCALE_FUNCS_SSE(sse4); +#define VSCALEX_FUNC(size, opt) \ +extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ + const int16_t **src, uint8_t *dest, int dstW, \ + const uint8_t *dither, int offset) +#define VSCALEX_FUNCS(opt1, opt2) \ + VSCALEX_FUNC(8, opt1); \ + VSCALEX_FUNC(9, opt2); \ + VSCALEX_FUNC(10, opt2) + +#if ARCH_X86_32 +VSCALEX_FUNCS(mmx, mmx2); +#endif +VSCALEX_FUNCS(sse2, sse2); +VSCALEX_FUNCS(sse4, sse4); +VSCALEX_FUNC(16, sse4); +VSCALEX_FUNCS(avx, avx); + void ff_sws_init_swScale_mmx(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); @@ -249,10 +266,18 @@ void ff_sws_init_swScale_mmx(SwsContext *c) case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ } +#define ASSIGN_VSCALEX_FUNC(vscalefn, opt1, opt2, opt2chk, do_16_case) \ +switch(c->dstBpc){ \ + case 16: do_16_case; break; \ + case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2planeX_10_ ## opt2; break; \ + case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2planeX_9_ ## opt2; break; \ + default: vscalefn = ff_yuv2planeX_8_ ## opt1; break; \ + } #if ARCH_X86_32 if (cpu_flags & AV_CPU_FLAG_MMX) { ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2,); } #endif #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ @@ -266,6 +291,7 @@ void ff_sws_init_swScale_mmx(SwsContext *c) if (cpu_flags & AV_CPU_FLAG_SSE2) { ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, sse2, 1,); } if (cpu_flags & AV_CPU_FLAG_SSSE3) { ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); @@ -275,6 +301,12 @@ void ff_sws_init_swScale_mmx(SwsContext *c) /* Xto15 don't need special sse4 functions */ ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, sse4, 1, + if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4); + } + + if (cpu_flags & AV_CPU_FLAG_AVX) { + ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, avx, 1,); } #endif } |