diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-09-13 23:31:17 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-09-13 23:44:12 +0200 |
commit | cac9877ed5150bb6feda64927bb327ee4698086a (patch) | |
tree | 17f92a30adce99c085b48a05f646f7cf13492952 /libswscale/x86/swscale_mmx.c | |
parent | 5ad01decef9993e4ec84c10e53a01f9f841c330b (diff) | |
parent | e0c3e0738757a92c2910bac83f2ef830b428ba11 (diff) | |
download | ffmpeg-cac9877ed5150bb6feda64927bb327ee4698086a.tar.gz |
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  - sws: implement MMX/SSE2/SSSE3/SSE4 versions for horizontal scaling.
  - include stdint.h in adpcm_data.h
  - mpeg12: reorder functions to avoid ugly forward declarations
  - Fixed off by one packet size allocation in the smacker demuxer.
  - Check for invalid packet size in the smacker demuxer.
  - ape demuxer: fix segfault on memory allocation failure.
  - xan: Add some buffer checks
  - xan: Remove extra trailing newline
  - Fixed size given to init_get_bits() in xan decoder.

Conflicts:
  - libavcodec/mpeg12.c
  - libswscale/x86/swscale_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86/swscale_mmx.c')
-rw-r--r-- | libswscale/x86/swscale_mmx.c | 86 |
1 file changed, 86 insertions, 0 deletions
```diff
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 775d5f683d..427979b689 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -176,6 +176,41 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
     }
 }
 
+#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
+extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
+                                                SwsContext *c, int16_t *data, \
+                                                int dstW, const uint8_t *src, \
+                                                const int16_t *filter, \
+                                                const int16_t *filterPos, int filterSize);
+
+#define SCALE_FUNCS(filter_n, opt) \
+    SCALE_FUNC(filter_n, 8, 15, opt); \
+    SCALE_FUNC(filter_n, 9, 15, opt); \
+    SCALE_FUNC(filter_n, 10, 15, opt); \
+    SCALE_FUNC(filter_n, 16, 15, opt); \
+    SCALE_FUNC(filter_n, 8, 19, opt); \
+    SCALE_FUNC(filter_n, 9, 19, opt); \
+    SCALE_FUNC(filter_n, 10, 19, opt); \
+    SCALE_FUNC(filter_n, 16, 19, opt)
+
+#define SCALE_FUNCS_MMX(opt) \
+    SCALE_FUNCS(4, opt); \
+    SCALE_FUNCS(8, opt); \
+    SCALE_FUNCS(X, opt)
+
+#define SCALE_FUNCS_SSE(opt) \
+    SCALE_FUNCS(4, opt); \
+    SCALE_FUNCS(8, opt); \
+    SCALE_FUNCS(X4, opt); \
+    SCALE_FUNCS(X8, opt)
+
+#if ARCH_X86_32
+SCALE_FUNCS_MMX(mmx);
+#endif
+SCALE_FUNCS_SSE(sse2);
+SCALE_FUNCS_SSE(ssse3);
+SCALE_FUNCS_SSE(sse4);
+
 void ff_sws_init_swScale_mmx(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -186,4 +221,55 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
     if (cpu_flags & AV_CPU_FLAG_MMX2)
         sws_init_swScale_MMX2(c);
 #endif
+
+#if HAVE_YASM
+#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
+    if (c->srcBpc == 8) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
+    } else if (c->srcBpc == 9) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
+    } else if (c->srcBpc == 10) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
+    } else if(c->srcBpc == 16 && !((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
+    } \
+} while (0)
+#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
+    switch (filtersize) { \
+    case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
+    case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
+    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
+    }
+#if ARCH_X86_32
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
+        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
+        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
+    }
+#endif
+#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
+    switch (filtersize) { \
+    case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
+    case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
+    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
+             else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
+             break; \
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSE2) {
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
+    }
+    if (cpu_flags & AV_CPU_FLAG_SSE4) {
+        /* Xto15 don't need special sse4 functions */
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
+    }
+#endif
 }
```