diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-10-06 22:36:49 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-10-06 23:04:31 +0200 |
commit | e063ffbf4ba98255cfe721afe48877b32590ed2c (patch) | |
tree | 2303599247590be4cd5a901259fcad1d844fcbfd /libavcodec | |
parent | 094a82c7de30b6a086c785aa14cc7e2e2eb3418e (diff) | |
download | ffmpeg-e063ffbf4ba98255cfe721afe48877b32590ed2c.tar.gz |
dsputil_mmx: put optimized gmc code back and avoid a VLA without losing features.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 61 |
1 files changed, 54 insertions, 7 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index f608f661dd..27e732f5da 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1929,10 +1929,15 @@ static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src, #if HAVE_INLINE_ASM -static void gmc_mmx(uint8_t *dst, uint8_t *src, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height) +typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src, + int linesize, int block_w, int block_h, + int src_x, int src_y, int w, int h); + +static av_always_inline void gmc(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height, + emulated_edge_mc_func *emu_edge_fn) { const int w = 8; const int ix = ox >> (16 + shift); @@ -1947,6 +1952,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys }; const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys }; const uint64_t shift2 = 2 * shift; +#define MAX_STRIDE 4096U +#define MAX_H 8U + uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE]; int x, y; const int dxw = (dxx - (1 << (16 + shift))) * (w - 1); @@ -1957,9 +1965,8 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) | (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) // uses more than 16 bits of subpel mv (only at huge resolution) - || (dxx | dxy | dyx | dyy) & 15 || - (unsigned)ix >= width - w || - (unsigned)iy >= height - h) { + || (dxx | dxy | dyx | dyy) & 15 + || h > MAX_H || stride > MAX_STRIDE) { // FIXME could still use mmx for some of the rows ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height); @@ -1967,6 +1974,11 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, } src += ix + iy * stride; + if ((unsigned)ix >= width - w || + (unsigned)iy 
>= height - h) { + emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height); + src = edge_buf; + } __asm__ volatile ( "movd %0, %%mm6 \n\t" @@ -2045,6 +2057,36 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, } } +#if HAVE_YASM +#if ARCH_X86_32 +static void gmc_mmx(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height) +{ + gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, + width, height, &emulated_edge_mc_mmx); +} +#endif +static void gmc_sse(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height) +{ + gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, + width, height, &emulated_edge_mc_sse); +} +#else +static void gmc_mmx(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height) +{ + gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, + width, height, &ff_emulated_edge_mc_8); +} +#endif + #define PREFETCH(name, op) \ static void name(void *mem, int stride, int h) \ { \ @@ -2545,7 +2587,9 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); } +#if ARCH_X86_32 || !HAVE_YASM c->gmc = gmc_mmx; +#endif c->add_bytes = add_bytes_mmx; @@ -2800,6 +2844,9 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_sse; +#if HAVE_INLINE_ASM + c->gmc = gmc_sse; +#endif #endif /* HAVE_YASM */ } |