| field | value | date |
|---|---|---|
| author | Diego Biurrun <diego@biurrun.de> | 2013-01-29 22:13:55 +0100 |
| committer | Diego Biurrun <diego@biurrun.de> | 2013-02-05 12:59:12 +0100 |
| commit | 25841dfe806a13de526ae09c11149ab1f83555a8 (patch) | |
| tree | b440e39d40a14e963bd3c36f41a370cb448c6006 /libavcodec/x86 | |
| parent | e66240f22e240b0f0d970d1b138db80ceb517097 (diff) | |
| download | ffmpeg-25841dfe806a13de526ae09c11149ab1f83555a8.tar.gz | |
Use ptrdiff_t instead of int for {avg, put}_pixels line_size parameter.
This avoids SIMD-optimized functions having to sign-extend their
line size argument manually to be able to do pointer arithmetic.
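For illustration only (this sketch is not part of the commit): a scalar reference with the new prototype shape shows why the type matters. On x86-64, an `int` argument arrives in the low 32 bits of its register, so the assembly had to sign-extend it before adding it to a pointer; a `ptrdiff_t` stride is already pointer-width, so `pixels += line_size` maps directly onto 64-bit address arithmetic.

```c
#include <stddef.h>
#include <stdint.h>

/* Hypothetical scalar counterpart of put_pixels8 with the new signature;
 * the real functions are MMX/SSE2 routines, this only mirrors the prototype.
 * Because line_size is ptrdiff_t, the per-row pointer advance below needs
 * no separate sign extension on 64-bit targets. */
static void put_pixels8_c(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h)
{
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 8; j++)
            block[j] = pixels[j];
        block  += line_size;
        pixels += line_size;
    }
}
```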
Diffstat (limited to 'libavcodec/x86')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | libavcodec/x86/dsputil_avg_template.c | 14 |
| -rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 50 |
| -rw-r--r-- | libavcodec/x86/dsputil_rnd_template.c | 30 |
| -rw-r--r-- | libavcodec/x86/hpeldsp.asm | 39 |

4 files changed, 60 insertions, 73 deletions
diff --git a/libavcodec/x86/dsputil_avg_template.c b/libavcodec/x86/dsputil_avg_template.c
index 90e4074f7a..2249ce6c97 100644
--- a/libavcodec/x86/dsputil_avg_template.c
+++ b/libavcodec/x86/dsputil_avg_template.c
@@ -27,14 +27,14 @@
 //FIXME the following could be optimized too ...
 static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
-                                           int line_size, int h)
+                                           ptrdiff_t line_size, int h)
 {
     DEF(ff_put_no_rnd_pixels8_x2)(block, pixels, line_size, h);
     DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h);
 }

 static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
-                                    int line_size, int h)
+                                    ptrdiff_t line_size, int h)
 {
     DEF(ff_put_pixels8_y2)(block, pixels, line_size, h);
     DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h);
@@ -42,35 +42,35 @@ static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,

 static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
-                                           int line_size, int h)
+                                           ptrdiff_t line_size, int h)
 {
     DEF(ff_put_no_rnd_pixels8_y2)(block, pixels, line_size, h);
     DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h);
 }

 static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels,
-                                 int line_size, int h)
+                                 ptrdiff_t line_size, int h)
 {
     DEF(ff_avg_pixels8)(block, pixels, line_size, h);
     DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h);
 }

 static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
-                                    int line_size, int h)
+                                    ptrdiff_t line_size, int h)
 {
     DEF(ff_avg_pixels8_x2)(block, pixels, line_size, h);
     DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h);
 }

 static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
-                                    int line_size, int h)
+                                    ptrdiff_t line_size, int h)
 {
     DEF(ff_avg_pixels8_y2)(block, pixels, line_size, h);
     DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h);
 }

 static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
-                                     int line_size, int h)
+                                     ptrdiff_t line_size, int h)
 {
     DEF(ff_avg_pixels8_xy2)(block, pixels, line_size, h);
     DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h);
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 3ccef6226d..b882c68a3b 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -83,9 +83,9 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };

 #if HAVE_YASM
 void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);
 void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
-                             int line_size, int h);
+                             ptrdiff_t line_size, int h);
 void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
@@ -94,9 +94,9 @@ void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
 void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
-                               int line_size, int h);
+                               ptrdiff_t line_size, int h);
 void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);
 void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                int dstStride, int src1Stride, int h);
 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
@@ -104,47 +104,47 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                       int dstStride, int src1Stride, int h);
 void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
-                                     int line_size, int h);
+                                     ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
-                                    int line_size, int h);
+                                    ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, const uint8_t *pixels,
-                                           int line_size, int h);
+                                           ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, const uint8_t *pixels,
-                                          int line_size, int h);
+                                          ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);
 void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
-                             int line_size, int h);
+                             ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
-                                     int line_size, int h);
+                                     ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
-                                    int line_size, int h);
+                                    ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, const uint8_t *pixels,
-                                           int line_size, int h);
+                                           ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, const uint8_t *pixels,
-                                          int line_size, int h);
+                                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
-                           int line_size, int h);
+                           ptrdiff_t line_size, int h);
 void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
-                          int line_size, int h);
+                          ptrdiff_t line_size, int h);
 void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);
 void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
-                             int line_size, int h);
+                             ptrdiff_t line_size, int h);
 void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);
 void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
-                             int line_size, int h);
+                             ptrdiff_t line_size, int h);
 void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
-                               int line_size, int h);
+                               ptrdiff_t line_size, int h);
 void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
-                              int line_size, int h);
+                              ptrdiff_t line_size, int h);

-void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);

 static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                                    int line_size, int h)
 {
@@ -1455,9 +1455,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
 #endif /* HAVE_INLINE_ASM */

 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-                          int line_size, int h);
+                          ptrdiff_t line_size, int h);
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-                          int line_size, int h);
+                          ptrdiff_t line_size, int h);

 void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
                                      int stride, int h, int x, int y);
diff --git a/libavcodec/x86/dsputil_rnd_template.c b/libavcodec/x86/dsputil_rnd_template.c
index 7fcc6b7533..6ce926c6d1 100644
--- a/libavcodec/x86/dsputil_rnd_template.c
+++ b/libavcodec/x86/dsputil_rnd_template.c
@@ -25,7 +25,7 @@
  */

 // put_pixels
-static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
         :"memory");
 }

-static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
         :"memory");
 }

-static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
         :REG_a, "memory");
 }

-static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
@@ -298,7 +298,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
 }

 // avg_pixels
-static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     JUMPALIGN();
@@ -319,7 +319,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i

 #ifndef NO_RND
 // in case more speed is needed - unroling would certainly help
-static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     JUMPALIGN();
@@ -339,7 +339,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
 }
 #endif // NO_RND

-static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     JUMPALIGN();
@@ -363,7 +363,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
 }

 #ifndef NO_RND
-static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     JUMPALIGN();
@@ -405,7 +405,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
     } while (--h);
 }

-static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     JUMPALIGN();
@@ -458,7 +458,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
     } while (--h);
 }

-static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -498,7 +498,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
 }

 // this routine is 'slightly' suboptimal but mostly unused
-static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
@@ -573,22 +573,22 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
 }

 //FIXME optimize
-static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
     DEF(put, pixels8_y2)(block , pixels , line_size, h);
     DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
 }

-static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
     DEF(put, pixels8_xy2)(block , pixels , line_size, h);
     DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
 }

-static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
     DEF(avg, pixels8_y2)(block , pixels , line_size, h);
     DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
 }

-static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){
     DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
     DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
 }
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 920ae67630..d38186c857 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -25,10 +25,9 @@ cextern pb_1

 SECTION_TEXT

-; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_X2 0
 cglobal put_pixels8_x2, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
 .loop:
     mova         m0, [r1]
@@ -58,10 +57,9 @@ INIT_MMX 3dnow
 PUT_PIXELS8_X2


-; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS_16 0
 cglobal put_pixels16_x2, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
 .loop:
     mova         m0, [r1]
@@ -103,11 +101,10 @@ INIT_MMX 3dnow
 PUT_PIXELS_16


-; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_X2 0
 cglobal put_no_rnd_pixels8_x2, 4,5
     mova         m6, [pb_1]
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
 .loop:
     mova         m0, [r1]
@@ -145,10 +142,9 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2


-; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
 cglobal put_no_rnd_pixels8_x2_exact, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*3]
     pcmpeqb      m6, m6
 .loop:
@@ -193,10 +189,9 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2_EXACT


-; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
 cglobal put_pixels8_y2, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
     mova         m0, [r1]
     sub          r0, r2
@@ -228,11 +223,10 @@ INIT_MMX 3dnow
 PUT_PIXELS8_Y2


-; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_Y2 0
 cglobal put_no_rnd_pixels8_y2, 4,5
     mova         m6, [pb_1]
-    movsxdifnidn r2, r2d
     lea          r4, [r2+r2]
     mova         m0, [r1]
     sub          r0, r2
@@ -266,10 +260,9 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2


-; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
 cglobal put_no_rnd_pixels8_y2_exact, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*3]
     mova         m0, [r1]
     pcmpeqb      m6, m6
@@ -309,10 +302,9 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2_EXACT


-; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8 0
 cglobal avg_pixels8, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
 .loop:
     mova         m0, [r0]
@@ -340,10 +332,9 @@ INIT_MMX 3dnow
 AVG_PIXELS8


-; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_X2 0
 cglobal avg_pixels8_x2, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
 .loop:
     mova         m0, [r1]
@@ -377,10 +368,9 @@ INIT_MMX 3dnow
 AVG_PIXELS8_X2


-; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_Y2 0
 cglobal avg_pixels8_y2, 4,5
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
     mova         m0, [r1]
     sub          r0, r2
@@ -420,11 +410,10 @@ INIT_MMX 3dnow
 AVG_PIXELS8_Y2


-; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_XY2 0
 cglobal avg_pixels8_xy2, 4,5
     mova         m6, [pb_1]
-    movsxdifnidn r2, r2d
     lea          r4, [r2*2]
     mova         m0, [r1]
     pavgb        m0, [r1+1]
@@ -465,9 +454,8 @@ INIT_MMX 3dnow
 AVG_PIXELS8_XY2

 INIT_XMM sse2
-; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 cglobal put_pixels16, 4,5,4
-    movsxdifnidn r2, r2d
     lea          r4, [r2*3]
 .loop:
     movu         m0, [r1]
@@ -484,9 +472,8 @@ cglobal put_pixels16, 4,5,4
     jnz          .loop
     REP_RET

-; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 cglobal avg_pixels16, 4,5,4
-    movsxdifnidn r2, r2d
     lea          r4, [r2*3]
 .loop:
     movu         m0, [r1]
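The assembly hunks show where the saving comes from: on x86-64 each routine previously widened the 32-bit `int` stride with `movsxdifnidn r2, r2d` before using `r2` in address arithmetic such as `lea r4, [r2*2]`; with a `ptrdiff_t` argument the full register already holds a valid value, so the instruction is simply dropped. As a rough functional reference for the avg variants (again a sketch, not code from the commit; the rounding-up average is assumed to match the rnd flavour of these functions):

```c
#include <stddef.h>
#include <stdint.h>

/* Hypothetical scalar model of avg_pixels8 with the new prototype:
 * averages source and destination bytes with rounding up, then advances
 * both pointers by the ptrdiff_t stride. */
static void avg_pixels8_c(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h)
{
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 8; j++)
            block[j] = (uint8_t)((block[j] + pixels[j] + 1) >> 1);
        block  += line_size;
        pixels += line_size;
    }
}
```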