diff options
author | Mike Melanson <mike@multimedia.cx> | 2004-04-27 03:58:06 +0000 |
---|---|---|
committer | Mike Melanson <mike@multimedia.cx> | 2004-04-27 03:58:06 +0000 |
commit | f9ed9d8584d762142cf5e579b38bfe649cc5c8e8 (patch) | |
tree | 790114f1ba563c9f673933792b8f5f8811cf656e | |
parent | c0c37848d8c571b13c5fe443f6d0811ac2d3cc36 (diff) | |
download | ffmpeg-f9ed9d8584d762142cf5e579b38bfe649cc5c8e8.tar.gz |
separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of the function
Originally committed as revision 3082 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/dsputil.c | 22 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 2 | ||||
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 19 | ||||
-rw-r--r-- | libavcodec/vp3.c | 23 |
4 files changed, 46 insertions, 20 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 7b554b1fd0..b1252251ad 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -332,6 +332,27 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, } } +static void put_signed_pixels_clamped_c(const DCTELEM *block, + uint8_t *restrict pixels, + int line_size) +{ + int i, j; + + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + if (*block < -128) + *pixels = 0; + else if (*block > 127) + *pixels = 255; + else + *pixels = (uint8_t)(*block + 128); + block++; + pixels++; + } + pixels += (line_size - 8); + } +} + static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { @@ -3131,6 +3152,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; + c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; c->add_pixels_clamped = add_pixels_clamped_c; c->gmc1 = gmc1_c; c->gmc = gmc_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 0307dbd6ab..3681541f5a 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -137,6 +137,7 @@ typedef struct DSPContext { void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); + void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); /** * translational global motion compensation. @@ -374,6 +375,7 @@ extern int mm_flags; void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); +void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); static inline void emms(void) { diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 61bfc89ac5..d117b0ca86 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -22,6 +22,7 @@ #include "../dsputil.h" #include "../simple_idct.h" +#include "mmx.h" //#undef NDEBUG //#include <assert.h> @@ -293,6 +294,23 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size :"memory"); } +void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + int i; + unsigned char __align8 vector128[8] = + { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; + + movq_m2r(*vector128, mm1); + for (i = 0; i < 8; i++) { + movq_m2r(*(block), mm0); + packsswb_m2r(*(block + 4), mm0); + block += 8; + paddb_r2r(mm1, mm0); + movq_r2m(mm0, *pixels); + pixels += line_size; + } +} + void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) { const DCTELEM *p; @@ -2160,6 +2178,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->diff_pixels = diff_pixels_mmx; #endif //CONFIG_ENCODERS c->put_pixels_clamped = put_pixels_clamped_mmx; + c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; c->add_pixels_clamped = add_pixels_clamped_mmx; c->clear_blocks = clear_blocks_mmx; #ifdef CONFIG_ENCODERS diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index cf22ee6ce0..59d183505e 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -2061,10 +2061,6 @@ static void render_fragments(Vp3DecodeContext *s, int motion_halfpel_index; uint8_t *motion_source; - int16_t *op; - uint8_t *dest; - int j, k; - debug_vp3(" vp3: rendering final fragments for %s\n", (plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane"); @@ -2186,22 +2182,9 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, s->all_fragments[i].coeff_count, output_samples); if (s->all_fragments[i].coding_method == MODE_INTRA) { - /* this really needs to be optimized sooner or later */ - op = output_samples; - dest = output_plane + s->all_fragments[i].first_pixel; - for (j = 0; j < 8; j++) { - for (k = 0; k < 8; k++) { - if (*op < -128) - *dest = 0; - else if (*op > 127) - *dest = 255; - else - *dest = (uint8_t)(*op + 128); - op++; - dest++; - } - dest += (stride - 8); - } + s->dsp.put_signed_pixels_clamped(output_samples, + output_plane + s->all_fragments[i].first_pixel, + stride); } else { s->dsp.add_pixels_clamped(output_samples, output_plane + s->all_fragments[i].first_pixel, |