aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mike Melanson <mike@multimedia.cx>, 2004-04-27 03:58:06 +0000
committer: Mike Melanson <mike@multimedia.cx>, 2004-04-27 03:58:06 +0000
commit: f9ed9d8584d762142cf5e579b38bfe649cc5c8e8 (patch)
tree: 790114f1ba563c9f673933792b8f5f8811cf656e
parent: c0c37848d8c571b13c5fe443f6d0811ac2d3cc36 (diff)
download: ffmpeg-f9ed9d8584d762142cf5e579b38bfe649cc5c8e8.tar.gz
separate out put_signed_pixels_clamped() into its own function and
implement an optimized MMX version of the function.

Originally committed as revision 3082 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/dsputil.c | 22
-rw-r--r-- libavcodec/dsputil.h | 2
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 19
-rw-r--r-- libavcodec/vp3.c | 23
4 files changed, 46 insertions, 20 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 7b554b1fd0..b1252251ad 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -332,6 +332,27 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
}
}
+static void put_signed_pixels_clamped_c(const DCTELEM *block, /* 64 signed samples, read sequentially */
+ uint8_t *restrict pixels, /* destination: first byte of the top output row */
+ int line_size) /* offset in bytes from the start of one output row to the next */
+{ /* Writes an 8x8 block to pixels: each sample is clamped to [-128, 127] and biased by +128 into 0..255. */
+ int i, j;
+
+ for (i = 0; i < 8; i++) { /* one pass per output row */
+ for (j = 0; j < 8; j++) { /* one pass per byte within the row */
+ if (*block < -128) /* below range: clamp to the minimum output value */
+ *pixels = 0;
+ else if (*block > 127) /* above range: clamp to the maximum output value */
+ *pixels = 255;
+ else
+ *pixels = (uint8_t)(*block + 128); /* in range: shift the signed sample to unsigned */
+ block++;
+ pixels++;
+ }
+ pixels += (line_size - 8); /* 8 bytes were already stepped over; move to the start of the next row */
+ }
+}
+
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
{
@@ -3131,6 +3152,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c;
c->put_pixels_clamped = put_pixels_clamped_c;
+ c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
c->add_pixels_clamped = add_pixels_clamped_c;
c->gmc1 = gmc1_c;
c->gmc = gmc_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 0307dbd6ab..3681541f5a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -137,6 +137,7 @@ typedef struct DSPContext {
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+ void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
/**
* translational global motion compensation.
@@ -374,6 +375,7 @@ extern int mm_flags;
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
static inline void emms(void)
{
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 61bfc89ac5..d117b0ca86 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -22,6 +22,7 @@
#include "../dsputil.h"
#include "../simple_idct.h"
+#include "mmx.h"
//#undef NDEBUG
//#include <assert.h>
@@ -293,6 +294,23 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
:"memory");
}
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) /* MMX counterpart of put_signed_pixels_clamped_c: handles 8 rows per call */
+{
+ int i;
+ unsigned char __align8 vector128[8] = /* eight 0x80 bytes, i.e. 128 in every byte lane */
+ { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; /* NOTE(review): rebuilt on every call; could probably be static const -- confirm */
+
+ movq_m2r(*vector128, mm1); /* presumably loads the 8 bias bytes into mm1 (macro comes from mmx.h, not shown here) */
+ for (i = 0; i < 8; i++) { /* one iteration per output row */
+ movq_m2r(*(block), mm0); /* presumably loads the first four samples of this row into mm0 -- confirm */
+ packsswb_m2r(*(block + 4), mm0); /* presumably packs them with the next four samples into 8 signed-saturated bytes -- confirm */
+ block += 8; /* step to the next group of 8 samples */
+ paddb_r2r(mm1, mm0); /* presumably adds 0x80 to each byte, mapping -128..127 onto 0..255 */
+ movq_r2m(mm0, *pixels); /* presumably stores the 8 result bytes at pixels */
+ pixels += line_size; /* advance to the next output row */
+ }
+} /* NOTE(review): no emms is issued here; presumably callers reset MMX state -- confirm */
+
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
const DCTELEM *p;
@@ -2160,6 +2178,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->diff_pixels = diff_pixels_mmx;
#endif //CONFIG_ENCODERS
c->put_pixels_clamped = put_pixels_clamped_mmx;
+ c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = add_pixels_clamped_mmx;
c->clear_blocks = clear_blocks_mmx;
#ifdef CONFIG_ENCODERS
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index cf22ee6ce0..59d183505e 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -2061,10 +2061,6 @@ static void render_fragments(Vp3DecodeContext *s,
int motion_halfpel_index;
uint8_t *motion_source;
- int16_t *op;
- uint8_t *dest;
- int j, k;
-
debug_vp3(" vp3: rendering final fragments for %s\n",
(plane == 0) ? "Y plane" : (plane == 1) ? "U plane" : "V plane");
@@ -2186,22 +2182,9 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
s->all_fragments[i].coeff_count,
output_samples);
if (s->all_fragments[i].coding_method == MODE_INTRA) {
- /* this really needs to be optimized sooner or later */
- op = output_samples;
- dest = output_plane + s->all_fragments[i].first_pixel;
- for (j = 0; j < 8; j++) {
- for (k = 0; k < 8; k++) {
- if (*op < -128)
- *dest = 0;
- else if (*op > 127)
- *dest = 255;
- else
- *dest = (uint8_t)(*op + 128);
- op++;
- dest++;
- }
- dest += (stride - 8);
- }
+ s->dsp.put_signed_pixels_clamped(output_samples,
+ output_plane + s->all_fragments[i].first_pixel,
+ stride);
} else {
s->dsp.add_pixels_clamped(output_samples,
output_plane + s->all_fragments[i].first_pixel,