diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2003-10-13 17:27:30 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2003-10-13 17:27:30 +0000 |
commit | 84705403444dc440a41d6aea2d49bac7bdfdedff (patch) | |
tree | 4250e821d043be1a0bf9849e8c768816a9631238 /libavcodec | |
parent | 8c812d73178de487c658e343505d2212ba55bcb1 (diff) | |
download | ffmpeg-84705403444dc440a41d6aea2d49bac7bdfdedff.tar.gz |
mmx2 optimization of huffyuv median encoding
Originally committed as revision 2372 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec'):

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | libavcodec/dsputil.c | 19 |
| -rw-r--r-- | libavcodec/dsputil.h | 5 |
| -rw-r--r-- | libavcodec/huffyuv.c | 33 |
| -rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 39 |

4 files changed, 70 insertions, 26 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 9e7cca734f..270424706a 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2526,6 +2526,24 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ dst[i+0] = src1[i+0]-src2[i+0]; } +static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ + int i; + uint8_t l, lt; + + l= *left; + lt= *left_top; + + for(i=0; i<w; i++){ + const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); + lt= src1[i]; + l= src2[i]; + dst[i]= l - pred; + } + + *left= l; + *left_top= lt; +} + #define BUTTERFLY2(o1,o2,i1,i2) \ o1= (i1)+(i2);\ o2= (i1)-(i2); @@ -3007,6 +3025,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; c->bswap_buf= bswap_buf; #ifdef HAVE_MMX diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 402b99091a..0cc6b9be0f 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -234,6 +234,11 @@ typedef struct DSPContext { /* huffyuv specific */ void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); + /** + * subtract huffyuv's variant of median prediction + * note, this might read from src1[-1], src2[-1] + */ + void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); /* (I)DCT */ diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c index a6220f0ac8..ee9790d989 100644 --- a/libavcodec/huffyuv.c +++ b/libavcodec/huffyuv.c @@ -153,25 +153,6 @@ static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *d *left_top= lt; } -//FIXME optimize -static inline void sub_median_prediction(uint8_t *dst, uint8_t *src1, 
uint8_t *src2, int w, int *left, int *left_top){ - int i; - uint8_t l, lt; - - l= *left; - lt= *left_top; - - for(i=0; i<w; i++){ - const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); - lt= src1[i]; - l= src2[i]; - dst[i]= l - pred; - } - - *left= l; - *left_top= lt; -} - static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){ int i; int r,g,b; @@ -999,9 +980,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, lefttopy= p->data[0][3]; lefttopu= p->data[1][1]; lefttopv= p->data[2][1]; - sub_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); - sub_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); - sub_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); + s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); encode_422_bitstream(s, width-4); y++; cy++; @@ -1011,7 +992,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, if(s->bitstream_bpp==12){ while(2*cy > y){ ydst= p->data[0] + p->linesize[0]*y; - sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); encode_gray_bitstream(s, width); y++; } @@ -1021,9 +1002,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, udst= p->data[1] + p->linesize[1]*cy; vdst= p->data[2] + p->linesize[2]*cy; - sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , 
&lefty, &lefttopy); - sub_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); - sub_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); encode_422_bitstream(s, width); } diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index adf27d4a11..2447ab6abf 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -583,6 +583,43 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ for(; i<w; i++) dst[i+0] = src1[i+0]-src2[i+0]; } + +static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ + int i=0; + uint8_t l, lt; + + asm volatile( + "1: \n\t" + "movq -1(%1, %0), %%mm0 \n\t" // LT + "movq (%1, %0), %%mm1 \n\t" // T + "movq -1(%2, %0), %%mm2 \n\t" // L + "movq (%2, %0), %%mm3 \n\t" // X + "movq %%mm2, %%mm4 \n\t" // L + "psubb %%mm0, %%mm2 \n\t" + "paddb %%mm1, %%mm2 \n\t" // L + T - LT + "movq %%mm4, %%mm5 \n\t" // L + "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) + "pminub %%mm5, %%mm1 \n\t" // min(T, L) + "pminub %%mm2, %%mm4 \n\t" + "pmaxub %%mm1, %%mm4 \n\t" + "psubb %%mm4, %%mm3 \n\t" // dst - pred + "movq %%mm3, (%3, %0) \n\t" + "addl $8, %0 \n\t" + "cmpl %4, %0 \n\t" + " jb 1b \n\t" + : "+r" (i) + : "r"(src1), "r"(src2), "r"(dst), "r"(w) + ); + + l= *left; + lt= *left_top; + + dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF); + + *left_top= src1[w-1]; + *left = src2[w-1]; +} + #define LBUTTERFLY2(a1,b1,a2,b2)\ "paddw " #b1 ", " #a1 " \n\t"\ "paddw " #b2 ", " #a2 " \n\t"\ @@ -1699,6 +1736,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext 
*avctx) SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) #endif + + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; } else if (mm_flags & MM_3DNOW) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |