diff options
author | Fred <foohoo@shaw.ca> | 2002-10-30 09:09:34 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-10-30 09:09:34 +0000 |
commit | 5981f4e6930d134d6b3d11546cd3ff49ed6d09cf (patch) | |
tree | 21b481d045c20e42fe6f060597f0ddaf1b92707a /libavcodec/imgconvert.c | |
parent | 2727c35ed76c3f82e395a82db09b258203b0d54c (diff) | |
download | ffmpeg-5981f4e6930d134d6b3d11546cd3ff49ed6d09cf.tar.gz |
1) Add MMX deinterlace code.
2) "Fix" first and last line deinterlace. I had second thoughts about this, since the old behavior might be some image filtering algorithm that someone cleverer than I created.
3) Add in-place deinterlace functions (only used when src == dst).
patch by (Fred <foohoo at shaw dot ca>)
Originally committed as revision 1113 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/imgconvert.c')
-rw-r--r-- | libavcodec/imgconvert.c | 232 |
1 file changed, 181 insertions, 51 deletions
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c index 45bc540568..854ea50ddb 100644 --- a/libavcodec/imgconvert.c +++ b/libavcodec/imgconvert.c @@ -22,6 +22,10 @@ #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" #endif + +#ifdef HAVE_MMX +#include "i386/mmx.h" +#endif /* XXX: totally non optimized */ static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, @@ -762,77 +766,182 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, return 0; } + +#ifdef HAVE_MMX +#define DEINT_INPLACE_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + movd_r2m(mm2,lum_m4[0]);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,lum_m2[0]); + +#define DEINT_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,dst[0]); +#endif + /* filter parameters: [-1 4 2 4 -1] // 8 */ -static void deinterlace_line(UINT8 *dst, UINT8 *src, int src_wrap, - int size) +static void deinterlace_line(UINT8 *dst, UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum, + int size) { +#ifndef HAVE_MMX UINT8 *cm = cropTbl + MAX_NEG_CROP; int sum; - UINT8 *s; for(;size > 0;size--) { - s = src; - sum = -s[0]; - s += src_wrap; - sum += s[0] << 
2; - s += src_wrap; - sum += s[0] << 1; - s += src_wrap; - sum += s[0] << 2; - s += src_wrap; - sum += -s[0]; + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + sum += lum_m1[0] << 2; + sum += -lum[0]; dst[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; dst++; - src++; } +#else + + for (;size > 3; size-=4) { + DEINT_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + dst+=4; + } +#endif +} +static void deinterlace_line_inplace(UINT8 *lum_m4, UINT8 *lum_m3, UINT8 *lum_m2, UINT8 *lum_m1, UINT8 *lum, + int size) +{ +#ifndef HAVE_MMX + UINT8 *cm = cropTbl + MAX_NEG_CROP; + int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + lum_m4[0]=lum_m2[0]; + sum += lum_m1[0] << 2; + sum += -lum[0]; + lum_m2[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + } +#else + + for (;size > 3; size-=4) { + DEINT_INPLACE_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + } +#endif } /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The top field is copied as is, but the bottom field is deinterlaced against the top field. 
*/ static void deinterlace_bottom_field(UINT8 *dst, int dst_wrap, - UINT8 *src1, int src_wrap, - int width, int height) + UINT8 *src1, int src_wrap, + int width, int height) { - UINT8 *src, *ptr; - int y, y1, i; - UINT8 *buf; - - buf = (UINT8*)av_malloc(5 * width); - - src = src1; - for(y=0;y<height;y+=2) { - /* copy top field line */ - memcpy(dst, src, width); + UINT8 *src_m2, *src_m1, *src_0, *src_p1, *src_p2; + int y; + + src_m2 = src1; + src_m1 = src1; + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + memcpy(dst,src_m1,width); dst += dst_wrap; - src += (1 - 2) * src_wrap; - y1 = y - 2; - if (y1 >= 0 && (y1 + 4) < height) { - /* fast case : no edges */ - deinterlace_line(dst, src, src_wrap, width); - } else { - /* in order to use the same function, we use an intermediate buffer */ - ptr = buf; - for(i=0;i<5;i++) { - if (y1 < 0) - memcpy(ptr, src1, width); - else if (y1 >= height) - memcpy(ptr, src1 + (height - 1) * src_wrap, width); - else - memcpy(ptr, src1 + y1 * src_wrap, width); - y1++; - ptr += width; - } - deinterlace_line(dst, buf, width, width); - } + deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width); + src_m2 = src_0; + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 2*src_wrap; dst += dst_wrap; - src += (2 + 1) * src_wrap; } + memcpy(dst,src_m1,width); + dst += dst_wrap; + /* do last line */ + deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width); +} + +static void deinterlace_bottom_field_inplace(UINT8 *src1, int src_wrap, + int width, int height) +{ + UINT8 *src_m1, *src_0, *src_p1, *src_p2; + int y; + UINT8 *buf; + buf = (UINT8*)av_malloc(width); + + src_m1 = src1; + memcpy(buf,src_m1,width); + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width); + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 
2*src_wrap; + } + /* do last line */ + deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width); av_free(buf); } -/* deinterlace, return -1 if format not handled */ +/* deinterlace - if not supported return -1 */ int avpicture_deinterlace(AVPicture *dst, AVPicture *src, int pix_fmt, int width, int height) { @@ -842,8 +951,21 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src, pix_fmt != PIX_FMT_YUV422P && pix_fmt != PIX_FMT_YUV444P) return -1; - if ((width & 1) != 0 || (height & 3) != 0) + if ((width & 3) != 0 || (height & 3) != 0) return -1; + +#ifdef HAVE_MMX + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } +#endif + for(i=0;i<3;i++) { if (i == 1) { @@ -859,10 +981,18 @@ int avpicture_deinterlace(AVPicture *dst, AVPicture *src, break; } } - deinterlace_bottom_field(dst->data[i], dst->linesize[i], - src->data[i], src->linesize[i], + if (src == dst) { + deinterlace_bottom_field_inplace(src->data[i], src->linesize[i], width, height); + } else { + deinterlace_bottom_field(dst->data[i],dst->linesize[i], + src->data[i], src->linesize[i], + width, height); + } } +#ifdef HAVE_MMX + emms(); +#endif return 0; } |