diff options
-rw-r--r-- | postproc/rgb2rgb.c | 13 | ||||
-rw-r--r-- | postproc/rgb2rgb.h | 1 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 89 | ||||
-rw-r--r-- | postproc/swscale.c | 42 |
4 files changed, 143 insertions, 2 deletions
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c index 962a58945f..3878e4835f 100644 --- a/postproc/rgb2rgb.c +++ b/postproc/rgb2rgb.c @@ -512,6 +512,19 @@ void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, #endif } +void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + planar2x_MMX2(src, dst, width, height, srcStride, dstStride); + else if(gCpuCaps.has3DNow) + planar2x_3DNow(src, dst, width, height, srcStride, dstStride); + else +#endif + planar2x_C(src, dst, width, height, srcStride, dstStride); +} + /** * * height should be a multiple of 2 and width should be a multiple of 2 (if this is a diff --git a/postproc/rgb2rgb.h b/postproc/rgb2rgb.h index 9fb6da6ef1..a0ce006103 100644 --- a/postproc/rgb2rgb.h +++ b/postproc/rgb2rgb.h @@ -41,6 +41,7 @@ extern void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride, unsigned int srcStride); +extern void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, unsigned width, unsigned height, unsigned src1Stride, diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c index 015e7f2d56..b6c26a11ee 100644 --- a/postproc/rgb2rgb_template.c +++ b/postproc/rgb2rgb_template.c @@ -1295,6 +1295,95 @@ static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, /* XXX: implement upscaling for U,V */ } +static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) +{ + int x,y; + + // first line + for(x=0; x<srcWidth; x++){ + dst[2*x+0]= + dst[2*x+1]= src[x]; + } + dst+= dstStride; + + for(y=1; y<srcHeight; y++){ +#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) + const int mmxSize= srcWidth; + asm volatile( + "movl %4, %%eax \n\t" + "1: \n\t" + "movq (%0, %%eax), %%mm0 \n\t" + "movq (%1, %%eax), %%mm1 \n\t" + "movq 1(%0, %%eax), %%mm2 \n\t" + "movq 1(%1, %%eax), %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "movq %%mm1, %%mm5 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm3 \n\t" + PAVGB" %%mm4, %%mm3 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + PAVGB" %%mm5, %%mm2 \n\t" + PAVGB" %%mm5, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "punpcklbw %%mm1, %%mm3 \n\t" + "punpckhbw %%mm1, %%mm4 \n\t" + "punpcklbw %%mm0, %%mm2 \n\t" + "punpckhbw %%mm0, %%mm5 \n\t" +#if 1 + MOVNTQ" %%mm3, (%2, %%eax, 2) \n\t" + MOVNTQ" %%mm4, 8(%2, %%eax, 2) \n\t" + MOVNTQ" %%mm2, (%3, %%eax, 2) \n\t" + MOVNTQ" %%mm5, 8(%3, %%eax, 2) \n\t" +#else + "movq %%mm3, (%2, %%eax, 2) \n\t" + "movq %%mm4, 8(%2, %%eax, 2) \n\t" + "movq %%mm2, (%3, %%eax, 2) \n\t" + "movq %%mm5, 8(%3, %%eax, 2) \n\t" +#endif + "addl $8, %%eax \n\t" + " js 1b \n\t" + :: "r" (src + mmxSize-1), "r" (src + srcStride + mmxSize-1), + "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), + "g" (-mmxSize) + : "%eax" + + ); + dst[0]= + dst[dstStride]= src[0]; +#else + dst[0]= + dst[dstStride]= src[0]; + + for(x=0; x<srcWidth-1; x++){ + dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; + dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; + dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; + dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; + } +#endif + dst[srcWidth*2 -1]= + dst[srcWidth*2 -1 + dstStride]= src[srcWidth-1]; + + dst+=dstStride*2; + src+=srcStride; + } + src-=srcStride; + + // last line + for(x=0; x<srcWidth; x++){ + dst[2*x+0]= + dst[2*x+1]= src[x]; + } +#ifdef HAVE_MMX +asm volatile( EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +#endif +} + /** * * height should be a multiple of 2 and width should be a multiple of 16 (if this is a diff --git a/postproc/swscale.c b/postproc/swscale.c index 0f58c4d756..8a4223a7fa 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -1777,6 +1777,34 @@ static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i dstStride[0], dstStride[1], srcStride[0]); } +static void yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + int i; + + /* copy Y */ + if(srcStride[0]==dstStride[0]) + memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH); + else{ + uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + + for(i=0; i<srcSliceH; i++) + { + memcpy(dstPtr, srcPtr, c->srcW); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } + + if(c->dstFormat==IMGFMT_YV12){ + planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]); + planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]); + }else{ + planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]); + planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]); + } +} + /** * bring pointers in YUV order instead of YVU */ @@ -2051,7 +2079,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } -#if 1 + /* simple copy */ if( srcFormat == dstFormat || (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420) @@ -2067,7 +2095,17 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, vo_format_name(srcFormat), vo_format_name(dstFormat)); return c; } -#endif + + if( srcFormat==IMGFMT_YVU9 && (dstFormat==IMGFMT_YV12 || dstFormat==IMGFMT_I420) ) + { + c->swScale= yvu9toyv12Wrapper; + + if(flags&SWS_PRINT_INFO) + MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", + vo_format_name(srcFormat), vo_format_name(dstFormat)); + return c; + } + /* bgr32to24 & rgb32to24*/ if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24) ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24)) |