aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--postproc/rgb2rgb.c13
-rw-r--r--postproc/rgb2rgb.h1
-rw-r--r--postproc/rgb2rgb_template.c89
-rw-r--r--postproc/swscale.c42
4 files changed, 143 insertions, 2 deletions
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c
index 962a58945f..3878e4835f 100644
--- a/postproc/rgb2rgb.c
+++ b/postproc/rgb2rgb.c
@@ -512,6 +512,19 @@ void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
#endif
}
+/**
+ * Public entry point for the 2x plane upscaler.
+ * Forwards all arguments unchanged to exactly one of planar2x_MMX2,
+ * planar2x_3DNow or planar2x_C. The SIMD variants are only considered when
+ * CAN_COMPILE_X86_ASM is defined and the matching gCpuCaps flag is set.
+ */
+void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
+{
+#ifdef CAN_COMPILE_X86_ASM
+    // candidates are tried fastest first
+    if(gCpuCaps.hasMMX2){
+        planar2x_MMX2(src, dst, width, height, srcStride, dstStride);
+        return;
+    }
+    if(gCpuCaps.has3DNow){
+        planar2x_3DNow(src, dst, width, height, srcStride, dstStride);
+        return;
+    }
+#endif
+    // portable fallback
+    planar2x_C(src, dst, width, height, srcStride, dstStride);
+}
+
/**
*
* height should be a multiple of 2 and width should be a multiple of 2 (if this is a
diff --git a/postproc/rgb2rgb.h b/postproc/rgb2rgb.h
index 9fb6da6ef1..a0ce006103 100644
--- a/postproc/rgb2rgb.h
+++ b/postproc/rgb2rgb.h
@@ -41,6 +41,7 @@ extern void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t
extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
unsigned int width, unsigned int height,
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride);
+extern void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); /* writes a 2*width x 2*height upscaled copy of the width x height plane at src to dst */
extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
unsigned width, unsigned height, unsigned src1Stride,
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c
index 015e7f2d56..b6c26a11ee 100644
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -1295,6 +1295,95 @@ static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc,
/* XXX: implement upscaling for U,V */
}
+/**
+ * Upscale one 8-bit plane to twice its width and twice its height.
+ *
+ * The first and last destination rows are the first and last source rows
+ * with every sample written twice. Each pair of vertically adjacent source
+ * rows produces two destination rows: in the C path the inner samples are
+ * 3:1 weighted means of diagonally adjacent source samples, and the leftmost
+ * and rightmost samples are copied from the upper source row of the pair.
+ *
+ * @param src       top-left sample of the source plane
+ * @param dst       top-left sample of the destination plane; 2*srcWidth
+ *                  bytes are written in each of 2*srcHeight rows
+ * @param srcWidth  source width in samples
+ * @param srcHeight source height in rows
+ * @param srcStride byte distance between consecutive source rows
+ * @param dstStride byte distance between consecutive destination rows
+ */
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
+{
+    int x,y;
+
+    // nothing to scale; also keeps the edge writes below inside the plane
+    if(srcWidth<=0 || srcHeight<=0)
+        return;
+
+    // first line: plain horizontal doubling of source row 0
+    for(x=0; x<srcWidth; x++){
+        dst[2*x+0]=
+        dst[2*x+1]= src[x];
+    }
+    dst+= dstStride;
+
+    // each iteration reads source rows y-1 (src) and y (src+srcStride)
+    // and writes two destination rows (dst and dst+dstStride)
+    for(y=1; y<srcHeight; y++){
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        /* NOTE(review): the loop below consumes 8 source samples per
+         * iteration, counting %eax up from -mmxSize until it is no longer
+         * negative, and there is no scalar tail. If srcWidth is not a
+         * multiple of 8 the final iteration touches bytes past the end of
+         * the row in both src and dst. The first loads are also issued at
+         * src-1 and src+srcStride-1. Confirm that callers provide suitable
+         * widths and padding. */
+        const int mmxSize= srcWidth;
+        asm volatile(
+            "movl %4, %%eax \n\t"
+            "1: \n\t"
+            "movq (%0, %%eax), %%mm0 \n\t"
+            "movq (%1, %%eax), %%mm1 \n\t"
+            "movq 1(%0, %%eax), %%mm2 \n\t"
+            "movq 1(%1, %%eax), %%mm3 \n\t"
+            "movq %%mm0, %%mm4 \n\t"
+            "movq %%mm1, %%mm5 \n\t"
+            PAVGB" %%mm3, %%mm0 \n\t"
+            PAVGB" %%mm3, %%mm0 \n\t"
+            PAVGB" %%mm4, %%mm3 \n\t"
+            PAVGB" %%mm4, %%mm3 \n\t"
+            PAVGB" %%mm2, %%mm1 \n\t"
+            PAVGB" %%mm2, %%mm1 \n\t"
+            PAVGB" %%mm5, %%mm2 \n\t"
+            PAVGB" %%mm5, %%mm2 \n\t"
+            "movq %%mm3, %%mm4 \n\t"
+            "movq %%mm2, %%mm5 \n\t"
+            "punpcklbw %%mm1, %%mm3 \n\t"
+            "punpckhbw %%mm1, %%mm4 \n\t"
+            "punpcklbw %%mm0, %%mm2 \n\t"
+            "punpckhbw %%mm0, %%mm5 \n\t"
+#if 1
+            MOVNTQ" %%mm3, (%2, %%eax, 2) \n\t"
+            MOVNTQ" %%mm4, 8(%2, %%eax, 2) \n\t"
+            MOVNTQ" %%mm2, (%3, %%eax, 2) \n\t"
+            MOVNTQ" %%mm5, 8(%3, %%eax, 2) \n\t"
+#else
+            "movq %%mm3, (%2, %%eax, 2) \n\t"
+            "movq %%mm4, 8(%2, %%eax, 2) \n\t"
+            "movq %%mm2, (%3, %%eax, 2) \n\t"
+            "movq %%mm5, 8(%3, %%eax, 2) \n\t"
+#endif
+            "addl $8, %%eax \n\t"
+            " js 1b \n\t"
+            :: "r" (src + mmxSize-1), "r" (src + srcStride + mmxSize-1),
+               "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
+               "g" (-mmxSize)
+            // "memory": the asm loads from src and stores to dst, so the
+            // compiler must not move the edge stores below across it
+            : "%eax", "memory"
+
+        );
+        // left edge, written after the asm so it overrides what the asm stored there
+        dst[0]=
+        dst[dstStride]= src[0];
+#else
+        // left edge
+        dst[0]=
+        dst[dstStride]= src[0];
+
+        // inner samples: 3:1 blend of diagonally adjacent source samples
+        for(x=0; x<srcWidth-1; x++){
+            dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
+            dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
+            dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
+        }
+#endif
+        // right edge
+        dst[srcWidth*2 -1]=
+        dst[srcWidth*2 -1 + dstStride]= src[srcWidth-1];
+
+        dst+=dstStride*2;
+        src+=srcStride;
+    }
+
+    // last line: src was advanced srcHeight-1 times above, so it already
+    // points at the final source row and must not be stepped back
+    for(x=0; x<srcWidth; x++){
+        dst[2*x+0]=
+        dst[2*x+1]= src[x];
+    }
+#ifdef HAVE_MMX
+asm volatile( EMMS" \n\t"
+    SFENCE" \n\t"
+    :::"memory");
+#endif
+}
+
/**
*
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
diff --git a/postproc/swscale.c b/postproc/swscale.c
index 0f58c4d756..8a4223a7fa 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -1777,6 +1777,34 @@ static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
dstStride[0], dstStride[1], srcStride[0]);
}
+/**
+ * Unscaled YVU9 -> YV12/I420 conversion, installed as c->swScale.
+ *
+ * Luma is copied unchanged: srcSliceH rows from src[0] go to dst[0]
+ * starting at destination row srcSliceY. Both chroma planes are enlarged
+ * with planar2x(); for IMGFMT_YV12 output source plane 1 goes to plane 1
+ * and 2 to 2, for any other destination format the two are swapped.
+ *
+ * NOTE(review): the chroma planes are always converted in full
+ * (c->chrSrcW x c->chrSrcH, from the plane start) on every call, regardless
+ * of srcSliceY/srcSliceH. Confirm that callers hand in whole frames.
+ *
+ * @param c         scaler context; srcW, chrSrcW, chrSrcH and dstFormat are read
+ * @param src       source plane pointers
+ * @param srcStride source strides in bytes, one per plane
+ * @param srcSliceY first destination luma row of this slice
+ * @param srcSliceH number of luma rows in this slice
+ * @param dst       destination plane pointers
+ * @param dstStride destination strides in bytes, one per plane
+ */
+static void yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+    int srcSliceH, uint8_t* dst[], int dstStride[]){
+    int i;
+
+    /* copy Y */
+    // the single bulk copy is only valid for equal, positive strides; a
+    // negative stride would turn the byte count into a huge size_t
+    if(srcStride[0]==dstStride[0] && srcStride[0]>0)
+        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
+    else{
+        uint8_t *srcPtr= src[0];
+        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
+
+        // row by row, copying only the c->srcW visible bytes of each row
+        for(i=0; i<srcSliceH; i++)
+        {
+            memcpy(dstPtr, srcPtr, c->srcW);
+            srcPtr+= srcStride[0];
+            dstPtr+= dstStride[0];
+        }
+    }
+
+    if(c->dstFormat==IMGFMT_YV12){
+        planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
+        planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
+    }else{
+        // other destination: chroma planes are stored in the opposite order
+        planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
+        planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
+    }
+}
+
/**
* bring pointers in YUV order instead of YVU
*/
@@ -2051,7 +2079,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
-#if 1
+
/* simple copy */
if( srcFormat == dstFormat
|| (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420)
@@ -2067,7 +2095,17 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
-#endif
+
+ if( srcFormat==IMGFMT_YVU9 && (dstFormat==IMGFMT_YV12 || dstFormat==IMGFMT_I420) )
+ {
+ c->swScale= yvu9toyv12Wrapper;
+
+ if(flags&SWS_PRINT_INFO)
+ MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
+ vo_format_name(srcFormat), vo_format_name(dstFormat));
+ return c;
+ }
+
/* bgr32to24 & rgb32to24*/
if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))