diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2009-03-19 03:45:29 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2009-03-19 03:45:29 +0000 |
commit | 0411072ee3c1b1b727cd6f76eda48c4c3b84103b (patch) | |
tree | 0c81d9f71fed34a1d9bdc6e8e27e5f0b309fea7f | |
parent | 07d16e2ecfb7da976ef56829c4fd6f93ca94b00e (diff) | |
download | ffmpeg-0411072ee3c1b1b727cd6f76eda48c4c3b84103b.tar.gz |
Unscaled converters for
YUYV->YUV420P
YUYV->YUV422P
UYVY->YUV420P
UYVY->YUV422P
Originally committed as revision 28997 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
-rw-r--r-- | libswscale/rgb2rgb.c | 13 | ||||
-rw-r--r-- | libswscale/rgb2rgb.h | 14 | ||||
-rw-r--r-- | libswscale/rgb2rgb_template.c | 245 | ||||
-rw-r--r-- | libswscale/swscale.c | 53 |
4 files changed, 324 insertions, 1 deletions
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index ad69265c37..59b8e30a63 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -87,6 +87,19 @@ void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *sr
                      long width, long height,
                      long srcStride1, long srcStride2,
                      long srcStride3, long dstStride);
+void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                     long width, long height,
+                     long lumStride, long chromStride, long srcStride);
+void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                     long width, long height,
+                     long lumStride, long chromStride, long srcStride);
+void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                     long width, long height,
+                     long lumStride, long chromStride, long srcStride);
+void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                     long width, long height,
+                     long lumStride, long chromStride, long srcStride);
+
 
 #if ARCH_X86 && CONFIG_GPL
 DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index df912c8533..3850ef291d 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -142,6 +142,20 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint
                             long srcStride1, long srcStride2,
                             long srcStride3, long dstStride);
 
+/* Unscaled packed UYVY/YUYV -> planar 4:2:0 / 4:2:2 converters; width is in luma samples, strides are in bytes. */
+extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                            long width, long height,
+                            long lumStride, long chromStride, long srcStride);
+extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                            long width, long height,
+                            long lumStride, long chromStride, long srcStride);
+extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                            long width, long height,
+                            long lumStride, long chromStride, long srcStride);
+extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                            long width, long height,
+                            long lumStride, long chromStride, long srcStride);
+
 void sws_rgb2rgb_init(int flags);
 
 #endif /* SWSCALE_RGB2RGB_H */
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index fb4ac23d88..fa6de09a09 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -2701,6 +2701,245 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
 #endif
 }
 
+static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
+{   /* dst[i] = src[2*i] for 0 <= i < count */
+    dst +=   count;
+    src += 2*count;
+    count= - count;
+    /* src/dst now point one past the end; count climbs from -n up to 0 */
+#if HAVE_MMX
+    if(count <= -16){
+        count += 15;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every 16-bit word */
+            "1:                                \n\t"
+            "movq -30(%1, %0, 2), %%mm0        \n\t"
+            "movq -22(%1, %0, 2), %%mm1        \n\t"
+            "movq -14(%1, %0, 2), %%mm2        \n\t"
+            "movq  -6(%1, %0, 2), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
+            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
+            "add             $16, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst)
+        );
+        count -= 15;
+    }
+#endif
+    while(count<0){
+        dst[count]= src[2*count];
+        count++;
+    }
+}
+/* extract_even2: dst0[i] = src[4*i+0], dst1[i] = src[4*i+2] for 0 <= i < count */
+static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+#if HAVE_MMX
+    if(count <= -8){
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every 16-bit word */
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t" /* keep the even source bytes */
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+        );
+        count -= 7;
+    }
+#endif
+    while(count<0){
+        dst0[count]= src[4*count+0];
+        dst1[count]= src[4*count+2];
+        count++;
+    }
+}
+/* extract_odd2: dst0[i] = src[4*i+1], dst1[i] = src[4*i+3] for 0 <= i < count */
+static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+#if HAVE_MMX
+    if(count <= -8){
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t" /* mm7 = 0x00FF in every 16-bit word */
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t" /* keep the odd source bytes */
+            "psrlw            $8, %%mm1        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "psrlw            $8, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+        );
+        count -= 7;
+    }
+#endif
+    while(count<0){
+        dst0[count]= src[4*count+1]; /* odd bytes: must match the psrlw $8 selection of the MMX path */
+        dst1[count]= src[4*count+3];
+        count++;
+    }
+}
+/* YUYV -> planar 4:2:0: luma from every row, chroma copied from even rows only (no vertical averaging) */
+static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1); /* width/2 rounded up (relies on arithmetic >> of a negative value) */
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src, ydst, width);
+        if(!(y&1)){
+            RENAME(extract_odd2)(src, udst, vdst, chromWidth);
+            udst+= chromStride;
+            vdst+= chromStride;
+        }
+
+        src += srcStride;
+        ydst+= lumStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+    );
+#endif
+}
+/* YUYV -> planar 4:2:2: luma (even bytes) and chroma (odd bytes) extracted from every row */
+static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1); /* width/2 rounded up (relies on arithmetic >> of a negative value) */
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src, ydst, width);
+        RENAME(extract_odd2)(src, udst, vdst, chromWidth);
+
+        src += srcStride;
+        ydst+= lumStride;
+        udst+= chromStride;
+        vdst+= chromStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+    );
+#endif
+}
+/* UYVY -> planar 4:2:0: luma sits at odd bytes (hence src+1), chroma at even bytes, taken from even rows only */
+static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1); /* width/2 rounded up (relies on arithmetic >> of a negative value) */
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src+1, ydst, width);
+        if(!(y&1)){
+            RENAME(extract_even2)(src, udst, vdst, chromWidth);
+            udst+= chromStride;
+            vdst+= chromStride;
+        }
+
+        src += srcStride;
+        ydst+= lumStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+    );
+#endif
+}
+/* UYVY -> planar 4:2:2: luma (odd bytes, hence src+1) and chroma (even bytes) extracted from every row */
+static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                 long width, long height,
+                                 long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1); /* width/2 rounded up (relies on arithmetic >> of a negative value) */
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src+1, ydst, width);
+        RENAME(extract_even2)(src, udst, vdst, chromWidth);
+
+        src += srcStride;
+        ydst+= lumStride;
+        udst+= chromStride;
+        vdst+= chromStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+    );
+#endif
+}
+
 static inline void RENAME(rgb2rgb_init)(void){
     rgb15to16       = RENAME(rgb15to16);
     rgb15tobgr24    = RENAME(rgb15tobgr24);
@@ -2725,11 +2964,15 @@ static inline void RENAME(rgb2rgb_init)(void){
     yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
     yuv422ptouyvy   = RENAME(yuv422ptouyvy);
     yuy2toyv12      = RENAME(yuy2toyv12);
-//    uyvytoyv12      = RENAME(uyvytoyv12);
 //    yvu9toyv12      = RENAME(yvu9toyv12);
     planar2x        = RENAME(planar2x);
     rgb24toyv12     = RENAME(rgb24toyv12);
     interleaveBytes = RENAME(interleaveBytes);
     vu9_to_vu12     = RENAME(vu9_to_vu12);
     yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
+
+    uyvytoyuv420    = RENAME(uyvytoyuv420);
+    uyvytoyuv422    = RENAME(uyvytoyuv422);
+    yuyvtoyuv420    = RENAME(yuyvtoyuv420);
+    yuyvtoyuv422    = RENAME(yuyvtoyuv422);
 }
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5974ea566c..e1101b47db 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1745,6 +1745,50 @@ static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[],
     return srcSliceH;
 }
 
+static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+/* 4:2:2 output: the converter writes one chroma row per source row, so chroma planes are offset by srcSliceY rows, not srcSliceY/2 */
+static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
+
+    yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
+static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+/* 4:2:2 output: the converter writes one chroma row per source row, so chroma planes are offset by srcSliceY rows, not srcSliceY/2 */
+static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                              int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;
+
+    uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
 static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
     const enum PixelFormat srcFormat= c->srcFormat;
@@ -2399,7 +2443,16 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
                 else if (dstFormat == PIX_FMT_UYVY422)
                     c->swScale= PlanarToUyvyWrapper;
             }
+
+            if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV420P)
+                c->swScale= YUYV2YUV420Wrapper;
+            if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV420P)
+                c->swScale= UYVY2YUV420Wrapper;
         }
+        if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
+            c->swScale= YUYV2YUV422Wrapper;
+        if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
+            c->swScale= UYVY2YUV422Wrapper;
 
 #ifdef COMPILE_ALTIVEC
         if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&