diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-03-07 02:57:53 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-03-07 03:22:49 +0100 |
commit | 6df42f98746be06c883ce683563e07c9a2af983f (patch) | |
tree | 6bb893aaf179526515cfb3b1cc933721317dcf6f /libswscale | |
parent | 57986c501e8c97d4bd2e1b7ce9e9037c4ae06245 (diff) | |
parent | b5161908e06b4497bf663510fb495ba97a6fd2b5 (diff) | |
download | ffmpeg-6df42f98746be06c883ce683563e07c9a2af983f.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
SBR DSP: fix SSE code to not use SSE2 instructions.
cpu: initialize mask to -1, so that by default, optimizations are used.
error_resilience: initialize s->block_index[].
svq3: protect against negative quantizers.
Don't use ff_cropTbl[] for IDCT.
swscale: make filterPos 32bit.
FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option.
avconv: add -cpuflags option for setting supported cpuflags.
cpu: add av_set_cpu_flags_mask().
libx264: Allow overriding the sliced threads option
avconv: fix counting encoded video size.
Conflicts:
doc/APIchanges
doc/fate.texi
doc/ffmpeg.texi
ffmpeg.c
libavcodec/h264idct_template.c
libavcodec/svq3.c
libavutil/avutil.h
libavutil/cpu.c
libavutil/cpu.h
libswscale/swscale.c
tests/Makefile
tests/fate-run.sh
tests/regression-funcs.sh
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/ppc/swscale_altivec.c | 2 | ||||
-rw-r--r-- | libswscale/swscale.c | 20 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 12 | ||||
-rw-r--r-- | libswscale/utils.c | 4 | ||||
-rw-r--r-- | libswscale/x86/scale.asm | 31 | ||||
-rw-r--r-- | libswscale/x86/swscale_mmx.c | 6 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 4 |
7 files changed, 41 insertions, 38 deletions
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index b6aed04f9e..df8afb0ba2 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize, static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { register int i; DECLARE_ALIGNED(16, int, tempo)[4]; diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 6dc02b4422..38856d9c37 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -63,7 +63,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride, static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { int i; int32_t *dst = (int32_t *) _dst; @@ -89,7 +89,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { int i; const uint16_t *src = (const uint16_t *) _src; @@ -113,7 +113,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t // bilinear / bicubic scaling static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, + const int16_t *filter, const int32_t *filterPos, int filterSize) { int i; @@ -131,7 +131,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t * } static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, + const int16_t *filter, const int32_t *filterPos, int filterSize) { int i; @@ -234,7 +234,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hLumFilter, - const int16_t *hLumFilterPos, int hLumFilterSize, + const int32_t *hLumFilterPos, int hLumFilterSize, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { @@ -282,7 +282,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hChrFilter, - const int16_t *hChrFilterPos, int hChrFilterSize, + const int32_t *hChrFilterPos, int hChrFilterSize, uint8_t *formatConvBuffer, uint32_t *pal) { const uint8_t *src1 = src_in[1], *src2 = src_in[2]; @@ -326,10 +326,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrXInc= c->chrXInc; const enum PixelFormat dstFormat= c->dstFormat; const int flags= c->flags; - int16_t *vLumFilterPos= c->vLumFilterPos; - int16_t *vChrFilterPos= c->vChrFilterPos; - int16_t *hLumFilterPos= c->hLumFilterPos; - int16_t *hChrFilterPos= c->hChrFilterPos; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; + int32_t *hLumFilterPos= c->hLumFilterPos; + int32_t *hChrFilterPos= c->hChrFilterPos; int16_t *hLumFilter= c->hLumFilter; int16_t *hChrFilter= c->hChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 930435608b..e73806ecc2 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -299,10 +299,10 @@ typedef struct SwsContext { int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. - int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. - int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. - int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. - int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. + int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. + int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. + int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. + int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. int hChrFilterSize; ///< Horizontal filter size for chroma pixels. int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. @@ -515,10 +515,10 @@ typedef struct SwsContext { /** @{ */ void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize); + const int32_t *filterPos, int filterSize); void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize); + const int32_t *filterPos, int filterSize); /** @} */ /// Color range conversion function for luma plane if needed. diff --git a/libswscale/utils.c b/libswscale/utils.c index 6391d51c87..7e8285de7c 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -191,7 +191,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist dist-1.0); } -static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, +static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) { @@ -207,7 +207,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end - FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail); + FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail); if (FFABS(xInc - 0x10000) <10) { // unscaled int i; diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 3c8b6fa00e..b8cea49783 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -38,7 +38,7 @@ SECTION .text ; (SwsContext *c, int{16,32}_t *dst, ; int dstW, const uint{8,16}_t *src, ; const int16_t *filter, -; const int16_t *filterPos, int filterSize); +; const int32_t *filterPos, int filterSize); ; ; Scale one horizontal line. Input is either 8-bits width or 16-bits width ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to @@ -53,6 +53,9 @@ SECTION .text cglobal hscale%1to%2_%4_%5, %6, 7, %7 %if ARCH_X86_64 movsxd r2, r2d +%define mov32 movsxd +%else ; x86-32 +%define mov32 mov %endif ; x86-64 %if %2 == 19 %if mmsize == 8 ; mmx @@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %2 == 19 lea r1, [r1+r2*(4>>r2shr)] %endif ; %2 == 15/19 - lea r5, [r5+r2*(2>>r2shr)] + lea r5, [r5+r2*(4>>r2shr)] neg r2 .loop: %if %3 == 4 ; filterSize == 4 scaling ; load 2x4 or 4x4 source pixels into m0/m1 - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r6, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r6, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] %if mmsize == 8 movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] @@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %1 == 8 movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] %endif - movsx r0, word [r5+r2*2+4] ; filterPos[2] - movsx r6, word [r5+r2*2+6] ; filterPos[3] + mov32 r0, dword [r5+r2*4+8] ; filterPos[2] + mov32 r6, dword [r5+r2*4+12] ; filterPos[3] movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] %if %1 > 8 movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] @@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %endif ; mmx/sse2/ssse3/sse4 %else ; %3 == 8, i.e. filterSize == 8 scaling ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 - movsx r0, word [r5+r2*1+0] ; filterPos[0] - movsx r6, word [r5+r2*1+2] ; filterPos[1] + mov32 r0, dword [r5+r2*2+0] ; filterPos[0] + mov32 r6, dword [r5+r2*2+4] ; filterPos[1] movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] %if mmsize == 8 movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] @@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] %else ; mmsize == 16 movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] - movsx r0, word [r5+r2*1+4] ; filterPos[2] - movsx r6, word [r5+r2*1+6] ; filterPos[3] + mov32 r0, dword [r5+r2*2+8] ; filterPos[2] + mov32 r6, dword [r5+r2*2+12] ; filterPos[3] movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] %endif ; mmsize == 8/16 @@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %define r1x r1 %define filter2 r6m %endif ; x86-32/64 - lea r5, [r5+r2*2] + lea r5, [r5+r2*4] %if %2 == 15 lea r1, [r1+r2*2] %else ; %2 == 19 @@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 neg r2 .loop: - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? pxor m4, m4 pxor m5, m5 @@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 jl .innerloop %ifidn %4, X4 - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] sub r1x, r6 ; and first 4 srcpx of dstpx[1] %if %1 > 8 diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 28fc004573..0c8732ce2a 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -94,8 +94,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; - int16_t *vLumFilterPos= c->vLumFilterPos; - int16_t *vChrFilterPos= c->vChrFilterPos; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; int16_t *vLumFilter= c->vLumFilter; int16_t *vChrFilter= c->vChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -266,7 +266,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SwsContext *c, int16_t *data, \ int dstW, const uint8_t *src, \ const int16_t *filter, \ - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) #define SCALE_FUNCS(filter_n, opt) \ SCALE_FUNC(filter_n, 8, 15, opt); \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 4eee894c7e..cf8e802229 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1450,7 +1450,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc) { - int16_t *filterPos = c->hLumFilterPos; + int32_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; void *mmx2FilterCode= c->lumMmx2FilterCode; int i; @@ -1546,7 +1546,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc) { - int16_t *filterPos = c->hChrFilterPos; + int32_t *filterPos = c->hChrFilterPos; int16_t *filter = c->hChrFilter; void *mmx2FilterCode= c->chrMmx2FilterCode; int i; |