diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-02-08 02:59:09 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-02-08 05:53:35 +0100 |
commit | 18d0a16fc9d189b1d5593f9a42bb2316e9a66ca9 (patch) | |
tree | aad3d9b1a07b9efebd7435bb27dde147cfa67913 /libswscale/x86 | |
parent | 950930b461cef025152de406f816a3b2efffb540 (diff) | |
parent | ef1c785f11c168384e42d147648c8fdf5317739b (diff) | |
download | ffmpeg-18d0a16fc9d189b1d5593f9a42bb2316e9a66ca9.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
swscale: make yuv2yuv1 use named registers.
h264: mark h264_idct_add8_10 with number of XMM registers.
swscale: fix V plane memory location in bilinear/unscaled RGB/YUYV case.
vp8: always update next_framep[] before returning from decode_frame().
avconv: estimate next_dts from framerate if it is set.
avconv: better next_dts usage.
avconv: rename InputStream.pts to last_dts.
avconv: reduce overloading for InputStream.pts.
avconv: rename InputStream.next_pts to next_dts.
avconv: rework -t handling for encoding.
avconv: set encoder timebase for subtitles.
pva-demux test: add -vn
swscale: K&R formatting cosmetics for SPARC code
apedec: allow the user to set the maximum number of output samples per call
apedec: do not unnecessarily zero output samples for mono frames
apedec: allocate a single flat buffer for decoded samples
apedec: use sizeof(field) instead of sizeof(type)
swscale: split C output functions into separate file.
swscale: Split C input functions into separate file.
bytestream: Add bytestream2 writing API.
The avconv changes are not merged yet, due to massive regressions and bugs.
Conflicts:
ffmpeg.c
libavcodec/vp8.c
libswscale/swscale.c
libswscale/x86/swscale_template.c
tests/fate/demux.mak
tests/ref/lavf/asf
tests/ref/lavf/avi
tests/ref/lavf/mkv
tests/ref/lavf/mpg
tests/ref/lavf/nut
tests/ref/lavf/ogg
tests/ref/lavf/rm
tests/ref/lavf/ts
tests/ref/seek/lavf_avi
tests/ref/seek/lavf_mkv
tests/ref/seek/lavf_rm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86')
-rw-r--r-- | libswscale/x86/output.asm | 50 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 24 |
2 files changed, 37 insertions, 37 deletions
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 70a2c16bcf..4b2f5c89eb 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -275,17 +275,17 @@ yuv2planeX_fn 10, 7, 5 %macro yuv2plane1_mainloop 2 .loop_%2: %if %1 == 8 - paddsw m0, m2, [r0+r2*2+mmsize*0] - paddsw m1, m3, [r0+r2*2+mmsize*1] + paddsw m0, m2, [srcq+dstwq*2+mmsize*0] + paddsw m1, m3, [srcq+dstwq*2+mmsize*1] psraw m0, 7 psraw m1, 7 packuswb m0, m1 mov%2 [r1+r2], m0 %elif %1 == 16 - paddd m0, m4, [r0+r2*4+mmsize*0] - paddd m1, m4, [r0+r2*4+mmsize*1] - paddd m2, m4, [r0+r2*4+mmsize*2] - paddd m3, m4, [r0+r2*4+mmsize*3] + paddd m0, m4, [srcq+dstwq*4+mmsize*0] + paddd m1, m4, [srcq+dstwq*4+mmsize*1] + paddd m2, m4, [srcq+dstwq*4+mmsize*2] + paddd m3, m4, [srcq+dstwq*4+mmsize*3] psrad m0, 3 psrad m1, 3 psrad m2, 3 @@ -299,46 +299,46 @@ yuv2planeX_fn 10, 7, 5 paddw m0, m5 paddw m2, m5 %endif ; mmx/sse2/sse4/avx - mov%2 [r1+r2*2], m0 - mov%2 [r1+r2*2+mmsize], m2 -%else - paddsw m0, m2, [r0+r2*2+mmsize*0] - paddsw m1, m2, [r0+r2*2+mmsize*1] + mov%2 [dstq+dstwq*2+mmsize*0], m0 + mov%2 [dstq+dstwq*2+mmsize*1], m2 +%else ; %1 == 9/10 + paddsw m0, m2, [srcq+dstwq*2+mmsize*0] + paddsw m1, m2, [srcq+dstwq*2+mmsize*1] psraw m0, 15 - %1 psraw m1, 15 - %1 pmaxsw m0, m4 pmaxsw m1, m4 pminsw m0, m3 pminsw m1, m3 - mov%2 [r1+r2*2], m0 - mov%2 [r1+r2*2+mmsize], m1 + mov%2 [dstq+dstwq*2+mmsize*0], m0 + mov%2 [dstq+dstwq*2+mmsize*1], m1 %endif - add r2, mmsize + add dstwq, mmsize jl .loop_%2 %endmacro %macro yuv2plane1_fn 3 -cglobal yuv2plane1_%1, %3, %3, %2 - add r2, mmsize - 1 - and r2, ~(mmsize - 1) +cglobal yuv2plane1_%1, %3, %3, %2, src, dst, dstw, dither, offset + add dstwq, mmsize - 1 + and dstwq, ~(mmsize - 1) %if %1 == 8 - add r1, r2 + add dstq, dstwq %else ; %1 != 8 - lea r1, [r1+r2*2] + lea dstq, [dstq+dstwq*2] %endif ; %1 == 8 %if %1 == 16 - lea r0, [r0+r2*4] + lea srcq, [srcq+dstwq*4] %else ; %1 != 16 - lea r0, [r0+r2*2] + lea srcq, [srcq+dstwq*2] %endif ; %1 == 16 - 
neg r2 + neg dstwq %if %1 == 8 pxor m4, m4 ; zero ; create registers holding dither - movq m3, [r3] ; dither - test r4d, r4d + movq m3, [ditherq] ; dither + test offsetd, offsetd jz .no_rot %if mmsize == 16 punpcklqdq m3, m3 @@ -374,7 +374,7 @@ cglobal yuv2plane1_%1, %3, %3, %2 %if mmsize == 8 yuv2plane1_mainloop %1, a %else ; mmsize == 16 - test r1, 15 + test dstq, 15 jnz .unaligned yuv2plane1_mainloop %1, a REP_RET diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 8a98c7b924..d9e5cbbf44 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -762,10 +762,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -993,10 +993,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -1048,9 +1048,9 @@ 
static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], ".p2align 4 \n\t"\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ @@ -1101,10 +1101,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ @@ -1368,9 +1368,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ".p2align 4 \n\t"\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "psraw $7, %%mm3 \n\t" \ "psraw $7, %%mm4 \n\t" \ "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ @@ -1386,10 +1386,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFFx2"("#c"), "#index" \n\t" \ + 
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "psrlw $8, %%mm3 \n\t" \ |