diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-03-08 02:28:40 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-03-08 02:51:45 +0100 |
commit | bf807a5e874442aa3fe1b475459cdd509e34bff4 (patch) | |
tree | f8067bfb5e99b8b8e2716a7ea8519a4aaa8ac60f /libavcodec/x86 | |
parent | 4cda8aa1c5bc58f8a7f53a21a19b03e7379bbcdc (diff) | |
parent | 6eda85e15b38863a627fd0602098aa3250174698 (diff) | |
download | ffmpeg-bf807a5e874442aa3fe1b475459cdd509e34bff4.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (29 commits)
sbrdsp.asm: convert all instructions to float/SSE ones.
dv: cosmetics.
dv: check buffer size before reading profile.
Revert "AAC SBR: group some writes."
udp: Print an error message if bind fails
cook: extend channel uncoupling tables so the full bit range is covered.
roqvideo: cosmetics.
roqvideo: convert to bytestream2 API.
dca: don't use av_clip_uintp2().
wmall: fix build with -DDEBUG enabled.
smc: port to bytestream2 API.
AAC SBR: group some writes.
dsputil: remove shift parameter from scalarproduct_int16
SBR DSP: unroll sum_square
rv34: remove dead code in intra availability check
rv34: clean a bit availability checks.
v4l2: update documentation
tgq: convert to bytestream2 API.
parser: remove forward declaration of MpegEncContext
dca: prevent accessing static arrays with invalid indexes.
...
Conflicts:
doc/indevs.texi
libavcodec/Makefile
libavcodec/dca.c
libavcodec/dvdata.c
libavcodec/eatgq.c
libavcodec/mmvideo.c
libavcodec/roqvideodec.c
libavcodec/smc.c
libswscale/output.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 7 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel_mmx.c | 10 | ||||
-rw-r--r-- | libavcodec/x86/sbrdsp.asm | 16 |
3 files changed, 10 insertions, 23 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 8e8c10c189..7ddc770a2e 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 SECTION_TEXT
 
 %macro SCALARPRODUCT 1
-; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift)
-cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
+; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
     shl orderq, 1
     add v1q, orderq
     add v2q, orderq
     neg orderq
-    movd m3, shiftm
     pxor m2, m2
 .loop:
     movu m0, [v1q + orderq]
@@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
 %if mmsize == 16
     movhlps m0, m2
     paddd m2, m0
-    psrad m2, m3
     pshuflw m0, m2, 0x4e
 %else
-    psrad m2, m3
     pshufw m0, m2, 0x4e
 %endif
     paddd m2, m0
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 807d8548d6..6cc3ac823d 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1161,16 +1161,6 @@ QPEL(put_, 16,XMM, 16)\
 QPEL(avg_, 8, XMM, 16)\
 QPEL(avg_, 16,XMM, 16)\
 
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
 #define PAVGB "pavgusb"
 QPEL_H264(put_, PUT_OP, 3dnow)
 QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c3b559bb15..31a1c8b76f 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     lea r0, [r0 + r3*8]
     neg r3
 .loop4:
-    movq m0, [r2 + 4*r3 + 0]
-    movq m1, [r2 + 4*r3 + 8]
-    movq m2, [r1 + 0*STEP]
-    movq m3, [r1 + 2*STEP]
+    movlps m0, [r2 + 4*r3 + 0]
+    movlps m1, [r2 + 4*r3 + 8]
+    movlps m2, [r1 + 0*STEP]
+    movlps m3, [r1 + 2*STEP]
     movhps m2, [r1 + 1*STEP]
     movhps m3, [r1 + 3*STEP]
-    punpckldq m0, m0
-    punpckldq m1, m1
+    unpcklps m0, m0
+    unpcklps m1, m1
     mulps m0, m2
     mulps m1, m3
     movu [r0 + 8*r3 + 0], m0
@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     jz .end
 .loop1: ; element 0 and 1 can be computed at the same time
     movss m0, [r2]
-    movq m2, [r1]
-    punpckldq m0, m0
+    movlps m2, [r1]
+    unpcklps m0, m0
     mulps m2, m0
     movlps [r0], m2
     add r0, 8