author     Michael Niedermayer <michaelni@gmx.at>  2012-06-30 22:44:18 +0200
committer  Michael Niedermayer <michaelni@gmx.at>  2012-06-30 22:44:18 +0200
commit  64b25938e90253432d28ffd7d971f085c560a523 (patch)
tree    56ba6a39d7e8f14ebccdcc4db935164b5da624a2 /libavcodec
parent  be24f85176d8e46c3154f1f51013f235b273183e (diff)
parent  ceabc13f129cd6344b1eebdbe10119083fe5520e (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  dsputilenc_mmx: split assignment of ff_sse16_sse2 to SSE2 section.
  dnxhdenc: add space between function argument type and comment.
  x86: fmtconvert: add special asm for float_to_int16_interleave_misc_*
  attributes: Add a definition of av_always_inline for MSVC
  cmdutils: Pass the actual chosen encoder to filter_codec_opts
  os_support: Add fallback definitions for stat flags
  os_support: Rename the poll fallback function to ff_poll
  network: Check for struct pollfd
  os_support: Don't compare a negative number against socket descriptors
  os_support: Include all the necessary headers for the win32 open function
  x86: vc1: fix and enable optimised loop filter

Conflicts:
  cmdutils.c
  cmdutils.h
  ffmpeg.c
  ffplay.c
  libavformat/os_support.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--  libavcodec/dnxhdenc.h            |  2
-rw-r--r--  libavcodec/x86/dsputilenc_mmx.c  |  9
-rw-r--r--  libavcodec/x86/fmtconvert.asm    | 78
-rw-r--r--  libavcodec/x86/fmtconvert_mmx.c  | 12
-rw-r--r--  libavcodec/x86/vc1dsp_mmx.c      |  2
-rw-r--r--  libavcodec/x86/vc1dsp_yasm.asm   |  5
6 files changed, 96 insertions(+), 12 deletions(-)
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index 279a978cd3..640bbd3995 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -90,7 +90,7 @@ typedef struct DNXHDEncContext {
RCCMPEntry *mb_cmp;
RCEntry (*mb_rc)[8160];
- void (*get_pixels_8x4_sym)(DCTELEM */*align 16*/, const uint8_t *, int);
+ void (*get_pixels_8x4_sym)(DCTELEM * /*align 16*/, const uint8_t *, int);
} DNXHDEncContext;
void ff_dnxhd_init_mmx(DNXHDEncContext *ctx);
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index bf439ca5df..00e0a3fc37 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1128,8 +1128,8 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif
c->pix_norm1 = pix_norm1_mmx;
- c->sse[0] = (HAVE_YASM && mm_flags & AV_CPU_FLAG_SSE2) ? ff_sse16_sse2 : sse16_mmx;
- c->sse[1] = sse8_mmx;
+ c->sse[0] = sse16_mmx;
+ c->sse[1] = sse8_mmx;
c->vsad[4]= vsad_intra16_mmx;
c->nsse[0] = nsse16_mmx;
@@ -1165,10 +1165,13 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (bit_depth <= 8)
c->get_pixels = get_pixels_sse2;
c->sum_abs_dctelem= sum_abs_dctelem_sse2;
-#if HAVE_YASM && HAVE_ALIGNED_STACK
+#if HAVE_YASM
+ c->sse[0] = ff_sse16_sse2;
+#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;
c->hadamard8_diff[1]= ff_hadamard8_diff_sse2;
#endif
+#endif
}
#if HAVE_SSSE3
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 72bee55669..9499a9e3a7 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -115,6 +115,84 @@ FLOAT_TO_INT16 sse, 0
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
+;------------------------------------------------------------------------------
+; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
+;------------------------------------------------------------------------------
+%macro FLOAT_TO_INT16_STEP 2
+cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
+ add lenq, lenq
+ lea srcq, [srcq+2*lenq]
+ lea step3q, [stepq*3]
+ neg lenq
+.loop:
+%ifidn %1, sse2
+ cvtps2dq m0, [srcq+2*lenq ]
+ cvtps2dq m1, [srcq+2*lenq+16]
+ packssdw m0, m1
+ movd v1d, m0
+ psrldq m0, 4
+ movd v2d, m0
+ psrldq m0, 4
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+ movd v1d, m0
+ psrldq m0, 4
+ movd v2d, m0
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+%else
+ cvtps2pi m0, [srcq+2*lenq ]
+ cvtps2pi m1, [srcq+2*lenq+ 8]
+ cvtps2pi m2, [srcq+2*lenq+16]
+ cvtps2pi m3, [srcq+2*lenq+24]
+ packssdw m0, m1
+ packssdw m2, m3
+ movd v1d, m0
+ psrlq m0, 32
+ movd v2d, m0
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+ movd v1d, m2
+ psrlq m2, 32
+ movd v2d, m2
+ mov [dstq], v1w
+ mov [dstq+stepq*4], v2w
+ shr v1d, 16
+ shr v2d, 16
+ mov [dstq+stepq*2], v1w
+ mov [dstq+step3q*2], v2w
+ lea dstq, [dstq+stepq*8]
+%endif
+ add lenq, 16
+ js .loop
+%ifnidn %1, sse2
+ emms
+%endif
+ REP_RET
+%endmacro
+
+INIT_XMM
+FLOAT_TO_INT16_STEP sse2, 2
+INIT_MMX
+FLOAT_TO_INT16_STEP sse, 0
+%define cvtps2pi pf2id
+FLOAT_TO_INT16_STEP 3dnow, 0
+%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
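For reference, the new float_to_int16_step asm above amounts to the scalar loop below: each input float is rounded, saturated to int16, and stored "step" output elements apart, so one channel can be written straight into an interleaved buffer. This is only an illustrative sketch (float_to_int16_step_ref is a made-up name; the asm additionally assumes len is a multiple of 8 and the default round-to-nearest mode):

#include <math.h>
#include <stdint.h>

/* Scalar model of ff_float_to_int16_step_{sse,sse2,3dnow}(): round each
 * float, saturate to int16 and write it "step" int16 elements apart. */
static void float_to_int16_step_ref(int16_t *dst, const float *src,
                                    long len, long step)
{
    long i;
    for (i = 0; i < len; i++) {
        int v = (int)lrintf(src[i]);    /* cvtps2dq / cvtps2pi rounding */
        if (v < -32768) v = -32768;     /* packssdw saturates to int16 */
        if (v >  32767) v =  32767;
        dst[i * step] = v;
    }
}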
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index ca0b29344a..8c9c43f662 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -25,6 +25,7 @@
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/fmtconvert.h"
+#include "libavcodec/dsputil.h"
#if HAVE_YASM
@@ -35,6 +36,10 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
+void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse (int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
+
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
@@ -48,12 +53,9 @@ void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
- DECLARE_ALIGNED(16, int16_t, tmp)[len];\
- int i,j,c;\
+ int c;\
for(c=0; c<channels; c++){\
- ff_float_to_int16_##cpu(tmp, src[c], len);\
- for(i=0, j=c; i<len; i++, j+=channels)\
- dst[j] = tmp[i];\
+ ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
}\
}\
\
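With that helper in mind, the rewritten float_to_int16_interleave_misc_*() body above reduces to the sketch below: each channel is converted directly into its interleaved slots, instead of going through the temporary buffer and copy loop that the hunk removes (float_to_int16_step_ref is the illustrative scalar stand-in from the previous sketch, not the real asm):

/* Illustrative only: channel c ends up at dst[c], dst[c+channels],
 * dst[c+2*channels], ... with no intermediate buffer. */
static void float_to_int16_interleave_misc_ref(int16_t *dst, const float **src,
                                               long len, int channels)
{
    int c;
    for (c = 0; c < channels; c++)
        float_to_int16_step_ref(dst + c, src[c], len, channels);
}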
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 4adeabca3b..bddac5ec77 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -809,7 +809,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
}
- return;
+
if (mm_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_LF(mmx2);
}
diff --git a/libavcodec/x86/vc1dsp_yasm.asm b/libavcodec/x86/vc1dsp_yasm.asm
index b897580b76..590aa509a7 100644
--- a/libavcodec/x86/vc1dsp_yasm.asm
+++ b/libavcodec/x86/vc1dsp_yasm.asm
@@ -119,7 +119,9 @@ section .text
pand m2, m6
pand m3, m2 ; d final
- PSIGNW m3, m7
+ psraw m7, 15
+ pxor m3, m7
+ psubw m3, m7
psubw m0, m3
paddw m1, m3
packuswb m0, m0
@@ -284,7 +286,6 @@ cglobal vc1_h_loop_filter8_sse2, 3,6,8
RET
%define PABSW PABSW_SSSE3
-%define PSIGNW PSIGNW_SSSE3
INIT_MMX
; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)
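The psraw/pxor/psubw sequence in the first vc1dsp_yasm.asm hunk replaces the SSSE3-only PSIGNW (whose define is dropped in the second hunk) with plain MMX/SSE2 instructions. Per element it behaves like the sketch below; unlike a real psignw it leaves d unchanged rather than zeroing it when the sign source is zero, which is presumably acceptable here since even the SSSE3 build now uses the same sequence (names are illustrative only):

/* Per-element model of:  psraw m7, 15 / pxor m3, m7 / psubw m3, m7 */
static int16_t psignw_emul(int16_t d, int16_t s)
{
    int16_t mask = s < 0 ? -1 : 0;  /* psraw m7, 15: all-ones if s is negative */
    d ^= mask;                      /* pxor  m3, m7 */
    d -= mask;                      /* psubw m3, m7: together, negate d when s < 0 */
    return d;
}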