about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author Justin Ruggles <justin.ruggles@gmail.com> 2011-11-06 20:43:13 -0500
committer Justin Ruggles <justin.ruggles@gmail.com> 2011-11-22 15:40:51 -0500
commit 395f2e70dd26524cb82d412cb938ded508df4d42 (patch)
tree 8078be86ea184d7f1e2e2e4cf498c43321e79b9e /libavcodec/x86
parent 05d1e45d1f42cc90d1f2f36c546d0096cea126a8 (diff)
download ffmpeg-395f2e70dd26524cb82d412cb938ded508df4d42.tar.gz
dsputil: use movups instead of movdqu in ff_emu_edge_core_sse()
This allows emulated_edge_mc_sse() and gmc_sse() to be used under AV_CPU_FLAG_SSE.
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 8
-rw-r--r-- libavcodec/x86/dsputil_yasm.asm 6
2 files changed, 7 insertions, 7 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f0de05a763..104bd7595f 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2874,6 +2874,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c->scalarproduct_float = ff_scalarproduct_float_sse;
c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
+
+ if (!high_bit_depth)
+ c->emulated_edge_mc = emulated_edge_mc_sse;
+ c->gmc = gmc_sse;
#endif
}
if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW))
@@ -2894,10 +2898,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->apply_window_int16 = ff_apply_window_int16_sse2;
}
}
-
- if (!high_bit_depth)
- c->emulated_edge_mc = emulated_edge_mc_sse;
- c->gmc= gmc_sse;
#endif
}
if (mm_flags & AV_CPU_FLAG_SSSE3) {
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index f2894cd501..8723a7e0b0 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -637,7 +637,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
%ifnidn %3, mmx
%rep %2/16
- movdqu xmm %+ %%sxidx, [r1+%%src_off]
+ movups xmm %+ %%sxidx, [r1+%%src_off]
%assign %%src_off %%src_off+16
%assign %%sxidx %%sxidx+1
%endrep ; %2/16
@@ -686,7 +686,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
%ifnidn %3, mmx
%rep %2/16
- movdqu [r0+%%dst_off], xmm %+ %%dxidx
+ movups [r0+%%dst_off], xmm %+ %%dxidx
%assign %%dst_off %%dst_off+16
%assign %%dxidx %%dxidx+1
%endrep ; %2/16
@@ -915,7 +915,7 @@ ALIGN 64
%define linesize r2m
V_COPY_NPX %1, mm0, movq, 8, 0xFFFFFFF8
%else ; !mmx
- V_COPY_NPX %1, xmm0, movdqu, 16, 0xFFFFFFF0
+ V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0
%ifdef ARCH_X86_64
%define linesize r2
V_COPY_NPX %1, rax , mov, 8