aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/dsputil_yasm.asm
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-08-04 22:39:25 +0200
committerMichael Niedermayer <michaelni@gmx.at>2012-08-04 23:51:43 +0200
commite776ee8f294984f7643a3c45db803c7266e1edfd (patch)
treea1fd00ab7a0760ec0f2848aed9dc3f79d889e816 /libavcodec/x86/dsputil_yasm.asm
parent88fc1438c693ffb7793aeb111d89775440491840 (diff)
parent8821ae649e61097ec57ca58472c3e4239c82913c (diff)
downloadffmpeg-e776ee8f294984f7643a3c45db803c7266e1edfd.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: lavr: fix handling of custom mix matrices fate: force pix_fmt in lagarith-rgb32 test fate: add tests for lagarith lossless video codec. ARMv6: vp8: fix stack allocation with Apple's assembler ARM: vp56: allow inline asm to build with clang fft: 3dnow: fix register name typo in DECL_IMDCT macro x86: dct32: port to cpuflags x86: build: replace mmx2 by mmxext Revert "wmapro: prevent division by zero when sample rate is unspecified" wmapro: prevent division by zero when sample rate is unspecified lagarith: fix color plane inversion for YUY2 output. lagarith: pad RGB buffer by 1 byte. dsputil: make add_hfyu_left_prediction_sse4() support unaligned src. Conflicts: doc/APIchanges libavcodec/lagarith.c libavfilter/x86/gradfun.c libavutil/cpu.h libavutil/version.h libswscale/utils.c libswscale/version.h libswscale/x86/yuv2rgb.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/dsputil_yasm.asm')
-rw-r--r--libavcodec/x86/dsputil_yasm.asm20
1 files changed, 13 insertions, 7 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 5a6c3d1eae..06d2027c69 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_to
RET
-%macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned
+%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
add srcq, wq
add dstq, wq
neg wq
%%.loop:
+%if %2
mova m1, [srcq+wq]
+%else
+ movu m1, [srcq+wq]
+%endif
mova m2, m1
psllw m1, 8
paddb m1, m2
@@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
mova m3, [pb_zz11zz55zz99zzdd]
movd m0, leftm
psllq m0, 56
- ADD_HFYU_LEFT_LOOP 1
+ ADD_HFYU_LEFT_LOOP 1, 1
INIT_XMM
cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
@@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
movd m0, leftm
pslldq m0, 15
test srcq, 15
- jnz add_hfyu_left_prediction_ssse3.skip_prologue
+ jnz .src_unaligned
test dstq, 15
- jnz .unaligned
- ADD_HFYU_LEFT_LOOP 1
-.unaligned:
- ADD_HFYU_LEFT_LOOP 0
+ jnz .dst_unaligned
+ ADD_HFYU_LEFT_LOOP 1, 1
+.dst_unaligned:
+ ADD_HFYU_LEFT_LOOP 0, 1
+.src_unaligned:
+ ADD_HFYU_LEFT_LOOP 0, 0
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)