author | Clément Bœsch <u@pkh.me> | 2014-01-23 07:58:09 +0100
---|---|---
committer | Clément Bœsch <u@pkh.me> | 2014-01-25 11:59:43 +0100
commit | cddbfd2a95540084fdf660b46fe755255f05d4df (patch) |
tree | c4447cf2cca55aa755f87a965cceec096250cd81 /libavcodec/x86 |
parent | 78a9f185eb175e6164b1c0f40d20ff1933ac8fb7 (diff) |
download | ffmpeg-cddbfd2a95540084fdf660b46fe755255f05d4df.tar.gz |
x86/lossless_videodsp: simplify and make aligned/unaligned flags explicit
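The change replaces the numeric is_aligned macro parameter, which forced every load and store to be written twice inside `%if %1 ... %else ... %endif` branches, with a textual `a`/`u` parameter that is pasted straight onto the instruction name (`mov%1`), so the preprocessor itself selects the aligned or unaligned move. Below is a minimal standalone sketch of that token-pasting idea in plain NASM; `COPY_ROW`, the register assignments and the raw `movdqa`/`movdqu` spellings are illustrative assumptions, since the patch glues the suffix onto x86inc's `mova`/`movu` wrappers instead.

```nasm
; Hypothetical illustration only, not code from the commit.
; The alignment is passed as a textual a/u suffix and glued onto the
; instruction name, so one macro body yields both variants.
%macro COPY_ROW 1                   ; %1 = a (aligned) or u (unaligned)
    movdq%1 xmm0, [rsi + rcx]       ; expands to movdqa or movdqu (load)
    movdq%1 [rdi + rcx], xmm0       ; expands to movdqa or movdqu (store)
%ifidn %1, a                        ; %1 is text now, so compare with %ifidn, not %if
    ; work restricted to the aligned variant would go here
%endif
%endmacro

section .text
global copy_row_aligned, copy_row_unaligned
copy_row_aligned:                   ; rdi = dst, rsi = src, rcx = byte offset
    COPY_ROW a
    ret
copy_row_unaligned:
    COPY_ROW u
    ret
```

In the patch the same trick appears as `mov%1 m0, [srcq+wq]`, and `%ifidn %1, a` replaces the old `%if %1` in the one place where a branch is still unavoidable.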
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/lossless_videodsp.asm | 82 |
1 file changed, 27 insertions, 55 deletions
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index a1869b3119..59ca733dda 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
 
 SECTION_TEXT
 
-%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
+%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
     movd    m4, maskd
     SPLATW  m4, m4
     add     wq, wq
@@ -51,28 +51,16 @@ SECTION_TEXT
     neg     wq
     jz %%.end
 %%.loop:
-%if %1
-    mova    m0, [srcq+wq]
-    mova    m1, [dstq+wq]
-    mova    m2, [srcq+wq+mmsize]
-    mova    m3, [dstq+wq+mmsize]
-%else
-    movu    m0, [srcq+wq]
-    movu    m1, [dstq+wq]
-    movu    m2, [srcq+wq+mmsize]
-    movu    m3, [dstq+wq+mmsize]
-%endif
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
     paddw   m0, m1
     paddw   m2, m3
     pand    m0, m4
     pand    m2, m4
-%if %1
-    mova    [dstq+wq]       , m0
-    mova    [dstq+wq+mmsize], m2
-%else
-    movu    [dstq+wq]       , m0
-    movu    [dstq+wq+mmsize], m2
-%endif
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
     add     wq, 2*mmsize
     jl %%.loop
 %%.end:
@@ -81,7 +69,7 @@ SECTION_TEXT
 
 INIT_MMX mmx
 cglobal add_int16, 4,4,5, dst, src, mask, w
-    ADD_INT16_LOOP 1
+    ADD_INT16_LOOP a
 
 INIT_XMM sse2
 cglobal add_int16, 4,4,5, dst, src, mask, w
@@ -89,11 +77,11 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
     jnz .unaligned
     test    dstq, mmsize-1
     jnz .unaligned
-    ADD_INT16_LOOP 1
+    ADD_INT16_LOOP a
 .unaligned:
-    ADD_INT16_LOOP 0
+    ADD_INT16_LOOP u
 
-%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
+%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
     movd    m4, maskd
     SPLATW  m4, m4
     add     wq, wq
@@ -114,28 +102,16 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
     neg     wq
     jz %%.end
 %%.loop:
-%if %1
-    mova    m0, [src1q+wq]
-    mova    m1, [src2q+wq]
-    mova    m2, [src1q+wq+mmsize]
-    mova    m3, [src2q+wq+mmsize]
-%else
-    movu    m0, [src1q+wq]
-    movu    m1, [src2q+wq]
-    movu    m2, [src1q+wq+mmsize]
-    movu    m3, [src2q+wq+mmsize]
-%endif
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
     psubw   m0, m1
     psubw   m2, m3
     pand    m0, m4
     pand    m2, m4
-%if %1
-    mova    [dstq+wq]       , m0
-    mova    [dstq+wq+mmsize], m2
-%else
-    movu    [dstq+wq]       , m0
-    movu    [dstq+wq+mmsize], m2
-%endif
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
     add     wq, 2*mmsize
     jl %%.loop
 %%.end:
@@ -144,7 +120,7 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
 
 INIT_MMX mmx
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
-    DIFF_INT16_LOOP 1
+    DIFF_INT16_LOOP a
 
 INIT_XMM sse2
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@@ -154,22 +130,18 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
     jnz .unaligned
     test    dstq, mmsize-1
     jnz .unaligned
-    DIFF_INT16_LOOP 1
+    DIFF_INT16_LOOP a
 .unaligned:
-    DIFF_INT16_LOOP 0
+    DIFF_INT16_LOOP u
 
 
-%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
+%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
     add     wq, wq
     add     srcq, wq
     add     dstq, wq
     neg     wq
 %%.loop:
-%if %2
-    mova    m1, [srcq+wq]
-%else
-    movu    m1, [srcq+wq]
-%endif
+    mov%2   m1, [srcq+wq]
     mova    m2, m1
     pslld   m1, 16
     paddw   m1, m2
@@ -185,7 +157,7 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
 %endif
     paddw   m0, m1
     pand    m0, m7
-%if %1
+%ifidn %1, a
     mova    [dstq+wq], m0
 %else
     movq    [dstq+wq], m0
@@ -214,7 +186,7 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
     psllq   m0, 48
     movd    m7, maskm
     SPLATW  m7 ,m7
-    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+    ADD_HFYU_LEFT_LOOP_INT16 a, a
 
 INIT_XMM sse4
 cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
@@ -229,11 +201,11 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
     jnz .src_unaligned
     test    dstq, 15
     jnz .dst_unaligned
-    ADD_HFYU_LEFT_LOOP_INT16 1, 1
+    ADD_HFYU_LEFT_LOOP_INT16 a, a
 .dst_unaligned:
-    ADD_HFYU_LEFT_LOOP_INT16 0, 1
+    ADD_HFYU_LEFT_LOOP_INT16 u, a
 .src_unaligned:
-    ADD_HFYU_LEFT_LOOP_INT16 0, 0
+    ADD_HFYU_LEFT_LOOP_INT16 u, u
 
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
 INIT_MMX mmxext
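The choice between the `a` and `u` instantiations remains a run-time decision: the SSE2/SSE4 entry points test the low bits of each pointer and jump to the unaligned copy of the loop as soon as one of them is misaligned, exactly as before the patch. A rough sketch of that dispatch, reusing the hypothetical `COPY_ROW` macro from the earlier example (the real code tests against `mmsize-1` or `15` inside an x86inc `cglobal` body):

```nasm
; Hypothetical dispatch sketch, not code from the commit: use the aligned
; instantiation only when both pointers are 16-byte aligned.
copy_row_dispatch:                  ; rdi = dst, rsi = src, rcx = byte offset
    test    rsi, 16 - 1             ; any low bit set -> src is misaligned
    jnz     .unaligned
    test    rdi, 16 - 1             ; same check for the destination
    jnz     .unaligned
    COPY_ROW a                      ; both aligned: movdqa path
    ret
.unaligned:
    COPY_ROW u                      ; at least one pointer misaligned: movdqu path
    ret
```

add_hfyu_left_prediction_int16 extends the same pattern to two independent alignment parameters, which is why its dispatch instantiates the `a, a`, `u, a` and `u, u` dst/src combinations.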