diff options
author | James Almer <jamrial@gmail.com> | 2017-07-04 15:05:47 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-07-04 23:02:24 -0300 |
commit | bcbe9e444790c6ac299aa01958dcb7e9ac70fc82 (patch) | |
tree | 86f1fd3a9c8dd2bc132a95b2a00b9a1ddc4dee11 /libavcodec/x86 | |
parent | 440285474bb894ae4bd5717ae0470fd3601bc977 (diff) | |
download | ffmpeg-bcbe9e444790c6ac299aa01958dcb7e9ac70fc82.tar.gz |
x86/sbrdsp: zero extend m_max in apply_noise_main
Tested-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/sbrdsp.asm | 28 |
1 file changed, 14 insertions, 14 deletions
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c716184b14..62bbe512ec 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
 apply_noise_main:
 %if ARCH_X86_64 == 0 || WIN64
     mov       kxd, m_maxm
-%define count kxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, count
 %else
-%define count m_maxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
 %endif
     movsxdifnidn noiseq, noised
     dec    noiseq
-    shl    count, 2
+    shl    countd, 2
 %ifdef PIC
     lea NOISE_TABLE, [sbr_noise_table]
 %endif
-    lea        Yq, [Yq + 2*count]
-    add      s_mq, count
-    add   q_filtq, count
+    lea        Yq, [Yq + 2*countq]
+    add      s_mq, countq
+    add   q_filtq, countq
     shl    noiseq, 3
     pxor       m5, m5
-    neg    count
+    neg    countq
 .loop:
-    mova       m1, [q_filtq + count]
+    mova       m1, [q_filtq + countq]
     movu       m3, [noiseq + NOISE_TABLE + 1*mmsize]
     movu       m4, [noiseq + NOISE_TABLE + 2*mmsize]
     add    noiseq, 2*mmsize
@@ -404,7 +404,7 @@ apply_noise_main:
     punpckldq  m1, m1
     mulps      m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
     mulps      m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
-    mova       m3, [s_mq + count]
+    mova       m3, [s_mq + countq]
     ; TODO: replace by a vpermd in AVX2
     punpckhdq  m4, m3, m3
     punpckldq  m3, m3
@@ -414,15 +414,15 @@ apply_noise_main:
     mulps      m4, m0 ; s_m[m] * phi_sign
     pand       m1, m6
     pand       m2, m7
-    movu       m6, [Yq + 2*count]
-    movu       m7, [Yq + 2*count + mmsize]
+    movu       m6, [Yq + 2*countq]
+    movu       m7, [Yq + 2*countq + mmsize]
     addps      m3, m1
     addps      m4, m2
     addps      m6, m3
     addps      m7, m4
-    movu    [Yq + 2*count], m6
-    movu    [Yq + 2*count + mmsize], m7
-    add    count, mmsize
+    movu    [Yq + 2*countq], m6
+    movu    [Yq + 2*countq + mmsize], m7
+    add    countq, mmsize
     jl      .loop
     RET