lavr: handle clipping in the float to s32 conversion

We cannot clip to INT_MAX because that value (2^31 - 1) cannot be represented exactly as a single-precision float: it rounds up to 2^31, which still overflows during the float-to-integer conversion. Instead we clip to a slightly smaller value, the largest float below 2^31 (0x4effffff, i.e. 2147483520.0). This gives slightly inaccurate results for samples which clip or nearly clip, but it is close enough. Using doubles as intermediates in the conversion would be more accurate, but it takes about twice as much time.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
author: Justin Ruggles <justin.ruggles@gmail.com> 2012-10-12 15:46:53 -0400
committer: Luca Barbato <lu_zero@gentoo.org> 2012-10-13 12:34:34 +0200
commit: 10e645e9cb06dc87074232740d4366db18e7ba56 (patch)
tree: bbb634ed7150d0d959c203e1881569f1ea809960 /libavresample
parent: 92281850a2d878dae1d50e271886ba87013b6ff3 (diff)
download: ffmpeg-10e645e9cb06dc87074232740d4366db18e7ba56.tar.gz
1 files changed, 7 insertions, 1 deletions
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 2ebdbc1ec0..25166afff2 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -28,6 +28,7 @@ SECTION_RODATA 32
 
 pf_s32_inv_scale: times 8 dd 0x30000000
 pf_s32_scale:     times 8 dd 0x4f000000
+pf_s32_clip:      times 8 dd 0x4effffff
 pf_s16_inv_scale: times 4 dd 0x38000000
 pf_s16_scale:     times 4 dd 0x47000000
 pb_shuf_unpack_even:      db -1, -1,  0,  1, -1, -1,  2,  3, -1, -1,  8,  9, -1, -1, 10, 11
@@ -197,17 +198,22 @@ cglobal conv_flt_to_s16, 3,3,5, dst, src, len
 ;------------------------------------------------------------------------------
 
 %macro CONV_FLT_TO_S32 0
-cglobal conv_flt_to_s32, 3,3,5, dst, src, len
+cglobal conv_flt_to_s32, 3,3,6, dst, src, len
     lea     lenq, [lend*4]
     add     srcq, lenq
     add     dstq, lenq
     neg     lenq
     mova      m4, [pf_s32_scale]
+    mova      m5, [pf_s32_clip]
 .loop:
     mulps     m0, m4, [srcq+lenq         ]
     mulps     m1, m4, [srcq+lenq+1*mmsize]
     mulps     m2, m4, [srcq+lenq+2*mmsize]
     mulps     m3, m4, [srcq+lenq+3*mmsize]
+    minps     m0, m0, m5
+    minps     m1, m1, m5
+    minps     m2, m2, m5
+    minps     m3, m3, m5
     cvtps2dq  m0, m0
     cvtps2dq  m1, m1
     cvtps2dq  m2, m2
author	Justin Ruggles <justin.ruggles@gmail.com>	2012-10-12 15:46:53 -0400
committer	Luca Barbato <lu_zero@gentoo.org>	2012-10-13 12:34:34 +0200
commit	10e645e9cb06dc87074232740d4366db18e7ba56 (patch)
tree	bbb634ed7150d0d959c203e1881569f1ea809960 /libavresample
parent	92281850a2d878dae1d50e271886ba87013b6ff3 (diff)
download	ffmpeg-10e645e9cb06dc87074232740d4366db18e7ba56.tar.gz