aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2002-09-15 10:02:15 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2002-09-15 10:02:15 +0000
commit 8f2b21a80f80eb76b93be5bb96eb2ff113ae3c49 (patch)
tree c17ea18bb7307f7674a7fc6c0a7df2bee6950ee0
parent 5596c60cceaee5f59ea67d71601f86c49b21d4fa (diff)
download ffmpeg-8f2b21a80f80eb76b93be5bb96eb2ff113ae3c49.tar.gz
fixing memalign
Originally committed as revision 949 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/mem.c 22
1 files changed, 20 insertions, 2 deletions
diff --git a/libavcodec/mem.c b/libavcodec/mem.c
index 113e285e7c..5799c07744 100644
--- a/libavcodec/mem.c
+++ b/libavcodec/mem.c
@@ -29,8 +29,8 @@
void *av_malloc(int size)
{
void *ptr;
-#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN ) && 0
- ptr = memalign(64,size);
+#if defined (HAVE_MEMALIGN)
+ ptr = memalign(16,size);
/* Why 64?
Indeed, we should align it:
on 4 for 386
@@ -40,11 +40,29 @@ void *av_malloc(int size)
Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here!
*/
+ /* Why 16?
+ Because some CPUs need alignment, for example SSE2 on P4 and most RISC CPUs:
+ an unaligned load will either trigger an exception and be done in the
+ exception handler, or it will just segfault (SSE2 on P4).
+ Why not larger? Because I didn't see a difference in benchmarks ...
+ */
+ /* benchmarks with p3
+ memalign(64)+1 3071,3051,3032
+ memalign(64)+2 3051,3032,3041
+ memalign(64)+4 2911,2896,2915
+ memalign(64)+8 2545,2554,2550
+ memalign(64)+16 2543,2572,2563
+ memalign(64)+32 2546,2545,2571
+ memalign(64)+64 2570,2533,2558
+
+ By the way, malloc seems to do 8-byte alignment by default here.
+ */
#else
ptr = malloc(size);
#endif
if (!ptr)
return NULL;
+//fprintf(stderr, "%X %d\n", (int)ptr, size);
/* NOTE: this memset should not be present */
memset(ptr, 0, size);
return ptr;