diff options
author | Falk Hüffner <mellum@users.sourceforge.net> | 2003-01-19 12:06:36 +0000 |
---|---|---|
committer | Falk Hüffner <mellum@users.sourceforge.net> | 2003-01-19 12:06:36 +0000 |
commit | 6b026927919097153e46b8abb51c4d3b89b827b7 (patch) | |
tree | 93686e3b8302215c690870c218660a3d8e08d0b4 /libavcodec/dsputil.c | |
parent | 1d0eab1dcc26344309f624a95d91f97d72f4b847 (diff) | |
download | ffmpeg-6b026927919097153e46b8abb51c4d3b89b827b7.tar.gz |
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
uint32 array index -> segv), so let's just use a nice plain
unobfuscated version, which also happens to be faster for me.
Originally committed as revision 1470 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r-- | libavcodec/dsputil.c | 72 |
1 files changed, 22 insertions, 50 deletions
```diff
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index efdb438c9b..3443d0342d 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -191,60 +191,32 @@ static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
     return s;
 }
 
-static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
 {
-    int s, i, j;
-    UINT32 *sq = squareTbl + 256;
+    int s, i;
+    uint32_t *sq = squareTbl + 256;
 
     s = 0;
     for (i = 0; i < 16; i++) {
-        for (j = 0; j < 16; j += 8) {
-#if 1
-#if LONG_MAX > 2147483647
-            uint64_t x,y;
-            x=*(uint64_t*)pix1;
-            y=*(uint64_t*)pix2;
-
-            s += sq[(x&0xff) - (y&0xff)];
-            s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
-            s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
-            s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
-            s += sq[((x>>32)&0xff) - ((y>>32)&0xff)];
-            s += sq[((x>>40)&0xff) - ((y>>40)&0xff)];
-            s += sq[((x>>48)&0xff) - ((y>>48)&0xff)];
-            s += sq[((x>>56)&0xff) - ((y>>56)&0xff)];
-#else
-            uint32_t x,y;
-            x=*(uint32_t*)pix1;
-            y=*(uint32_t*)pix2;
-
-            s += sq[(x&0xff) - (y&0xff)];
-            s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
-            s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
-            s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
-
-            x=*(uint32_t*)(pix1+4);
-            y=*(uint32_t*)(pix2+4);
-            s += sq[(x&0xff) - (y&0xff)];
-            s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
-            s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
-            s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
-#endif
-#else
-            s += sq[pix1[0] - pix2[0]];
-            s += sq[pix1[1] - pix2[1]];
-            s += sq[pix1[2] - pix2[2]];
-            s += sq[pix1[3] - pix2[3]];
-            s += sq[pix1[4] - pix2[4]];
-            s += sq[pix1[5] - pix2[5]];
-            s += sq[pix1[6] - pix2[6]];
-            s += sq[pix1[7] - pix2[7]];
-#endif
-            pix1 += 8;
-            pix2 += 8;
-        }
-        pix1 += line_size - 16;
-        pix2 += line_size - 16;
+        s += sq[pix1[ 0] - pix2[ 0]];
+        s += sq[pix1[ 1] - pix2[ 1]];
+        s += sq[pix1[ 2] - pix2[ 2]];
+        s += sq[pix1[ 3] - pix2[ 3]];
+        s += sq[pix1[ 4] - pix2[ 4]];
+        s += sq[pix1[ 5] - pix2[ 5]];
+        s += sq[pix1[ 6] - pix2[ 6]];
+        s += sq[pix1[ 7] - pix2[ 7]];
+        s += sq[pix1[ 8] - pix2[ 8]];
+        s += sq[pix1[ 9] - pix2[ 9]];
+        s += sq[pix1[10] - pix2[10]];
+        s += sq[pix1[11] - pix2[11]];
+        s += sq[pix1[12] - pix2[12]];
+        s += sq[pix1[13] - pix2[13]];
+        s += sq[pix1[14] - pix2[14]];
+        s += sq[pix1[15] - pix2[15]];
+
+        pix1 += line_size;
+        pix2 += line_size;
     }
     return s;
 }
```