diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2008-02-21 07:10:46 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2008-02-21 07:10:46 +0000 |
commit | 4a9ca0a279375e8dae774deef0c22b437935db20 (patch) | |
tree | 051ff058e0135aeacb61f6e7321d15a0ff3cfa7a /libavcodec/dsputil.c | |
parent | 1435e4ccdeb913e5d32ce814617c18379a3d1ecc (diff) | |
download | ffmpeg-4a9ca0a279375e8dae774deef0c22b437935db20.tar.gz |
simd and unroll png_filter_row
cycles per 1000 pixels on core2:
left: 9211->5170
top: 9283->2138
avg: 12215->7611
paeth: 64024->17360
overall rgb png decoding speed: +45%
overall greyscale png decoding speed: +6%
Originally committed as revision 12164 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r-- | libavcodec/dsputil.c | 18 |
1 files changed, 18 insertions, 0 deletions
// diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
// index abd588bec5..c9b02a0542 100644
// --- a/libavcodec/dsputil.c
// +++ b/libavcodec/dsputil.c
//
// @@ -44,6 +44,9 @@ void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
//  /* flacenc.c */
//  void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
// +/* pngdec.c */
// +void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
// +
//  uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
//  uint32_t ff_squareTbl[512] = {0, };
//
// @@ -3288,6 +3291,17 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
//          dst[i+0] += src[i+0];
//  }
//
// The function added by this hunk follows as real code (reviewed version).

/**
 * Byte-wise modular sum of two buffers: dst[i] = (src1[i] + src2[i]) & 0xff
 * for every i in [0, w).
 *
 * The bulk of the buffer is processed one machine word at a time. To add
 * all bytes of a word in parallel without a carry leaking from one byte
 * into its neighbour, the top bit of every byte is masked off before the
 * addition (so each per-byte sum fits in 8 bits) and the sum of the top
 * bits is then restored with an XOR of (a ^ b) & 0x80..80.
 *
 * @param dst  destination buffer, at least w bytes
 * @param src1 first source buffer, at least w bytes
 * @param src2 second source buffer, at least w bytes
 * @param w    number of bytes to process; values <= 0 process nothing
 *
 * NOTE(review): the word loop accesses the buffers through unsigned long
 * pointers, exactly as the original did through long pointers. This
 * presumably relies on the target tolerating unaligned word access and on
 * the build not exploiting strict aliasing -- confirm for new platforms.
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* Masks are derived from the word size instead of being hard-coded
     * 64-bit literals, so they are also correct where long is 32 bits. */
    const unsigned long ones    = ~0UL / 255;   /* 0x0101...01 */
    const unsigned long low7    = ones * 0x7f;  /* 0x7f7f...7f */
    const unsigned long highbit = ones * 0x80;  /* 0x8080...80 */
    const int step = (int)sizeof(unsigned long);
    int i;

    /* The bound must be computed in signed arithmetic: with an unsigned
     * sizeof operand, w < sizeof(long) would wrap to a huge value and the
     * loop would run far past the end of the buffers. */
    for(i=0; i<=w-step; i+=step){
        /* Unsigned words: the masked sum can reach 0xfefe...fe, which would
         * overflow a signed long. */
        unsigned long a = *(const unsigned long*)(src1+i);
        unsigned long b = *(const unsigned long*)(src2+i);
        *(unsigned long*)(dst+i) = ((a&low7) + (b&low7)) ^ ((a^b)&highbit);
    }
    /* Remaining tail bytes (fewer than one word). */
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}

// (same hunk, trailing context)
//  static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
//      int i;
//      for(i=0; i+7<w; i+=8){
//
// @@ -4232,9 +4246,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
//      c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
//      c->add_bytes= add_bytes_c;
// +    c->add_bytes_l2= add_bytes_l2_c;
//      c->diff_bytes= diff_bytes_c;
//      c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
//      c->bswap_buf= bswap_buf;
// +#ifdef CONFIG_PNG_DECODER
// +    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
// +#endif
//      c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
//      c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;