diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2010-02-24 20:43:06 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2010-02-24 20:43:06 +0000 |
commit | b5bd070029a2091e16641c6be18c5ec101ba0480 (patch) | |
tree | ac60dfe1f32265d875f630ca124a9bccff84c678 /libavcodec/h264.h | |
parent | 8ef4e65e94caad90b2d63ff421c8a8155045cb15 (diff) | |
download | ffmpeg-b5bd070029a2091e16641c6be18c5ec101ba0480.tar.gz |
Change mvd_cache & mvd_table to 8 bit; this is overall a bit faster
for high-resolution videos.
About 20 cycles faster per MB for cathedral.
Originally committed as revision 22038 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.h')
-rw-r--r-- | libavcodec/h264.h | 51 |
1 files changed, 29 insertions, 22 deletions
diff --git a/libavcodec/h264.h b/libavcodec/h264.h index dcfa731d95..c73cc01889 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -486,8 +486,8 @@ typedef struct H264Context{ /* chroma_pred_mode for i4x4 or i16x16, else 0 */ uint8_t *chroma_pred_mode_table; int last_qscale_diff; - int16_t (*mvd_table[2])[2]; - DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2]; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED_16(uint8_t, mvd_cache)[2][5*8][2]; uint8_t *direct_table; uint8_t direct_cache[5*8]; @@ -732,6 +732,14 @@ static av_always_inline uint32_t pack16to32(int a, int b){ #endif } +static av_always_inline uint16_t pack8to16(int a, int b){ +#if HAVE_BIGENDIAN + return (b&0xFF) + (a<<8); +#else + return (a&0xFF) + (b<<8); +#endif +} + /** * gets the chroma qp. */ @@ -1060,32 +1068,31 @@ static void fill_decode_caches(H264Context *h, int mb_type){ /* XXX beurk, Load mvd */ if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; - AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); + AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); }else{ - AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); + AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); } if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); }else{ - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]); - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); } 
if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); }else{ - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]); - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); } - AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]); - AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]); - AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) - AV_ZERO32(h->mvd_cache [list][scan8[4 ]]); - AV_ZERO32(h->mvd_cache [list][scan8[12]]); - + AV_ZERO16(h->mvd_cache [list][scan8[5 ]+1]); + AV_ZERO16(h->mvd_cache [list][scan8[7 ]+1]); + AV_ZERO16(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) + AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); + AV_ZERO16(h->mvd_cache [list][scan8[12]]); if(h->slice_type_nos == FF_B_TYPE){ fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); @@ -1414,13 +1421,13 @@ static inline void write_back_motion(H264Context *h, int mb_type){ AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); } if( CABAC ) { - int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; - int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; + uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; if(IS_SKIP(mb_type)) - fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4); + fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2); else for(y=0; y<4; y++){ - AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y); + AV_COPY64(mvd_dst + 
y*b_stride, mvd_src + 8*y); } } |