about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/h264.h
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2010-02-24 20:43:06 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2010-02-24 20:43:06 +0000
commit: b5bd070029a2091e16641c6be18c5ec101ba0480 (patch)
tree: ac60dfe1f32265d875f630ca124a9bccff84c678 /libavcodec/h264.h
parent: 8ef4e65e94caad90b2d63ff421c8a8155045cb15 (diff)
download: ffmpeg-b5bd070029a2091e16641c6be18c5ec101ba0480.tar.gz
Change mvd_cache & mvd_table to 8 bits; this is overall a bit faster
for high-resolution videos, about 20 cycles faster per MB for cathedral. Originally committed as revision 22038 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.h')
-rw-r--r-- libavcodec/h264.h 51
1 files changed, 29 insertions, 22 deletions
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index dcfa731d95..c73cc01889 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -486,8 +486,8 @@ typedef struct H264Context{
/* chroma_pred_mode for i4x4 or i16x16, else 0 */
uint8_t *chroma_pred_mode_table;
int last_qscale_diff;
- int16_t (*mvd_table[2])[2];
- DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2];
+ uint8_t (*mvd_table[2])[2];
+ DECLARE_ALIGNED_16(uint8_t, mvd_cache)[2][5*8][2];
uint8_t *direct_table;
uint8_t direct_cache[5*8];
@@ -732,6 +732,14 @@ static av_always_inline uint32_t pack16to32(int a, int b){
#endif
}
+static av_always_inline uint16_t pack8to16(int a, int b){ /* Combine two byte-sized values into one uint16_t; in both branches a ends up in the byte stored first in memory (assuming HAVE_BIGENDIAN reflects the target byte order) and b in the byte after it. */
+#if HAVE_BIGENDIAN
+ return (b&0xFF) + (a<<8); /* big-endian: a -> high byte, b -> low byte; only b is masked */
+#else
+ return (a&0xFF) + (b<<8); /* little-endian: a -> low byte, b -> high byte; only a is masked */
+#endif
+} /* NOTE(review): the shifted operand is not masked, so its bits above the low 8 are only dropped by the uint16_t return conversion, and a negative value would be a left shift of a negative int -- presumably callers pass 0..255; confirm. */
+
/**
* gets the chroma qp.
*/
@@ -1060,32 +1068,31 @@ static void fill_decode_caches(H264Context *h, int mb_type){
/* XXX beurk, Load mvd */
if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
- AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
+ AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
}else{
- AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
+ AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
}
if(USES_LIST(left_type[0], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
- AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]);
- AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]);
}else{
- AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
- AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
}
if(USES_LIST(left_type[1], list)){
const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
- AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]);
- AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]);
+ AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]);
}else{
- AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
- AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
+ AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
}
- AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]);
- AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]);
- AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else)
- AV_ZERO32(h->mvd_cache [list][scan8[4 ]]);
- AV_ZERO32(h->mvd_cache [list][scan8[12]]);
-
+ AV_ZERO16(h->mvd_cache [list][scan8[5 ]+1]);
+ AV_ZERO16(h->mvd_cache [list][scan8[7 ]+1]);
+ AV_ZERO16(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else)
+ AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
+ AV_ZERO16(h->mvd_cache [list][scan8[12]]);
if(h->slice_type_nos == FF_B_TYPE){
fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
@@ -1414,13 +1421,13 @@ static inline void write_back_motion(H264Context *h, int mb_type){
AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
}
if( CABAC ) {
- int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy];
- int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
+ uint8_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy];
+ uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
if(IS_SKIP(mb_type))
- fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4);
+ fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2);
else
for(y=0; y<4; y++){
- AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y);
+ AV_COPY64(mvd_dst + y*b_stride, mvd_src + 8*y);
}
}