diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2010-01-13 01:59:19 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2010-01-13 01:59:19 +0000 |
commit | e1e949026ec552b206306da00bf90fc797542fe5 (patch) | |
tree | e96e47a073028b429b9ca3ced39a20010df5d567 /libavcodec/h264.h | |
parent | 08f8b51f697edddfb6b66856c0d3343666e5578f (diff) | |
download | ffmpeg-e1e949026ec552b206306da00bf90fc797542fe5.tar.gz |
Split cavlc out of h264.c.
Seems to speed the code up a little...
The placement of many generic functions between h264.c and h264.h is still open
Currently they are a little randomly placed between them.
Originally committed as revision 21178 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.h')
-rw-r--r-- | libavcodec/h264.h | 596 |
1 files changed, 596 insertions, 0 deletions
diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 1c4c956056..703cf8f534 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -32,6 +32,7 @@ #include "cabac.h" #include "mpegvideo.h" #include "h264pred.h" +#include "rectangle.h" #define interlaced_dct interlaced_dct_is_a_bad_name #define mb_intra mb_intra_is_not_initialized_see_mb_type @@ -642,6 +643,13 @@ void ff_h264_hl_decode_mb(H264Context *h); int ff_h264_frame_start(H264Context *h); av_cold int ff_h264_decode_init(AVCodecContext *avctx); av_cold int ff_h264_decode_end(AVCodecContext *avctx); +av_cold void ff_h264_decode_init_vlc(void); + +/** + * decodes a macroblock + * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed + */ +int ff_h264_decode_mb_cavlc(H264Context *h); void ff_h264_direct_dist_scale_factor(H264Context * const h); void ff_h264_direct_ref_list_init(H264Context * const h); @@ -694,4 +702,592 @@ static inline int get_chroma_qp(H264Context *h, int t, int qscale){ return h->pps.chroma_qp_table[t][qscale]; } +static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); + +static void fill_caches(H264Context *h, int mb_type, int for_deblock){ + MpegEncContext * const s = &h->s; + const int mb_xy= h->mb_xy; + int topleft_xy, top_xy, topright_xy, left_xy[2]; + int topleft_type, top_type, topright_type, left_type[2]; + const uint8_t * left_block; + int topleft_partition= -1; + int i; + static const uint8_t left_block_options[4][8]={ + {0,1,2,3,7,10,8,11}, + {2,2,3,3,8,11,8,11}, + {0,0,1,1,7,10,7,10}, + {0,2,0,2,7,10,7,10} + }; + + top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE); + + //FIXME deblocking could skip the intra and nnz parts. + if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF) + return; + + /* Wow, what a mess, why didn't they simplify the interlacing & intra + * stuff, I can't imagine that these complex rules are worth it. */ + + topleft_xy = top_xy - 1; + topright_xy= top_xy + 1; + left_xy[1] = left_xy[0] = mb_xy-1; + left_block = left_block_options[0]; + if(FRAME_MBAFF){ + const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; + const int top_pair_xy = pair_xy - s->mb_stride; + const int topleft_pair_xy = top_pair_xy - 1; + const int topright_pair_xy = top_pair_xy + 1; + const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); + const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); + const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); + const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); + const int curr_mb_field_flag = IS_INTERLACED(mb_type); + const int bottom = (s->mb_y & 1); + tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag); + + if (curr_mb_field_flag && (bottom || top_mb_field_flag)){ + top_xy -= s->mb_stride; + } + if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){ + topleft_xy -= s->mb_stride; + } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) { + topleft_xy += s->mb_stride; + // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition + topleft_partition = 0; + } + if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){ + topright_xy -= s->mb_stride; + } + if (left_mb_field_flag != curr_mb_field_flag) { + left_xy[1] = left_xy[0] = pair_xy - 1; + if (curr_mb_field_flag) { + left_xy[1] += s->mb_stride; + left_block = left_block_options[3]; + } else { + left_block= left_block_options[2 - bottom]; + } + } + } + + h->top_mb_xy = top_xy; + h->left_mb_xy[0] = left_xy[0]; + h->left_mb_xy[1] = left_xy[1]; + if(for_deblock){ + topleft_type = 0; + topright_type = 0; + top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; + left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; + + if(MB_MBAFF && !IS_INTRA(mb_type)){ + int list; + for(list=0; list<h->list_count; list++){ + //These values where changed for ease of performing MC, we need to change them back + //FIXME maybe we can make MC and loop filter use the same values or prevent + //the MC code from changing ref_cache and rather use a temporary array. + if(USES_LIST(mb_type,list)){ + int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; + *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = + *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; + ref += h->b8_stride; + *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = + *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; + } + } + } + }else{ + topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; + top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; + topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; + left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; + + if(IS_INTRA(mb_type)){ + int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; + h->topleft_samples_available= + h->top_samples_available= + h->left_samples_available= 0xFFFF; + h->topright_samples_available= 0xEEEA; + + if(!(top_type & type_mask)){ + h->topleft_samples_available= 0xB3FF; + h->top_samples_available= 0x33FF; + h->topright_samples_available= 0x26EA; + } + if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ + if(IS_INTERLACED(mb_type)){ + if(!(left_type[0] & type_mask)){ + h->topleft_samples_available&= 0xDFFF; + h->left_samples_available&= 0x5FFF; + } + if(!(left_type[1] & type_mask)){ + h->topleft_samples_available&= 0xFF5F; + h->left_samples_available&= 0xFF5F; + } + }else{ + int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num + ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0; + assert(left_xy[0] == left_xy[1]); + if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ + h->topleft_samples_available&= 0xDF5F; + h->left_samples_available&= 0x5F5F; + } + } + }else{ + if(!(left_type[0] & type_mask)){ + h->topleft_samples_available&= 0xDF5F; + h->left_samples_available&= 0x5F5F; + } + } + + if(!(topleft_type & type_mask)) + h->topleft_samples_available&= 0x7FFF; + + if(!(topright_type & type_mask)) + h->topright_samples_available&= 0xFBFF; + + if(IS_INTRA4x4(mb_type)){ + if(IS_INTRA4x4(top_type)){ + h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; + h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; + h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; + h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; + }else{ + int pred; + if(!(top_type & type_mask)) + pred= -1; + else{ + pred= 2; + } + h->intra4x4_pred_mode_cache[4+8*0]= + h->intra4x4_pred_mode_cache[5+8*0]= + h->intra4x4_pred_mode_cache[6+8*0]= + h->intra4x4_pred_mode_cache[7+8*0]= pred; + } + for(i=0; i<2; i++){ + if(IS_INTRA4x4(left_type[i])){ + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; + }else{ + int pred; + if(!(left_type[i] & type_mask)) + pred= -1; + else{ + pred= 2; + } + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; + } + } + } + } + } + + +/* +0 . T T. T T T T +1 L . .L . . . . +2 L . .L . . . . +3 . T TL . . . . +4 L . .L . . . . +5 L . .. . . . . +*/ +//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) + if(top_type){ + h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4]; + h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5]; + h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6]; + h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3]; + + h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9]; + h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8]; + + h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12]; + h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11]; + + }else{ + h->non_zero_count_cache[4+8*0]= + h->non_zero_count_cache[5+8*0]= + h->non_zero_count_cache[6+8*0]= + h->non_zero_count_cache[7+8*0]= + + h->non_zero_count_cache[1+8*0]= + h->non_zero_count_cache[2+8*0]= + + h->non_zero_count_cache[1+8*3]= + h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; + + } + + for (i=0; i<2; i++) { + if(left_type[i]){ + h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]]; + h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]]; + h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]]; + h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]]; + }else{ + h->non_zero_count_cache[3+8*1 + 2*8*i]= + h->non_zero_count_cache[3+8*2 + 2*8*i]= + h->non_zero_count_cache[0+8*1 + 8*i]= + h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; + } + } + + if( h->pps.cabac ) { + // top_cbp + if(top_type) { + h->top_cbp = h->cbp_table[top_xy]; + } else if(IS_INTRA(mb_type)) { + h->top_cbp = 0x1C0; + } else { + h->top_cbp = 0; + } + // left_cbp + if (left_type[0]) { + h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; + } else if(IS_INTRA(mb_type)) { + h->left_cbp = 0x1C0; + } else { + h->left_cbp = 0; + } + if (left_type[0]) { + h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; + } + if (left_type[1]) { + h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; + } + } + +#if 1 + if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ + int list; + for(list=0; list<h->list_count; list++){ + if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){ + /*if(!h->mv_cache_clean[list]){ + memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? + memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); + h->mv_cache_clean[list]= 1; + }*/ + continue; + } + h->mv_cache_clean[list]= 0; + + if(USES_LIST(top_type, list)){ + const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; + const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; + *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3]; + h->ref_cache[list][scan8[0] + 0 - 1*8]= + h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; + h->ref_cache[list][scan8[0] + 2 - 1*8]= + h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; + *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; + } + + for(i=0; i<2; i++){ + int cache_idx = scan8[0] - 1 + i*2*8; + if(USES_LIST(left_type[i], list)){ + const int b_xy= h->mb2b_xy[left_xy[i]] + 3; + const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; + *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; + *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; + h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; + h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; + }else{ + *(uint32_t*)h->mv_cache [list][cache_idx ]= + *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; + h->ref_cache[list][cache_idx ]= + h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + } + } + + if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)) + continue; + + if(USES_LIST(topleft_type, list)){ + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride); + const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride); + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; + h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; + }else{ + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; + h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + } + + if(USES_LIST(topright_type, list)){ + const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; + const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; + *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; + h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; + h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + } + + if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) + continue; + + h->ref_cache[list][scan8[5 ]+1] = + h->ref_cache[list][scan8[7 ]+1] = + h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) + h->ref_cache[list][scan8[4 ]] = + h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; + *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= + *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= + *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) + *(uint32_t*)h->mv_cache [list][scan8[4 ]]= + *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; + + if( h->pps.cabac ) { + /* XXX beurk, Load mvd */ + if(USES_LIST(top_type, list)){ + const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; + } + if(USES_LIST(left_type[0], list)){ + const int b_xy= h->mb2b_xy[left_xy[0]] + 3; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; + } + if(USES_LIST(left_type[1], list)){ + const int b_xy= h->mb2b_xy[left_xy[1]] + 3; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; + } + *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= + *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= + *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) + *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= + *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; + + if(h->slice_type_nos == FF_B_TYPE){ + fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); + + if(IS_DIRECT(top_type)){ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; + }else if(IS_8X8(top_type)){ + int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; + h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; + h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; + }else{ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; + } + + if(IS_DIRECT(left_type[0])) + h->direct_cache[scan8[0] - 1 + 0*8]= 1; + else if(IS_8X8(left_type[0])) + h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; + else + h->direct_cache[scan8[0] - 1 + 0*8]= 0; + + if(IS_DIRECT(left_type[1])) + h->direct_cache[scan8[0] - 1 + 2*8]= 1; + else if(IS_8X8(left_type[1])) + h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; + else + h->direct_cache[scan8[0] - 1 + 2*8]= 0; + } + } + + if(FRAME_MBAFF){ +#define MAP_MVS\ + MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ + MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ + MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ + MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ + MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ + MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) + if(MB_FIELD){ +#define MAP_F2F(idx, mb_type)\ + if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ + h->ref_cache[list][idx] <<= 1;\ + h->mv_cache[list][idx][1] /= 2;\ + h->mvd_cache[list][idx][1] /= 2;\ + } + MAP_MVS +#undef MAP_F2F + }else{ +#define MAP_F2F(idx, mb_type)\ + if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ + h->ref_cache[list][idx] >>= 1;\ + h->mv_cache[list][idx][1] <<= 1;\ + h->mvd_cache[list][idx][1] <<= 1;\ + } + MAP_MVS +#undef MAP_F2F + } + } + } + } +#endif + + h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); +} + +/** + * gets the predicted intra4x4 prediction mode. + */ +static inline int pred_intra_mode(H264Context *h, int n){ + const int index8= scan8[n]; + const int left= h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min= FFMIN(left, top); + + tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); + + if(min<0) return DC_PRED; + else return min; +} + +static inline void write_back_non_zero_count(H264Context *h){ + const int mb_xy= h->mb_xy; + + h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; + h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; + h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; + h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; + h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; + h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; + h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; + + h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; + h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; + h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; + + h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; + h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; + h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; +} + +static inline void write_back_motion(H264Context *h, int mb_type){ + MpegEncContext * const s = &h->s; + const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; + const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; + int list; + + if(!USES_LIST(mb_type, 0)) + fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); + + for(list=0; list<h->list_count; list++){ + int y; + if(!USES_LIST(mb_type, list)) + continue; + + for(y=0; y<4; y++){ + *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; + *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; + } + if( h->pps.cabac ) { + if(IS_SKIP(mb_type)) + fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); + else + for(y=0; y<4; y++){ + *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; + *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; + } + } + + { + int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; + ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; + ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; + ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; + ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; + } + } + + if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){ + if(IS_8X8(mb_type)){ + uint8_t *direct_table = &h->direct_table[b8_xy]; + direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; + direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; + direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; + } + } +} + +static inline int get_dct8x8_allowed(H264Context *h){ + if(h->sps.direct_8x8_inference_flag) + return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); + else + return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); +} + +static void predict_field_decoding_flag(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= h->mb_xy; + int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) + ? s->current_picture.mb_type[mb_xy-1] + : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) + ? s->current_picture.mb_type[mb_xy-s->mb_stride] + : 0; + h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; +} + +/** + * decodes a P_SKIP or B_SKIP macroblock + */ +static void decode_mb_skip(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= h->mb_xy; + int mb_type=0; + + memset(h->non_zero_count[mb_xy], 0, 16); + memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui + + if(MB_FIELD) + mb_type|= MB_TYPE_INTERLACED; + + if( h->slice_type_nos == FF_B_TYPE ) + { + // just for fill_caches. pred_direct_motion will set the real mb_type + mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... + ff_h264_pred_direct_motion(h, &mb_type); + mb_type|= MB_TYPE_SKIP; + } + else + { + int mx, my; + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... + pred_pskip_motion(h, &mx, &my); + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + } + + write_back_motion(h, mb_type); + s->current_picture.mb_type[mb_xy]= mb_type; + s->current_picture.qscale_table[mb_xy]= s->qscale; + h->slice_table[ mb_xy ]= h->slice_num; + h->prev_mb_skipped= 1; +} + #endif /* AVCODEC_H264_H */ |