diff options
author | Oskar Arvidsson <oskar@irock.se> | 2011-03-29 17:48:57 +0200 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-05-10 07:24:33 -0400 |
commit | 6e3ef511d787ff632547059f8730396ff4498e70 (patch) | |
tree | bda396b13aaa01e8dccbd60d697a543eff7643db /libavcodec/h264.c | |
parent | 44ca80df3445a59bc065924d8c6110fa10367d01 (diff) | |
download | ffmpeg-6e3ef511d787ff632547059f8730396ff4498e70.tar.gz |
Add the notion of pixel size in h264 related functions.
In high bit depth the pixels will not be stored in uint8_t like in the
normal case, but in uint16_t. The pixel size is thus 1 in normal bit
depth and 2 in high bit depth.
Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/h264.c')
-rw-r--r-- | libavcodec/h264.c | 323 |
1 files changed, 220 insertions, 103 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 142a07807a..e02b277769 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -314,12 +314,13 @@ static void chroma_dc_dct_c(DCTELEM *block){ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, - qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ + qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, + int pixel_shift){ MpegEncContext * const s = &h->s; const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; const int luma_xy= (mx&3) + ((my&3)<<2); - uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; + uint8_t * src_y = pic->data[0] + ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize; uint8_t * src_cb, * src_cr; int extra_width= h->emu_edge_width; int extra_height= h->emu_edge_height; @@ -336,8 +337,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, || full_my < 0-extra_height || full_mx + 16/*FIXME*/ > pic_width + extra_width || full_my + 16/*FIXME*/ > pic_height + extra_height){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); - src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; emu=1; } @@ -353,8 +354,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); } - src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; - src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; + src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; + src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; if(emu){ s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); @@ -374,14 +375,14 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei int x_offset, int y_offset, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, - int list0, int list1){ + int list0, int list1, int pixel_shift){ MpegEncContext * const s = &h->s; qpel_mc_func *qpix_op= qpix_put; h264_chroma_mc_func chroma_op= chroma_put; - dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; - dest_cb += x_offset + y_offset*h->mb_uvlinesize; - dest_cr += x_offset + y_offset*h->mb_uvlinesize; + dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize; + dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; x_offset += 8*s->mb_x; y_offset += 8*(s->mb_y >> MB_FIELD); @@ -389,7 +390,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; mc_dir_part(h, ref, n, square, chroma_height, delta, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_op, chroma_op); + qpix_op, chroma_op, pixel_shift); qpix_op= qpix_avg; chroma_op= chroma_avg; @@ -399,7 +400,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; mc_dir_part(h, ref, n, square, chroma_height, delta, 1, dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_op, chroma_op); + qpix_op, chroma_op, pixel_shift); } } @@ -409,12 +410,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, - int list0, int list1){ + int list0, int list1, int pixel_shift){ MpegEncContext * const s = &h->s; - dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; - dest_cb += x_offset + y_offset*h->mb_uvlinesize; - dest_cr += x_offset + y_offset*h->mb_uvlinesize; + dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h-> mb_linesize; + dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; x_offset += 8*s->mb_x; y_offset += 8*(s->mb_y >> MB_FIELD); @@ -422,17 +423,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom /* don't optimize for luma-only case, since B-frames usually * use implicit weights => chroma too. */ uint8_t *tmp_cb = s->obmc_scratchpad; - uint8_t *tmp_cr = s->obmc_scratchpad + 8; + uint8_t *tmp_cr = s->obmc_scratchpad + (8 << pixel_shift); uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; int refn0 = h->ref_cache[0][ scan8[n] ]; int refn1 = h->ref_cache[1][ scan8[n] ]; mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, dest_y, dest_cb, dest_cr, - x_offset, y_offset, qpix_put, chroma_put); + x_offset, y_offset, qpix_put, chroma_put, pixel_shift); mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, tmp_y, tmp_cb, tmp_cr, - x_offset, y_offset, qpix_put, chroma_put); + x_offset, y_offset, qpix_put, chroma_put, pixel_shift); if(h->use_weight == 2){ int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; @@ -457,7 +458,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom Picture *ref= &h->ref_list[list][refn]; mc_dir_part(h, ref, n, square, chroma_height, delta, list, dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_put, chroma_put); + qpix_put, chroma_put, pixel_shift); luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); @@ -476,19 +477,21 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, h264_weight_func *weight_op, h264_biweight_func *weight_avg, - int list0, int list1){ + int list0, int list1, int pixel_shift){ if((h->use_weight==2 && list0 && list1 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) || h->use_weight==1) mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put, - weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); + weight_op[0], weight_op[3], weight_avg[0], + weight_avg[3], list0, list1, pixel_shift); else mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, - x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); + x_offset, y_offset, qpix_put, chroma_put, qpix_avg, + chroma_avg, list0, list1, pixel_shift); } -static inline void prefetch_motion(H264Context *h, int list){ +static inline void prefetch_motion(H264Context *h, int list, int pixel_shift){ /* fetch pixels for estimated mv 4 macroblocks ahead * optimized for 64byte cache lines */ MpegEncContext * const s = &h->s; @@ -497,48 +500,54 @@ static inline void prefetch_motion(H264Context *h, int list){ const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; uint8_t **src= h->ref_list[list][refn].data; - int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; + int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift); s->dsp.prefetch(src[0]+off, s->linesize, 4); - off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; + off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift); s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); } } -static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, +static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), - h264_weight_func *weight_op, h264_biweight_func *weight_avg){ + h264_weight_func *weight_op, h264_biweight_func *weight_avg, + int pixel_shift){ MpegEncContext * const s = &h->s; const int mb_xy= h->mb_xy; const int mb_type= s->current_picture.mb_type[mb_xy]; assert(IS_INTER(mb_type)); - prefetch_motion(h, 0); + prefetch_motion(h, 0, pixel_shift); if(IS_16X16(mb_type)){ mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], weight_op, weight_avg, - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift); }else if(IS_16X8(mb_type)){ - mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], &weight_op[1], &weight_avg[1], - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift); + mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], &weight_op[1], &weight_avg[1], - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), + pixel_shift); }else if(IS_8X16(mb_type)){ mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[2], &weight_avg[2], - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift); mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[2], &weight_avg[2], - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), + pixel_shift); }else{ int i; @@ -554,25 +563,30 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[3], &weight_avg[3], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); }else if(IS_SUB_8X4(sub_mb_type)){ - mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, + mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[4], &weight_avg[4], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); + mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[4], &weight_avg[4], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); }else if(IS_SUB_4X8(sub_mb_type)){ mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[5], &weight_avg[5], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[5], &weight_avg[5], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); }else{ int j; assert(IS_SUB_4X4(sub_mb_type)); @@ -582,15 +596,32 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[6], &weight_avg[6], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift); } } } } - prefetch_motion(h, 1); + prefetch_motion(h, 1, pixel_shift); } +#define hl_motion_fn(sh, bits) \ +static av_always_inline void hl_motion_ ## bits(H264Context *h, \ + uint8_t *dest_y, \ + uint8_t *dest_cb, uint8_t *dest_cr, \ + qpel_mc_func (*qpix_put)[16], \ + h264_chroma_mc_func (*chroma_put), \ + qpel_mc_func (*qpix_avg)[16], \ + h264_chroma_mc_func (*chroma_avg), \ + h264_weight_func *weight_op, \ + h264_biweight_func *weight_avg) \ +{ \ + hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \ + qpix_avg, chroma_avg, weight_op, weight_avg, sh); \ +} +hl_motion_fn(0, 8); +hl_motion_fn(1, 16); static void free_tables(H264Context *h, int free_rbsp){ int i; @@ -758,8 +789,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ * Allocate buffers which are not shared amongst multiple threads. */ static int context_init(H264Context *h){ - FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) - FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) + FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail) + FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail) h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; @@ -861,6 +892,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ff_h264_decode_init_vlc(); + h->pixel_shift = 0; + h->thread_context[0] = h; h->outputed_poc = INT_MIN; h->prev_poc_msb= 1<<16; @@ -888,6 +921,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ int ff_h264_frame_start(H264Context *h){ MpegEncContext * const s = &h->s; int i; + const int pixel_shift = h->pixel_shift; if(MPV_frame_start(s, s->avctx) < 0) return -1; @@ -904,14 +938,14 @@ int ff_h264_frame_start(H264Context *h){ assert(s->linesize && s->uvlinesize); for(i=0; i<16; i++){ - h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); - h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); } for(i=0; i<4; i++){ h->block_offset[16+i]= - h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); h->block_offset[24+16+i]= - h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); } /* can't be in alloc_tables because linesize isn't known there. @@ -945,6 +979,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src MpegEncContext * const s = &h->s; uint8_t *top_border; int top_idx = 1; + const int pixel_shift = h->pixel_shift; src_y -= linesize; src_cb -= uvlinesize; @@ -955,9 +990,16 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src if(!MB_MBAFF){ top_border = h->top_borders[0][s->mb_x]; AV_COPY128(top_border, src_y + 15*linesize); + if (pixel_shift) + AV_COPY128(top_border+16, src_y+15*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb+7*uvlinesize); + AV_COPY128(top_border+48, src_cr+7*uvlinesize); + } else { AV_COPY64(top_border+16, src_cb+7*uvlinesize); AV_COPY64(top_border+24, src_cr+7*uvlinesize); + } } } }else if(MB_MBAFF){ @@ -970,14 +1012,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src // There are two lines saved, the line above the the top macroblock of a pair, // and the line above the bottom macroblock AV_COPY128(top_border, src_y + 16*linesize); + if (pixel_shift) + AV_COPY128(top_border+16, src_y+16*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb+8*uvlinesize); + AV_COPY128(top_border+48, src_cr+8*uvlinesize); + } else { AV_COPY64(top_border+16, src_cb+8*uvlinesize); AV_COPY64(top_border+24, src_cr+8*uvlinesize); + } } } -static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ +static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, + uint8_t *src_cb, uint8_t *src_cr, + int linesize, int uvlinesize, + int xchg, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; int deblock_left; int deblock_top; @@ -1002,41 +1054,62 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c deblock_top = (s->mb_y > !!MB_FIELD); } - src_y -= linesize + 1; - src_cb -= uvlinesize + 1; - src_cr -= uvlinesize + 1; + src_y -= linesize + 1 + pixel_shift; + src_cb -= uvlinesize + 1 + pixel_shift; + src_cr -= uvlinesize + 1 + pixel_shift; top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; top_border = h->top_borders[top_idx][s->mb_x]; #define XCHG(a,b,xchg)\ + if (pixel_shift) {\ + if (xchg) {\ + AV_SWAP64(b+0,a+0);\ + AV_SWAP64(b+8,a+8);\ + } else {\ + AV_COPY128(b,a); \ + }\ + } else \ if (xchg) AV_SWAP64(b,a);\ else AV_COPY64(b,a); if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+8, src_y -7, 1); + XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1); } - XCHG(top_border+0, src_y +1, xchg); - XCHG(top_border+8, src_y +9, 1); + XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); + XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); if(s->mb_x+1 < s->mb_width){ - XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); + XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1); } } - if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+16, src_cb -7, 1); - XCHG(top_border_m1+24, src_cr -7, 1); + XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); + XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); } - XCHG(top_border+16, src_cb+1, 1); - XCHG(top_border+24, src_cr+1, 1); + XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1); + XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1); } } } -static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ +static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) { + if (high_bit_depth) { + return AV_RN32A(((int32_t*)mb) + index); + } else + return AV_RN16A(mb + index); +} + +static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) { + if (high_bit_depth) { + AV_WN32A(((int32_t*)mb) + index, value); + } else + AV_WN16A(mb + index, value); +} + +static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; const int mb_x= s->mb_x; const int mb_y= s->mb_y; @@ -1052,12 +1125,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); - dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; + dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; + dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; - s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); - s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); + s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); + s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); h->list_counts[mb_xy]= h->list_count; @@ -1094,6 +1167,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } if (!simple && IS_INTRA_PCM(mb_type)) { + if (pixel_shift) { + const int bit_depth = h->sps.bit_depth_luma; + int j; + GetBitContext gb; + init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth); + + for (i = 0; i < 16; i++) { + uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize); + for (j = 0; j < 16; j++) + tmp_y[j] = get_bits(&gb, bit_depth); + } + for (i = 0; i < 8; i++) { + uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); + for (j = 0; j < 8; j++) + tmp_cb[j] = get_bits(&gb, bit_depth); + } + for (i = 0; i < 8; i++) { + uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); + for (j = 0; j < 8; j++) + tmp_cr[j] = get_bits(&gb, bit_depth); + } + } else { for (i=0; i<16; i++) { memcpy(dest_y + i* linesize, h->mb + i*8, 16); } @@ -1101,10 +1196,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); } + } } else { if(IS_INTRA(mb_type)){ if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); @@ -1125,16 +1221,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ uint8_t * const ptr= dest_y + block_offset[i]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); + h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); }else{ const int nnz = h->non_zero_count_cache[ scan8[i] ]; h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, (h->topright_samples_available<<i)&0x4000, linesize); if(nnz){ - if(nnz == 1 && h->mb[i*16]) - idct_dc_add(ptr, h->mb + i*16, linesize); + if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16)) + idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); else - idct_add (ptr, h->mb + i*16, linesize); + idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize); } } } @@ -1151,18 +1247,24 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); + h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); }else{ uint8_t *topright; int nnz, tr; + uint64_t tr_high; if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ const int topright_avail= (h->topright_samples_available<<i)&0x8000; assert(mb_y || linesize <= block_offset[i]); if(!topright_avail){ + if (pixel_shift) { + tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; + topright= (uint8_t*) &tr_high; + } else { tr= ptr[3 - linesize]*0x01010101; topright= (uint8_t*) &tr; + } }else - topright= ptr + 4 - linesize; + topright= ptr + (4 << pixel_shift) - linesize; }else topright= NULL; @@ -1170,10 +1272,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ nnz = h->non_zero_count_cache[ scan8[i] ]; if(nnz){ if(is_h264){ - if(nnz == 1 && h->mb[i*16]) - idct_dc_add(ptr, h->mb + i*16, linesize); + if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16)) + idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); else - idct_add (ptr, h->mb + i*16, linesize); + idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize); }else ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); } @@ -1191,19 +1293,27 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; for(i = 0; i < 16; i++) - h->mb[dc_mapping[i]] = h->mb_luma_dc[i]; + dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i)); } } }else ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale); } if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift); }else if(is_h264){ - hl_motion(h, dest_y, dest_cb, dest_cr, - s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, - s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, - h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); + if (pixel_shift) { + hl_motion_16(h, dest_y, dest_cb, dest_cr, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + h->h264dsp.biweight_h264_pixels_tab); + } else + hl_motion_8(h, dest_y, dest_cb, dest_cr, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + h->h264dsp.biweight_h264_pixels_tab); } @@ -1215,8 +1325,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); }else{ for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) - s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); + if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize); } } }else{ @@ -1228,7 +1338,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; for(i=0; i<16; i+=di){ if(h->non_zero_count_cache[ scan8[i] ]){ - idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize); } } }else{ @@ -1253,21 +1363,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ uint8_t *dest[2] = {dest_cb, dest_cr}; if(transform_bypass){ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize); }else{ idct_add = s->dsp.add_pixels4; for(i=16; i<16+8; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) - idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); + if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); } } }else{ if(is_h264){ if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, h->non_zero_count_cache); @@ -1291,15 +1401,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ /** * Process a macroblock; this case avoids checks for expensive uncommon cases. */ -static void hl_decode_mb_simple(H264Context *h){ - hl_decode_mb_internal(h, 1); +#define hl_decode_mb_simple(sh, bits) \ +static void hl_decode_mb_simple_ ## bits(H264Context *h){ \ + hl_decode_mb_internal(h, 1, sh); \ } +hl_decode_mb_simple(0, 8); +hl_decode_mb_simple(1, 16); /** * Process a macroblock; this handles edge cases, such as interlacing. */ static void av_noinline hl_decode_mb_complex(H264Context *h){ - hl_decode_mb_internal(h, 0); + hl_decode_mb_internal(h, 0, h->pixel_shift); } void ff_h264_hl_decode_mb(H264Context *h){ @@ -1308,9 +1421,12 @@ void ff_h264_hl_decode_mb(H264Context *h){ const int mb_type= s->current_picture.mb_type[mb_xy]; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; - if (is_complex) + if (is_complex) { hl_decode_mb_complex(h); - else hl_decode_mb_simple(h); + } else if (h->pixel_shift) { + hl_decode_mb_simple_16(h); + } else + hl_decode_mb_simple_8(h); } static int pred_weight_table(H264Context *h){ @@ -2432,6 +2548,7 @@ static void loop_filter(H264Context *h){ int linesize, uvlinesize, mb_x, mb_y; const int end_mb_y= s->mb_y + FRAME_MBAFF; const int old_slice_type= h->slice_type; + const int pixel_shift = h->pixel_shift; if(h->deblocking_filter) { for(mb_x= 0; mb_x<s->mb_width; mb_x++){ @@ -2447,9 +2564,9 @@ static void loop_filter(H264Context *h){ s->mb_x= mb_x; s->mb_y= mb_y; - dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; + dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; + dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; //FIXME simplify above if (MB_FIELD) { |