diff options
author | Baptiste Coudurier <baptiste.coudurier@gmail.com> | 2011-08-15 00:39:55 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-08-15 00:39:55 +0200 |
commit | 231a6df9eaf438a7d3dc802fce8c094d979796e8 (patch) | |
tree | 7a10055446b8ef9b71a8d62836ca8f307dc1ba03 /libavcodec | |
parent | 9a33078b64d5b854a3a0b631e747205851d98476 (diff) | |
download | ffmpeg-231a6df9eaf438a7d3dc802fce8c094d979796e8.tar.gz |
h264dec: h264: 4:2:2 intra decoding
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/h264dsp_init_arm.c | 9 | ||||
-rw-r--r-- | libavcodec/arm/h264pred_init_arm.c | 2 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 2 | ||||
-rw-r--r-- | libavcodec/h264.c | 92 | ||||
-rw-r--r-- | libavcodec/h264.h | 12 | ||||
-rw-r--r-- | libavcodec/h264_cabac.c | 38 | ||||
-rw-r--r-- | libavcodec/h264_cavlc.c | 116 | ||||
-rw-r--r-- | libavcodec/h264_loopfilter.c | 61 | ||||
-rw-r--r-- | libavcodec/h264_mvpred.h | 7 | ||||
-rw-r--r-- | libavcodec/h264_ps.c | 5 | ||||
-rw-r--r-- | libavcodec/h264data.h | 9 | ||||
-rw-r--r-- | libavcodec/h264dsp.c | 38 | ||||
-rw-r--r-- | libavcodec/h264dsp.h | 8 | ||||
-rw-r--r-- | libavcodec/h264dsp_template.c | 16 | ||||
-rw-r--r-- | libavcodec/h264idct_template.c | 69 | ||||
-rw-r--r-- | libavcodec/h264pred.c | 51 | ||||
-rw-r--r-- | libavcodec/h264pred.h | 6 | ||||
-rw-r--r-- | libavcodec/h264pred_template.c | 138 | ||||
-rw-r--r-- | libavcodec/ppc/h264_altivec.c | 5 | ||||
-rw-r--r-- | libavcodec/rv34.c | 2 | ||||
-rw-r--r-- | libavcodec/vp8.c | 2 | ||||
-rw-r--r-- | libavcodec/x86/h264_intrapred_init.c | 47 | ||||
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 23 |
23 files changed, 623 insertions, 135 deletions
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index e9146405c2..e2c226557e 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) +static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { if (bit_depth == 8) { c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; @@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; c->h264_idct_add16 = ff_h264_idct_add16_neon; c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; - c->h264_idct_add8 = ff_h264_idct_add8_neon; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_neon; c->h264_idct8_add = ff_h264_idct8_add_neon; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; c->h264_idct8_add4 = ff_h264_idct8_add4_neon; } } -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { - if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); + if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); } diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index b1d4f005e8..62f891e0fb 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); -static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth) +static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) { const int high_depth = bit_depth > 8; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index e8b36aef90..07ef196185 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ +void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ +void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\ void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); H264_IDCT( 8) diff --git a/libavcodec/h264.c b/libavcodec/h264.c index bfc73cb86d..29426f74b6 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ dst->list_counts = src->list_counts; dst->s.obmc_scratchpad = NULL; - ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); + ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc); } /** @@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){ s->height = s->avctx->height; s->codec_id= s->avctx->codec->id; - ff_h264dsp_init(&h->h264dsp, 8); - ff_h264_pred_init(&h->hpc, s->codec_id, 8); + ff_h264dsp_init(&h->h264dsp, 8, 1); + ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); h->dequant_coeff_pps= -1; s->unrestricted_mv=1; @@ -1428,11 +1428,13 @@ static void decode_postinit(H264Context *h, int setup_finished){ ff_thread_finish_setup(s->avctx); } -static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){ +static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ MpegEncContext * const s = &h->s; uint8_t *top_border; int top_idx = 1; const int pixel_shift = h->pixel_shift; + int chroma444 = CHROMA444; + int chroma422 = CHROMA422; src_y -= linesize; src_cb -= uvlinesize; @@ -1456,6 +1458,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui AV_COPY128(top_border+16, src_cb + 15*uvlinesize); AV_COPY128(top_border+32, src_cr + 15*uvlinesize); } + } else if(chroma422){ + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb + 15*uvlinesize); + AV_COPY128(top_border+48, src_cr + 15*uvlinesize); + } else { + AV_COPY64(top_border+16, src_cb + 15*uvlinesize); + AV_COPY64(top_border+24, src_cr + 15*uvlinesize); + } } else { if (pixel_shift) { AV_COPY128(top_border+32, src_cb+7*uvlinesize); @@ -1491,6 +1501,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui AV_COPY128(top_border+16, src_cb + 16*linesize); AV_COPY128(top_border+32, src_cr + 16*linesize); } + } else if(chroma422) { + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb+16*uvlinesize); + AV_COPY128(top_border+48, src_cr+16*uvlinesize); + } else { + AV_COPY64(top_border+16, src_cb+16*uvlinesize); + AV_COPY64(top_border+24, src_cr+16*uvlinesize); + } } else { if (pixel_shift) { AV_COPY128(top_border+32, src_cb+8*uvlinesize); @@ -1769,10 +1787,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i /* is_h264 should always be true if SVQ3 is disabled. */ const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + const int block_h = 16>>s->chroma_y_shift; dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; + dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); @@ -1785,8 +1804,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i block_offset = &h->block_offset[48]; if(mb_y&1){ //FIXME move out of this function? dest_y -= s->linesize*15; - dest_cb-= s->uvlinesize*7; - dest_cr-= s->uvlinesize*7; + dest_cb-= s->uvlinesize*(block_h-1); + dest_cr-= s->uvlinesize*(block_h-1); } if(FRAME_MBAFF) { int list; @@ -1833,12 +1852,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i } } } else { - for (i = 0; i < 8; i++) { + for (i = 0; i < block_h; i++) { uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); for (j = 0; j < 8; j++) tmp_cb[j] = get_bits(&gb, bit_depth); } - for (i = 0; i < 8; i++) { + for (i = 0; i < block_h; i++) { uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); for (j = 0; j < 8; j++) tmp_cr[j] = get_bits(&gb, bit_depth); @@ -1856,7 +1875,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8); } } else { - for (i=0; i<8; i++) { + for (i=0; i<block_h; i++) { memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); } @@ -1904,10 +1923,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i } }else{ if(is_h264){ + int qp[2]; + if (CHROMA422) { + qp[0] = h->chroma_qp[0]+3; + qp[1] = h->chroma_qp[1]+3; + } else { + qp[0] = h->chroma_qp[0]; + qp[1] = h->chroma_qp[1]; + } if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]); if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]); h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, h->non_zero_count_cache); @@ -2545,11 +2572,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ h->b_stride= s->mb_width*4; + s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p + s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); if(h->sps.frame_mbs_only_flag) - s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); + s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); else - s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); + s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); if (s->context_initialized && ( s->width != s->avctx->width || s->height != s->avctx->height @@ -2594,14 +2623,24 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ switch (h->sps.bit_depth_luma) { case 9 : - s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; + if (CHROMA444) + s->avctx->pix_fmt = PIX_FMT_YUV444P9; + else + s->avctx->pix_fmt = PIX_FMT_YUV420P9; break; case 10 : - s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; + if (CHROMA444) + s->avctx->pix_fmt = PIX_FMT_YUV444P10; + else if (CHROMA422) + s->avctx->pix_fmt = PIX_FMT_YUV422P10; + else + s->avctx->pix_fmt = PIX_FMT_YUV420P10; break; default: if (CHROMA444){ s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; + }else if (CHROMA422) { + s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; }else{ s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts ? @@ -3263,6 +3302,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ const int end_mb_y= s->mb_y + FRAME_MBAFF; const int old_slice_type= h->slice_type; const int pixel_shift = h->pixel_shift; + const int block_h = 16>>s->chroma_y_shift; if(h->deblocking_filter) { for(mb_x= start_x; mb_x<end_x; mb_x++){ @@ -3279,8 +3319,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ s->mb_x= mb_x; s->mb_y= mb_y; dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); - dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); + dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; + dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; //FIXME simplify above if (MB_FIELD) { @@ -3288,14 +3328,14 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; if(mb_y&1){ //FIXME move out of this function? dest_y -= s->linesize*15; - dest_cb-= s->uvlinesize*((8 << CHROMA444)-1); - dest_cr-= s->uvlinesize*((8 << CHROMA444)-1); + dest_cb-= s->uvlinesize*(block_h-1); + dest_cr-= s->uvlinesize*(block_h-1); } } else { linesize = h->mb_linesize = s->linesize; uvlinesize = h->mb_uvlinesize = s->uvlinesize; } - backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0); + backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); if(fill_filter_caches(h, mb_type)) continue; h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]); @@ -3731,13 +3771,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ if(avctx->has_b_frames < 2) avctx->has_b_frames= !s->low_delay; - if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { + if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma || + h->cur_chroma_format_idc != h->sps.chroma_format_idc) { if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { avctx->bits_per_raw_sample = h->sps.bit_depth_luma; + h->cur_chroma_format_idc = h->sps.chroma_format_idc; h->pixel_shift = h->sps.bit_depth_luma > 8; - ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); - ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); + ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc); + ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc); s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; dsputil_init(&s->dsp, s->avctx); } else { diff --git a/libavcodec/h264.h b/libavcodec/h264.h index d448fc3330..2809e3253e 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -39,13 +39,6 @@ #define interlaced_dct interlaced_dct_is_a_bad_name #define mb_intra mb_intra_is_not_initialized_see_mb_type -#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 -#define COEFF_TOKEN_VLC_BITS 8 -#define TOTAL_ZEROS_VLC_BITS 9 -#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 -#define RUN_VLC_BITS 3 -#define RUN7_VLC_BITS 6 - #define MAX_SPS_COUNT 32 #define MAX_PPS_COUNT 256 @@ -92,6 +85,7 @@ #define CABAC h->pps.cabac #endif +#define CHROMA422 (h->sps.chroma_format_idc == 2) #define CHROMA444 (h->sps.chroma_format_idc == 3) #define EXTENDED_SAR 255 @@ -582,6 +576,8 @@ typedef struct H264Context{ // Timestamp stuff int sei_buffering_period_present; ///< Buffering period SEI flag int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs + + int cur_chroma_format_idc; }H264Context; @@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){ AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); - if(CHROMA444){ + if(!h->s.chroma_y_shift){ AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 6bbf87f0ba..55cafc7bbc 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1587,6 +1587,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } }; + static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). * map node ctx => cabac ctx for level=1 */ @@ -1651,12 +1652,20 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, last_coeff_ctx_base, sig_off); } else { - coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, - last_coeff_ctx_base-significant_coeff_ctx_base); + if (is_dc && max_coeff == 8) { // dc 422 + DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); + } else { + coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, + last_coeff_ctx_base-significant_coeff_ctx_base); + } #else DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); } else { - DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); + if (is_dc && max_coeff == 8) { // dc 422 + DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); + } else { + DECODE_SIGNIFICANCE(max_coeff - 1, last, last); + } #endif } assert(coeff_count > 0); @@ -1692,6 +1701,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT } \ } else { \ int coeff_abs = 2; \ + if (is_dc && max_coeff == 8) \ + node_ctx = FFMIN(node_ctx, 6); \ ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \ node_ctx = coeff_abs_level_transition[1][node_ctx]; \ \ @@ -2315,22 +2326,31 @@ decode_intra_mb: decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); } else { + const int num_c8x8 = h->sps.chroma_format_idc; + if( cbp&0x30 ){ int c; for( c = 0; c < 2; c++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); - decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); + decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, + CHROMA_DC_BLOCK_INDEX+c, + CHROMA422 ? chroma422_dc_scan : chroma_dc_scan, + 4*num_c8x8); } } if( cbp&0x20 ) { - int c, i; + int c, i, i8x8; for( c = 0; c < 2; c++ ) { + DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; - for( i = 0; i < 4; i++ ) { - const int index = 16 + 16 * c + i; - //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); - decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15); + for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) { + for (i = 0; i < 4; i++) { + const int index = 16 + 16 * c + 8*i8x8 + i; + //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); + decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15); + mb += 16<<pixel_shift; + } } } } else { diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 0cd147fd7a..cc38370047 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={ 2, 3, 2, 0, }; +static const uint8_t chroma422_dc_coeff_token_len[4*9]={ + 1, 0, 0, 0, + 7, 2, 0, 0, + 7, 7, 3, 0, + 9, 7, 7, 5, + 9, 9, 7, 6, + 10, 10, 9, 7, + 11, 11, 10, 7, + 12, 12, 11, 10, + 13, 12, 12, 11, +}; + +static const uint8_t chroma422_dc_coeff_token_bits[4*9]={ + 1, 0, 0, 0, + 15, 1, 0, 0, + 14, 13, 1, 0, + 7, 12, 11, 1, + 6, 5, 10, 1, + 7, 6, 4, 9, + 7, 6, 5, 8, + 7, 6, 5, 4, + 7, 5, 4, 4, +}; + static const uint8_t coeff_token_len[4][4*17]={ { 1, 0, 0, 0, @@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= { { 1, 0, 0, 0,}, }; +static const uint8_t chroma422_dc_total_zeros_len[7][8]= { + { 1, 3, 3, 4, 4, 4, 5, 5 }, + { 3, 2, 3, 3, 3, 3, 3 }, + { 3, 3, 2, 2, 3, 3 }, + { 3, 2, 2, 2, 3 }, + { 2, 2, 2, 2 }, + { 2, 2, 1 }, + { 1, 1 }, +}; + +static const uint8_t chroma422_dc_total_zeros_bits[7][8]= { + { 1, 2, 3, 2, 3, 1, 1, 0 }, + { 0, 1, 1, 4, 5, 6, 7 }, + { 0, 1, 1, 2, 6, 7 }, + { 6, 0, 1, 2, 7 }, + { 0, 1, 2, 3 }, + { 0, 1, 1 }, + { 0, 1 }, +}; + static const uint8_t run_len[7][16]={ {1,1}, {1,2,2}, @@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc; static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; static const int chroma_dc_coeff_token_vlc_table_size = 256; +static VLC chroma422_dc_coeff_token_vlc; +static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; +static const int chroma422_dc_coeff_token_vlc_table_size = 8192; + static VLC total_zeros_vlc[15]; static VLC_TYPE total_zeros_vlc_tables[15][512][2]; static const int total_zeros_vlc_tables_size = 512; @@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3]; static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; static const int chroma_dc_total_zeros_vlc_tables_size = 8; +static VLC chroma422_dc_total_zeros_vlc[7]; +static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; +static const int chroma422_dc_total_zeros_vlc_tables_size = 32; + static VLC run_vlc[6]; static VLC_TYPE run_vlc_tables[6][8][2]; static const int run_vlc_tables_size = 8; @@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96; #define LEVEL_TAB_BITS 8 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; +#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 +#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 +#define COEFF_TOKEN_VLC_BITS 8 +#define TOTAL_ZEROS_VLC_BITS 9 +#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 +#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 +#define RUN_VLC_BITS 3 +#define RUN7_VLC_BITS 6 /** * gets the predicted number of non-zero coefficients. @@ -277,6 +337,13 @@ av_cold void ff_h264_decode_init_vlc(void){ &chroma_dc_coeff_token_bits[0], 1, 1, INIT_VLC_USE_NEW_STATIC); + chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table; + chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size; + init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9, + &chroma422_dc_coeff_token_len [0], 1, 1, + &chroma422_dc_coeff_token_bits[0], 1, 1, + INIT_VLC_USE_NEW_STATIC); + offset = 0; for(i=0; i<4; i++){ coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; @@ -303,6 +370,17 @@ av_cold void ff_h264_decode_init_vlc(void){ &chroma_dc_total_zeros_bits[i][0], 1, 1, INIT_VLC_USE_NEW_STATIC); } + + for(i=0; i<7; i++){ + chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i]; + chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size; + init_vlc(&chroma422_dc_total_zeros_vlc[i], + CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, + &chroma422_dc_total_zeros_len [i][0], 1, 1, + &chroma422_dc_total_zeros_bits[i][0], 1, 1, + INIT_VLC_USE_NEW_STATIC); + } + for(i=0; i<15; i++){ total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; @@ -372,7 +450,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in //FIXME put trailing_onex into the context if(max_coeff <= 8){ - coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); + if (max_coeff == 4) + coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); + else + coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1); total_coeff= coeff_token>>2; }else{ if(n >= LUMA_DC_BLOCK_INDEX){ @@ -482,11 +563,16 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in if(total_coeff == max_coeff) zeros_left=0; else{ - /* FIXME: we don't actually support 4:2:2 yet. */ - if(max_coeff <= 8) - zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); - else + if (max_coeff <= 8) { + if (max_coeff == 4) + zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table, + CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); + else + zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table, + CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1); + } else { zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); + } } #define STORE_BLOCK(type) \ @@ -993,7 +1079,7 @@ decode_intra_mb: s->current_picture.f.mb_type[mb_xy] = mb_type; if(cbp || IS_INTRA16x16(mb_type)){ - int i4x4, chroma_idx; + int i4x4, i8x8, chroma_idx; int dquant; int ret; GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; @@ -1036,9 +1122,14 @@ decode_intra_mb: return -1; } } else { + const int num_c8x8 = h->sps.chroma_format_idc; + if(cbp&0x30){ for(chroma_idx=0; chroma_idx<2; chroma_idx++) - if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ + if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), + CHROMA_DC_BLOCK_INDEX+chroma_idx, + CHROMA422 ? chroma422_dc_scan : chroma_dc_scan, + NULL, 4*num_c8x8) < 0) { return -1; } } @@ -1046,10 +1137,13 @@ decode_intra_mb: if(cbp&0x20){ for(chroma_idx=0; chroma_idx<2; chroma_idx++){ const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; - for(i4x4=0; i4x4<4; i4x4++){ - const int index= 16 + 16*chroma_idx + i4x4; - if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){ - return -1; + DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift); + for (i8x8=0; i8x8<num_c8x8; i8x8++) { + for (i4x4=0; i4x4<4; i4x4++) { + const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4; + if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0) + return -1; + mb += 16<<pixel_shift; } } } diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 0b7806994f..67399395a1 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m MpegEncContext * const s = &h->s; int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); int chroma444 = CHROMA444; + int chroma422 = CHROMA422; int mb_xy = h->mb_xy; int left_type= h->left_type[LTOP]; @@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); } + }else if(chroma422){ + if(left_type){ + filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); + filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); + } + filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); + if(top_type){ + filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); + filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); + } + filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); + filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); }else{ if(left_type){ filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); @@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ return v; } -static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) { +static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) { MpegEncContext * const s = &h->s; int edge; int chroma_qp_avg[2]; + int chroma444 = CHROMA444; + int chroma422 = CHROMA422; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; @@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u for( edge = 1; edge < edges; edge++ ) { DECLARE_ALIGNED(8, int16_t, bS)[4]; int qp; + const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type) - if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) + if (!deblock_edge && (!chroma422 || dir == 0)) continue; if( IS_INTRA(mb_type)) { @@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u } } } else { - filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 ); - if (chroma) { - if (chroma444) { - filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); - filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); - } else if( (edge&1) == 0 ) { - filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); - filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); + if (chroma422) { + if (deblock_edge) + filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); + if (chroma) { + filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); + filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); + } + } else { + filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); + if (chroma) { + if (chroma444) { + filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); + filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); + } else if ((edge&1) == 0) { + filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); + filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); + } } } } @@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 ); filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 ); + } else if (CHROMA422) { + filter_mb_mbaff_edgecv(h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1); + filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1); + filter_mb_mbaff_edgecv(h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1); + filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1); }else{ filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 ); filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); @@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint #if CONFIG_SMALL for( dir = 0; dir < 2; dir++ ) - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir); #else - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0); - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, CHROMA444, 1); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, 1); #endif } diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h index d2677eaa6c..b46d0985a8 100644 --- a/libavcodec/h264_mvpred.h +++ b/libavcodec/h264_mvpred.h @@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ if(top_type){ nnz = h->non_zero_count[top_xy]; AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); - if(CHROMA444){ + if(!s->chroma_y_shift){ AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); }else{ @@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){ nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; + }else if(CHROMA422) { + nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; + nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; + nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; + nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; }else{ nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index 61fb12ce0c..3c7b04abc6 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ #endif sps->crop= get_bits1(&s->gb); if(sps->crop){ - int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8; + int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; + int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; sps->crop_left = get_ue_golomb(&s->gb); sps->crop_right = get_ue_golomb(&s->gb); sps->crop_top = get_ue_golomb(&s->gb); @@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ if(sps->crop_left || sps->crop_top){ av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); } - if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){ + if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){ av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); } }else{ diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index c459523f71..a5ed069e94 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={ static const uint8_t chroma_dc_scan[4]={ (0+0*2)*16, (1+0*2)*16, - (0+1*2)*16, (1+1*2)*16, //FIXME + (0+1*2)*16, (1+1*2)*16, +}; + +static const uint8_t chroma422_dc_scan[8]={ + (0+0*2)*16, (0+1*2)*16, + (1+0*2)*16, (0+2*2)*16, + (0+3*2)*16, (1+1*2)*16, + (1+2*2)*16, (1+3*2)*16, }; // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index 96a38ff77d..cf0067b8f0 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -41,7 +41,7 @@ #include "h264dsp_template.c" #undef BIT_DEPTH -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { #undef FUNC #define FUNC(a, depth) a ## _ ## depth ## _c @@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ - c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ + if (chroma_format_idc == 1)\ + c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ + else\ + c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\ c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ - c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ + if (chroma_format_idc == 1)\ + c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ + else\ + c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ \ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ @@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ - c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ - c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ + if (chroma_format_idc == 1)\ + c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ + else\ + c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\ + if (chroma_format_idc == 1)\ + c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ + else\ + c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\ c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ - c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ - c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ + if (chroma_format_idc == 1)\ + c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ + else\ + c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\ + if (chroma_format_idc == 1)\ + c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ + else\ + c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\ c->h264_loop_filter_strength= NULL; switch (bit_depth) { @@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) break; } - if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); - if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth); - if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth); + if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); + if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); + if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc); } diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index b61967adfc..c79ab0a625 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -74,9 +74,9 @@ typedef struct H264DSPContext{ void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); }H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth); -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth); -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth); -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264DSP_H */ diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c index 906d99f739..3023541634 100644 --- a/libavcodec/h264dsp_template.c +++ b/libavcodec/h264dsp_template.c @@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int { FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); } +static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); +} +static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); +} static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) { @@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid { FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); } +static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); +} +static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +{ + FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); +} diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c index 3b386695a2..64bc70dd47 100644 --- a/libavcodec/h264idct_template.c +++ b/libavcodec/h264idct_template.c @@ -224,6 +224,39 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * } } } + +void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ + int i, j; + +#if 0 + av_log(NULL, AV_LOG_INFO, "idct\n"); + int32_t *b = block; + for (int i = 0; i < 256; i++) { + av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]); + if (!((i+1) % 16)) + av_log(NULL, AV_LOG_INFO, "\n"); + } +#endif + + for(j=1; j<3; j++){ + for(i=j*16; i<j*16+4; i++){ + if(nnzc[ scan8[i] ]) + FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); + else if(((dctcoef*)block)[i*16]) + FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); + } + } + + for(j=1; j<3; j++){ + for(i=j*16+4; i<j*16+8; i++){ + if(nnzc[ scan8[i+4] ]) + FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); + else if(((dctcoef*)block)[i*16]) + FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); + } + } +} + /** * IDCT transforms the 16 dc values and dequantizes them. * @param qmul quantization parameter @@ -263,6 +296,42 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in #undef stride } +void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ + const int stride= 16*2; + const int xStride= 16; + int i; + int temp[8]; + static const uint8_t x_offset[2]={0, 16}; + dctcoef *block = (dctcoef*)p_block; + + for(i=0; i<4; i++){ + temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; + temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1]; + } + + for(i=0; i<2; i++){ + const int offset= x_offset[i]; + const int z0= temp[2*0+i] + temp[2*2+i]; + const int z1= temp[2*0+i] - temp[2*2+i]; + const int z2= temp[2*1+i] - temp[2*3+i]; + const int z3= temp[2*1+i] + temp[2*3+i]; + + block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8; + block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8; + block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; + block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; + } + +#if 0 + av_log(NULL, AV_LOG_INFO, "after chroma dc\n"); + for (int i = 0; i < 256; i++) { + av_log(NULL, AV_LOG_INFO, "%5d ", block[i]); + if (!((i+1) % 16)) + av_log(NULL, AV_LOG_INFO, "\n"); + } +#endif +} + void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){ const int stride= 16*2; const int xStride= 16; diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index f6533cf9ba..eb4166f1e3 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -363,7 +363,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ /** * Set the intra prediction function pointers. */ -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){ // MpegEncContext * const s = &h->s; #undef FUNC @@ -436,20 +436,39 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ \ - h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ - h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ + if (chroma_format_idc == 1) {\ + h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ + h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ + } else {\ + h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\ + h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\ + }\ if (codec_id != CODEC_ID_VP8) {\ - h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ + if (chroma_format_idc == 1) {\ + h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ + } else {\ + h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\ + }\ } else\ h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ - h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ - h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ - h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ - h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ - h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ - h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ - h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + if (chroma_format_idc == 1) {\ + h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + } else {\ + h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + }\ }else{\ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ @@ -459,7 +478,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ }\ }\ - h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ + if (chroma_format_idc == 1) {\ + h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ + } else {\ + h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\ + }\ \ h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ @@ -506,6 +529,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ break; } - if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth); - if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth); + if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc); + if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc); } diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index daf123f968..599cdb228b 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,8 +101,8 @@ typedef struct H264PredContext{ void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); }H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth); +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 36f6d4e12f..eb5250501a 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ } } +static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){ + int i; + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); + const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); + + for(i=0; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, b); + } +} + static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ int i; pixel *src = (pixel*)_src; @@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ } } +static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){ + int i; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + for(i=0; i<16; i++){ + const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, a); + } +} + #define PRED8x8_X(n, v)\ static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ int i;\ @@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); +static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){ + FUNCC(pred8x8_128_dc)(_src, stride); + FUNCC(pred8x8_128_dc)(_src+8*stride, stride); +} + static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ int i; int dc0, dc2; @@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ } } +static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){ + FUNCC(pred8x8_left_dc)(_src, stride); + FUNCC(pred8x8_left_dc)(_src+8*stride, stride); +} + static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ int i; int dc0, dc1; @@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ } } +static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){ + int i; + int dc0, dc1; + pixel4 dc0splat, dc1splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=0; + for(i=0;i<4; i++){ + dc0+= src[i-stride]; + dc1+= src[4+i-stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + + for(i=0; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } +} + static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ int i; int dc0, dc1, dc2; @@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ } } +static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ + int i; + int dc0, dc1, dc2, dc3, dc4; + pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=dc2=dc3=dc4=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc1+= src[4+i-stride]; + dc2+= src[-1+(i+4)*stride]; + dc3+= src[-1+(i+8)*stride]; + dc4+= src[-1+(i+12)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); + dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); + dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); + dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); + dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); + + for(i=0; i<4; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } + for(i=4; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); + } + for(i=8; i<12; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat); + } + for(i=12; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat); + } +} + //the following 4 function should not be optimized! static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ FUNCC(pred8x8_top_dc)(src, stride); @@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ } } +static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){ + int j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel * const src0 = src +3-stride; + const pixel * src1 = src +8*stride-1; + const pixel * src2 = src1-2*stride; // == src+6*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + + for (k = 2; k <= 4; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + for (; k <= 8; ++k) { + src1 += stride; src2 -= stride; + V += k*(src1[0] - src2[0]); + } + + H = (17*H+16) >> 5; + V = (5*V+32) >> 6; + + a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; + for(j=16; j>0; --j) { + int b = a; + a += V; + src[0] = CLIP((b ) >> 5); + src[1] = CLIP((b+ H) >> 5); + src[2] = CLIP((b+2*H) >> 5); + src[3] = CLIP((b+3*H) >> 5); + src[4] = CLIP((b+4*H) >> 5); + src[5] = CLIP((b+5*H) >> 5); + src[6] = CLIP((b+6*H) >> 5); + src[7] = CLIP((b+7*H) >> 5); + src += stride; + } +} + #define SRC(x,y) src[(x)+(y)*stride] #define PL(y) \ const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c index 223971bd1a..00fb0a73d9 100644 --- a/libavcodec/ppc/h264_altivec.c +++ b/libavcodec/ppc/h264_altivec.c @@ -999,12 +999,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { } } -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth) +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { if (bit_depth == 8) { c->h264_idct_add = ff_h264_idct_add_altivec; - c->h264_idct_add8 = ff_h264_idct_add8_altivec; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_altivec; c->h264_idct_add16 = ff_h264_idct_add16_altivec; c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; c->h264_idct_dc_add= h264_idct_dc_add_altivec; diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 324cf717c2..d1df3461cc 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -1412,7 +1412,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) if (MPV_common_init(s) < 0) return -1; - ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); + ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1); #if CONFIG_RV30_DECODER if (avctx->codec_id == CODEC_ID_RV30) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index fb3f7f733a..7cbf8b1434 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -1721,7 +1721,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) avctx->pix_fmt = PIX_FMT_YUV420P; dsputil_init(&s->dsp, avctx); - ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8); + ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1); ff_vp8dsp_init(&s->vp8dsp); return 0; diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 7220dd75c1..cdf7d55d96 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) { int mm_flags = av_get_cpu_flags(); @@ -176,14 +176,17 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth if (mm_flags & AV_CPU_FLAG_MMX) { h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; - h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; + if (chroma_format_idc == 1) { + h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; + } if (codec_id == CODEC_ID_VP8) { h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; + if (chroma_format_idc == 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; if (codec_id == CODEC_ID_SVQ3) { h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; } else if (codec_id == CODEC_ID_RV40) { @@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth if (mm_flags & AV_CPU_FLAG_MMX2) { h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; + if (chroma_format_idc == 1) + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; @@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; } if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { - h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; - h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; + if (chroma_format_idc == 1) { + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; + } } if (codec_id == CODEC_ID_VP8) { h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; @@ -231,7 +237,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; + if (chroma_format_idc == 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; if (codec_id == CODEC_ID_SVQ3) { h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; } else if (codec_id == CODEC_ID_RV40) { @@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; } else { - h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; + if (chroma_format_idc == 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2; if (codec_id == CODEC_ID_SVQ3) { h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; } else if (codec_id == CODEC_ID_RV40) { @@ -271,7 +279,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth if (mm_flags & AV_CPU_FLAG_SSSE3) { h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; + if (chroma_format_idc == 1) + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; @@ -286,7 +295,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; + if (chroma_format_idc == 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; if (codec_id == CODEC_ID_SVQ3) { h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; } else if (codec_id == CODEC_ID_RV40) { @@ -301,7 +311,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; - h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; + if (chroma_format_idc == 1) + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; @@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; - h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; - h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; - h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; - h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; - h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; + if (chroma_format_idc == 1) { + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; + h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; + h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; + } h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index 68e543681f..71beb262c9 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4, 8, 10) H264_BIWEIGHT_10_SSE( 4, 4, 10) H264_BIWEIGHT_10_SSE( 4, 2, 10) -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { int mm_flags = av_get_cpu_flags(); @@ -368,7 +368,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; - c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; @@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; - c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; - c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; - c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; + if (chroma_format_idc == 1) { + c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; + c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; + } #if ARCH_X86_32 c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; @@ -413,7 +417,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; - c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; @@ -472,7 +477,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; - c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; #if HAVE_ALIGNED_STACK c->h264_idct8_add = ff_h264_idct8_add_10_sse2; @@ -532,7 +538,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; c->h264_idct_add16 = ff_h264_idct_add16_10_avx; - c->h264_idct_add8 = ff_h264_idct_add8_10_avx; + if (chroma_format_idc == 1) + c->h264_idct_add8 = ff_h264_idct_add8_10_avx; c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; #if HAVE_ALIGNED_STACK c->h264_idct8_add = ff_h264_idct8_add_10_avx; |