diff options
author | Dawid Kozinski <d.kozinski@samsung.com> | 2023-06-15 13:46:43 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2023-06-15 09:51:04 -0300 |
commit | 34e4f18360c4ecb8e5979cab8f389478d8cd7819 (patch) | |
tree | 80aa546b81f111ae4d3873de64b9880525a467c6 | |
parent | 41229ef7058702600cc6a6d09e933251d41e8f3b (diff) | |
download | ffmpeg-34e4f18360c4ecb8e5979cab8f389478d8cd7819.tar.gz |
avcodec/evc_parser: Added parser implementation for EVC format
- Added constants definitions for EVC parser
- Provided NAL units parsing following ISO_IEC_23094-1
- EVC parser registration
Signed-off-by: Dawid Kozinski <d.kozinski@samsung.com>
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | libavcodec/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/evc.h | 155 | ||||
-rw-r--r-- | libavcodec/evc_parse.c | 767 | ||||
-rw-r--r-- | libavcodec/evc_parse.h | 357 | ||||
-rw-r--r-- | libavcodec/evc_parser.c | 235 | ||||
-rw-r--r-- | libavcodec/parsers.c | 1 |
7 files changed, 1519 insertions, 0 deletions
@@ -2483,6 +2483,7 @@ CONFIG_EXTRA=" dnn dovi_rpu dvprofile + evcparse exif faandct faanidct @@ -3321,6 +3322,7 @@ av1_amf_encoder_deps="amf" # parsers aac_parser_select="adts_header mpeg4audio" av1_parser_select="cbs_av1" +evc_parser_select="golomb evcparse" h264_parser_select="golomb h264dsp h264parse h264_sei" hevc_parser_select="hevcparse hevc_sei" mpegaudio_parser_select="mpegaudioheader" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2efab60d7d..a28919992a 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -84,6 +84,7 @@ OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o OBJS-$(CONFIG_DOVI_RPU) += dovi_rpu.o OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o +OBJS-$(CONFIG_EVCPARSE) += evc_parse.o OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o OBJS-$(CONFIG_FAANDCT) += faandct.o OBJS-$(CONFIG_FAANIDCT) += faanidct.o @@ -1168,6 +1169,7 @@ OBJS-$(CONFIG_DVAUDIO_PARSER) += dvaudio_parser.o OBJS-$(CONFIG_DVBSUB_PARSER) += dvbsub_parser.o OBJS-$(CONFIG_DVD_NAV_PARSER) += dvd_nav_parser.o OBJS-$(CONFIG_DVDSUB_PARSER) += dvdsub_parser.o +OBJS-$(CONFIG_EVC_PARSER) += evc_parser.o OBJS-$(CONFIG_FLAC_PARSER) += flac_parser.o flacdata.o flac.o OBJS-$(CONFIG_FTR_PARSER) += ftr_parser.o OBJS-$(CONFIG_G723_1_PARSER) += g723_1_parser.o diff --git a/libavcodec/evc.h b/libavcodec/evc.h new file mode 100644 index 0000000000..d1fdb4fac6 --- /dev/null +++ b/libavcodec/evc.h @@ -0,0 +1,155 @@ +/* + * EVC definitions and enums + * Copyright (c) 2022 Dawid Kozinski <d.kozinski@samsung.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_EVC_H +#define AVCODEC_EVC_H + +// The length field that indicates the length in bytes of the following NAL unit is configured to be of 4 bytes +#define EVC_NALU_LENGTH_PREFIX_SIZE (4) /* byte */ +#define EVC_NALU_HEADER_SIZE (2) /* byte */ + +/** + * @see ISO_IEC_23094-1_2020, 7.4.2.2 NAL unit header semantic + * Table 4 - NAL unit type codes and NAL unit type classes + */ +enum EVCNALUnitType { + EVC_NOIDR_NUT = 0, /* Coded slice of a non-IDR picture */ + EVC_IDR_NUT = 1, /* Coded slice of an IDR picture */ + EVC_RSV_VCL_NUT02 = 2, + EVC_RSV_VCL_NUT03 = 3, + EVC_RSV_VCL_NUT04 = 4, + EVC_RSV_VCL_NUT05 = 5, + EVC_RSV_VCL_NUT06 = 6, + EVC_RSV_VCL_NUT07 = 7, + EVC_RSV_VCL_NUT08 = 8, + EVC_RSV_VCL_NUT09 = 9, + EVC_RSV_VCL_NUT10 = 10, + EVC_RSV_VCL_NUT11 = 11, + EVC_RSV_VCL_NUT12 = 12, + EVC_RSV_VCL_NUT13 = 13, + EVC_RSV_VCL_NUT14 = 14, + EVC_RSV_VCL_NUT15 = 15, + EVC_RSV_VCL_NUT16 = 16, + EVC_RSV_VCL_NUT17 = 17, + EVC_RSV_VCL_NUT18 = 18, + EVC_RSV_VCL_NUT19 = 19, + EVC_RSV_VCL_NUT20 = 20, + EVC_RSV_VCL_NUT21 = 21, + EVC_RSV_VCL_NUT22 = 22, + EVC_RSV_VCL_NUT23 = 23, + EVC_SPS_NUT = 24, /* Sequence parameter set */ + EVC_PPS_NUT = 25, /* Picture paremeter set */ + EVC_APS_NUT = 26, /* Adaptation parameter set */ + EVC_FD_NUT = 27, /* Filler data */ + EVC_SEI_NUT = 28, /* Supplemental enhancement information */ + EVC_RSV_NONVCL29 = 29, + EVC_RSV_NONVCL30 = 30, + EVC_RSV_NONVCL31 = 31, + EVC_RSV_NONVCL32 = 32, + EVC_RSV_NONVCL33 = 33, + EVC_RSV_NONVCL34 = 34, + EVC_RSV_NONVCL35 = 35, + EVC_RSV_NONVCL36 = 36, + EVC_RSV_NONVCL37 = 37, + EVC_RSV_NONVCL38 = 38, + EVC_RSV_NONVCL39 = 39, + EVC_RSV_NONVCL40 = 40, + EVC_RSV_NONVCL41 = 41, + EVC_RSV_NONVCL42 = 42, + EVC_RSV_NONVCL43 = 43, + EVC_RSV_NONVCL44 = 44, + EVC_RSV_NONVCL45 = 45, + EVC_RSV_NONVCL46 = 46, + EVC_RSV_NONVCL47 = 47, + EVC_RSV_NONVCL48 = 48, + EVC_RSV_NONVCL49 = 49, + EVC_RSV_NONVCL50 = 50, + EVC_RSV_NONVCL51 = 51, + EVC_RSV_NONVCL52 = 52, + EVC_RSV_NONVCL53 = 53, + EVC_RSV_NONVCL54 = 54, + EVC_RSV_NONVCL55 = 55, + EVC_UNSPEC_NUT56 = 56, + EVC_UNSPEC_NUT57 = 57, + EVC_UNSPEC_NUT58 = 58, + EVC_UNSPEC_NUT59 = 59, + EVC_UNSPEC_NUT60 = 60, + EVC_UNSPEC_NUT61 = 61, + EVC_UNSPEC_NUT62 = 62 +}; + +// slice type +// @see ISO_IEC_23094-1_2020 7.4.5 Slice header semantics +// +enum EVCSliceType { + EVC_SLICE_TYPE_B = 0, + EVC_SLICE_TYPE_P = 1, + EVC_SLICE_TYPE_I = 2 +}; + +enum { + // 7.4.3.2: aps_video_parameter_set_id is u(4). + EVC_MAX_APS_COUNT = 32, + + // 7.4.3.1: sps_seq_parameter_set_id is in [0, 15]. + EVC_MAX_SPS_COUNT = 16, + + // 7.4.3.2: pps_pic_parameter_set_id is in [0, 63]. + EVC_MAX_PPS_COUNT = 64, + + // 7.4.5: slice header slice_pic_parameter_set_id in [0, 63] + EVC_MAX_SH_COUNT = 64, + + // E.3.2: cpb_cnt_minus1[i] is in [0, 31]. + EVC_MAX_CPB_CNT = 32, + + // A.4.1: in table A.1 the highest level allows a MaxLumaPs of 35 651 584. + EVC_MAX_LUMA_PS = 35651584, + + EVC_MAX_NUM_REF_PICS = 21, + + EVC_MAX_NUM_RPLS = 32, + + // A.4.1: pic_width_in_luma_samples and pic_height_in_luma_samples are + // constrained to be not greater than sqrt(MaxLumaPs * 8). Hence height/ + // width are bounded above by sqrt(8 * 35651584) = 16888.2 samples. + EVC_MAX_WIDTH = 16888, + EVC_MAX_HEIGHT = 16888, + + // A.4.1: table A.1 allows at most 22 tile rows for any level. + EVC_MAX_TILE_ROWS = 22, + // A.4.1: table A.1 allows at most 20 tile columns for any level. + EVC_MAX_TILE_COLUMNS = 20, + + // A.4.1: table A.1 allows at most 600 slice segments for any level. + EVC_MAX_SLICE_SEGMENTS = 600, + + // 7.4.7.1: in the worst case (tiles_enabled_flag and + // entropy_coding_sync_enabled_flag are both set), entry points can be + // placed at the beginning of every Ctb row in every tile, giving an + // upper bound of (num_tile_columns_minus1 + 1) * PicHeightInCtbsY - 1. + // Only a stream with very high resolution and perverse parameters could + // get near that, though, so set a lower limit here with the maximum + // possible value for 4K video (at most 135 16x16 Ctb rows). + HEVC_MAX_ENTRY_POINT_OFFSETS = EVC_MAX_TILE_COLUMNS * 135, +}; + +#endif // AVCODEC_EVC_H diff --git a/libavcodec/evc_parse.c b/libavcodec/evc_parse.c new file mode 100644 index 0000000000..f69d7ffb6b --- /dev/null +++ b/libavcodec/evc_parse.c @@ -0,0 +1,767 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "golomb.h" +#include "parser.h" +#include "evc.h" +#include "evc_parse.h" + +#define EXTENDED_SAR 255 + +#define NUM_CHROMA_FORMATS 4 // @see ISO_IEC_23094-1 section 6.2 table 2 + +static const enum AVPixelFormat pix_fmts_8bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P +}; + +static const enum AVPixelFormat pix_fmts_9bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY9, AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9 +}; + +static const enum AVPixelFormat pix_fmts_10bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY10, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10 +}; + +static const enum AVPixelFormat pix_fmts_12bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY12, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12 +}; + +static const enum AVPixelFormat pix_fmts_14bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY14, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14 +}; + +static const enum AVPixelFormat pix_fmts_16bit[NUM_CHROMA_FORMATS] = { + AV_PIX_FMT_GRAY16, AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16 +}; + +// nuh_temporal_id specifies a temporal identifier for the NAL unit +int ff_evc_get_temporal_id(const uint8_t *bits, int bits_size, void *logctx) +{ + int temporal_id = 0; + uint16_t t = 0; + + if (bits_size < EVC_NALU_HEADER_SIZE) { + av_log(logctx, AV_LOG_ERROR, "Can't read NAL unit header\n"); + return 0; + } + + // forbidden_zero_bit + if ((bits[0] & 0x80) != 0) + return -1; + + t = AV_RB16(bits); + + temporal_id = (t >> 6) & 0x0007; + + return temporal_id; +} + +// @see ISO_IEC_23094-1 (7.3.7 Reference picture list structure syntax) +static int ref_pic_list_struct(GetBitContext *gb, RefPicListStruct *rpl) +{ + uint32_t delta_poc_st, strp_entry_sign_flag = 0; + rpl->ref_pic_num = get_ue_golomb(gb); + if (rpl->ref_pic_num > 0) { + delta_poc_st = get_ue_golomb(gb); + + rpl->ref_pics[0] = delta_poc_st; + if (rpl->ref_pics[0] != 0) { + strp_entry_sign_flag = get_bits(gb, 1); + + rpl->ref_pics[0] *= 1 - (strp_entry_sign_flag << 1); + } + } + + for (int i = 1; i < rpl->ref_pic_num; ++i) { + delta_poc_st = get_ue_golomb(gb); + if (delta_poc_st != 0) + strp_entry_sign_flag = get_bits(gb, 1); + rpl->ref_pics[i] = rpl->ref_pics[i - 1] + delta_poc_st * (1 - (strp_entry_sign_flag << 1)); + } + + return 0; +} + +// @see ISO_IEC_23094-1 (E.2.2 HRD parameters syntax) +static int hrd_parameters(GetBitContext *gb, HRDParameters *hrd) +{ + hrd->cpb_cnt_minus1 = get_ue_golomb(gb); + hrd->bit_rate_scale = get_bits(gb, 4); + hrd->cpb_size_scale = get_bits(gb, 4); + for (int SchedSelIdx = 0; SchedSelIdx <= hrd->cpb_cnt_minus1; SchedSelIdx++) { + hrd->bit_rate_value_minus1[SchedSelIdx] = get_ue_golomb(gb); + hrd->cpb_size_value_minus1[SchedSelIdx] = get_ue_golomb(gb); + hrd->cbr_flag[SchedSelIdx] = get_bits(gb, 1); + } + hrd->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hrd->cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hrd->cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hrd->time_offset_length = get_bits(gb, 5); + + return 0; +} + +// @see ISO_IEC_23094-1 (E.2.1 VUI parameters syntax) +static int vui_parameters(GetBitContext *gb, VUIParameters *vui) +{ + vui->aspect_ratio_info_present_flag = get_bits(gb, 1); + if (vui->aspect_ratio_info_present_flag) { + vui->aspect_ratio_idc = get_bits(gb, 8); + if (vui->aspect_ratio_idc == EXTENDED_SAR) { + vui->sar_width = get_bits(gb, 16); + vui->sar_height = get_bits(gb, 16); + } + } + vui->overscan_info_present_flag = get_bits(gb, 1); + if (vui->overscan_info_present_flag) + vui->overscan_appropriate_flag = get_bits(gb, 1); + vui->video_signal_type_present_flag = get_bits(gb, 1); + if (vui->video_signal_type_present_flag) { + vui->video_format = get_bits(gb, 3); + vui->video_full_range_flag = get_bits(gb, 1); + vui->colour_description_present_flag = get_bits(gb, 1); + if (vui->colour_description_present_flag) { + vui->colour_primaries = get_bits(gb, 8); + vui->transfer_characteristics = get_bits(gb, 8); + vui->matrix_coefficients = get_bits(gb, 8); + } + } + vui->chroma_loc_info_present_flag = get_bits(gb, 1); + if (vui->chroma_loc_info_present_flag) { + vui->chroma_sample_loc_type_top_field = get_ue_golomb(gb); + vui->chroma_sample_loc_type_bottom_field = get_ue_golomb(gb); + } + vui->neutral_chroma_indication_flag = get_bits(gb, 1); + + vui->field_seq_flag = get_bits(gb, 1); + + vui->timing_info_present_flag = get_bits(gb, 1); + if (vui->timing_info_present_flag) { + vui->num_units_in_tick = get_bits(gb, 32); + vui->time_scale = get_bits(gb, 32); + vui->fixed_pic_rate_flag = get_bits(gb, 1); + } + vui->nal_hrd_parameters_present_flag = get_bits(gb, 1); + if (vui->nal_hrd_parameters_present_flag) + hrd_parameters(gb, &vui->hrd_parameters); + vui->vcl_hrd_parameters_present_flag = get_bits(gb, 1); + if (vui->vcl_hrd_parameters_present_flag) + hrd_parameters(gb, &vui->hrd_parameters); + if (vui->nal_hrd_parameters_present_flag || vui->vcl_hrd_parameters_present_flag) + vui->low_delay_hrd_flag = get_bits(gb, 1); + vui->pic_struct_present_flag = get_bits(gb, 1); + vui->bitstream_restriction_flag = get_bits(gb, 1); + if (vui->bitstream_restriction_flag) { + vui->motion_vectors_over_pic_boundaries_flag = get_bits(gb, 1); + vui->max_bytes_per_pic_denom = get_ue_golomb(gb); + vui->max_bits_per_mb_denom = get_ue_golomb(gb); + vui->log2_max_mv_length_horizontal = get_ue_golomb(gb); + vui->log2_max_mv_length_vertical = get_ue_golomb(gb); + vui->num_reorder_pics = get_ue_golomb(gb); + vui->max_dec_pic_buffering = get_ue_golomb(gb); + } + + return 0; +} + +// @see ISO_IEC_23094-1 (7.3.2.1 SPS RBSP syntax) +EVCParserSPS *ff_evc_parse_sps(EVCParserContext *ctx, const uint8_t *bs, int bs_size) +{ + GetBitContext gb; + EVCParserSPS *sps; + int sps_seq_parameter_set_id; + + if (init_get_bits8(&gb, bs, bs_size) < 0) + return NULL; + + sps_seq_parameter_set_id = get_ue_golomb(&gb); + + if (sps_seq_parameter_set_id >= EVC_MAX_SPS_COUNT) + return NULL; + + if(!ctx->sps[sps_seq_parameter_set_id]) { + if((ctx->sps[sps_seq_parameter_set_id] = av_malloc(sizeof(EVCParserSPS))) == NULL) + return NULL; + } + + sps = ctx->sps[sps_seq_parameter_set_id]; + sps->sps_seq_parameter_set_id = sps_seq_parameter_set_id; + + // the Baseline profile is indicated by profile_idc eqal to 0 + // the Main profile is indicated by profile_idc eqal to 1 + sps->profile_idc = get_bits(&gb, 8); + + sps->level_idc = get_bits(&gb, 8); + + skip_bits_long(&gb, 32); /* skip toolset_idc_h */ + skip_bits_long(&gb, 32); /* skip toolset_idc_l */ + + // 0 - monochrome + // 1 - 4:2:0 + // 2 - 4:2:2 + // 3 - 4:4:4 + sps->chroma_format_idc = get_ue_golomb(&gb); + + sps->pic_width_in_luma_samples = get_ue_golomb(&gb); + sps->pic_height_in_luma_samples = get_ue_golomb(&gb); + + sps->bit_depth_luma_minus8 = get_ue_golomb(&gb); + sps->bit_depth_chroma_minus8 = get_ue_golomb(&gb); + + sps->sps_btt_flag = get_bits(&gb, 1); + if (sps->sps_btt_flag) { + sps->log2_ctu_size_minus5 = get_ue_golomb(&gb); + sps->log2_min_cb_size_minus2 = get_ue_golomb(&gb); + sps->log2_diff_ctu_max_14_cb_size = get_ue_golomb(&gb); + sps->log2_diff_ctu_max_tt_cb_size = get_ue_golomb(&gb); + sps->log2_diff_min_cb_min_tt_cb_size_minus2 = get_ue_golomb(&gb); + } + + sps->sps_suco_flag = get_bits(&gb, 1); + if (sps->sps_suco_flag) { + sps->log2_diff_ctu_size_max_suco_cb_size = get_ue_golomb(&gb); + sps->log2_diff_max_suco_min_suco_cb_size = get_ue_golomb(&gb); + } + + sps->sps_admvp_flag = get_bits(&gb, 1); + if (sps->sps_admvp_flag) { + sps->sps_affine_flag = get_bits(&gb, 1); + sps->sps_amvr_flag = get_bits(&gb, 1); + sps->sps_dmvr_flag = get_bits(&gb, 1); + sps->sps_mmvd_flag = get_bits(&gb, 1); + sps->sps_hmvp_flag = get_bits(&gb, 1); + } + + sps->sps_eipd_flag = get_bits(&gb, 1); + if (sps->sps_eipd_flag) { + sps->sps_ibc_flag = get_bits(&gb, 1); + if (sps->sps_ibc_flag) + sps->log2_max_ibc_cand_size_minus2 = get_ue_golomb(&gb); + } + + sps->sps_cm_init_flag = get_bits(&gb, 1); + if (sps->sps_cm_init_flag) + sps->sps_adcc_flag = get_bits(&gb, 1); + + sps->sps_iqt_flag = get_bits(&gb, 1); + if (sps->sps_iqt_flag) + sps->sps_ats_flag = get_bits(&gb, 1); + + sps->sps_addb_flag = get_bits(&gb, 1); + sps->sps_alf_flag = get_bits(&gb, 1); + sps->sps_htdf_flag = get_bits(&gb, 1); + sps->sps_rpl_flag = get_bits(&gb, 1); + sps->sps_pocs_flag = get_bits(&gb, 1); + sps->sps_dquant_flag = get_bits(&gb, 1); + sps->sps_dra_flag = get_bits(&gb, 1); + + if (sps->sps_pocs_flag) + sps->log2_max_pic_order_cnt_lsb_minus4 = get_ue_golomb(&gb); + + if (!sps->sps_pocs_flag || !sps->sps_rpl_flag) { + sps->log2_sub_gop_length = get_ue_golomb(&gb); + if (sps->log2_sub_gop_length == 0) + sps->log2_ref_pic_gap_length = get_ue_golomb(&gb); + } + + if (!sps->sps_rpl_flag) + sps->max_num_tid0_ref_pics = get_ue_golomb(&gb); + else { + sps->sps_max_dec_pic_buffering_minus1 = get_ue_golomb(&gb); + sps->long_term_ref_pic_flag = get_bits(&gb, 1); + sps->rpl1_same_as_rpl0_flag = get_bits(&gb, 1); + sps->num_ref_pic_list_in_sps[0] = get_ue_golomb(&gb); + + for (int i = 0; i < sps->num_ref_pic_list_in_sps[0]; ++i) + ref_pic_list_struct(&gb, &sps->rpls[0][i]); + + if (!sps->rpl1_same_as_rpl0_flag) { + sps->num_ref_pic_list_in_sps[1] = get_ue_golomb(&gb); + for (int i = 0; i < sps->num_ref_pic_list_in_sps[1]; ++i) + ref_pic_list_struct(&gb, &sps->rpls[1][i]); + } + } + + sps->picture_cropping_flag = get_bits(&gb, 1); + + if (sps->picture_cropping_flag) { + sps->picture_crop_left_offset = get_ue_golomb(&gb); + sps->picture_crop_right_offset = get_ue_golomb(&gb); + sps->picture_crop_top_offset = get_ue_golomb(&gb); + sps->picture_crop_bottom_offset = get_ue_golomb(&gb); + } + + if (sps->chroma_format_idc != 0) { + sps->chroma_qp_table_struct.chroma_qp_table_present_flag = get_bits(&gb, 1); + + if (sps->chroma_qp_table_struct.chroma_qp_table_present_flag) { + sps->chroma_qp_table_struct.same_qp_table_for_chroma = get_bits(&gb, 1); + sps->chroma_qp_table_struct.global_offset_flag = get_bits(&gb, 1); + for (int i = 0; i < (sps->chroma_qp_table_struct.same_qp_table_for_chroma ? 1 : 2); i++) { + sps->chroma_qp_table_struct.num_points_in_qp_table_minus1[i] = get_ue_golomb(&gb);; + for (int j = 0; j <= sps->chroma_qp_table_struct.num_points_in_qp_table_minus1[i]; j++) { + sps->chroma_qp_table_struct.delta_qp_in_val_minus1[i][j] = get_bits(&gb, 6); + sps->chroma_qp_table_struct.delta_qp_out_val[i][j] = get_se_golomb(&gb); + } + } + } + } + + sps->vui_parameters_present_flag = get_bits(&gb, 1); + if (sps->vui_parameters_present_flag) + vui_parameters(&gb, &(sps->vui_parameters)); + + // @note + // If necessary, add the missing fields to the EVCParserSPS structure + // and then extend parser implementation + + return sps; +} + +// @see ISO_IEC_23094-1 (7.3.2.2 SPS RBSP syntax) +// +// @note +// The current implementation of parse_sps function doesn't handle VUI parameters parsing. +// If it will be needed, parse_sps function could be extended to handle VUI parameters parsing +// to initialize fields of the AVCodecContex i.e. color_primaries, color_trc,color_range +// +EVCParserPPS *ff_evc_parse_pps(EVCParserContext *ctx, const uint8_t *bs, int bs_size) +{ + GetBitContext gb; + EVCParserPPS *pps; + + int pps_pic_parameter_set_id; + + if (init_get_bits8(&gb, bs, bs_size) < 0) + return NULL; + + pps_pic_parameter_set_id = get_ue_golomb(&gb); + if (pps_pic_parameter_set_id > EVC_MAX_PPS_COUNT) + return NULL; + + if(!ctx->pps[pps_pic_parameter_set_id]) { + if((ctx->pps[pps_pic_parameter_set_id] = av_malloc(sizeof(EVCParserSPS))) == NULL) + return NULL; + } + + pps = ctx->pps[pps_pic_parameter_set_id]; + + pps->pps_pic_parameter_set_id = pps_pic_parameter_set_id; + + pps->pps_seq_parameter_set_id = get_ue_golomb(&gb); + if (pps->pps_seq_parameter_set_id >= EVC_MAX_SPS_COUNT) + return NULL; + + pps->num_ref_idx_default_active_minus1[0] = get_ue_golomb(&gb); + pps->num_ref_idx_default_active_minus1[1] = get_ue_golomb(&gb); + pps->additional_lt_poc_lsb_len = get_ue_golomb(&gb); + pps->rpl1_idx_present_flag = get_bits(&gb, 1); + pps->single_tile_in_pic_flag = get_bits(&gb, 1); + + if (!pps->single_tile_in_pic_flag) { + pps->num_tile_columns_minus1 = get_ue_golomb(&gb); + pps->num_tile_rows_minus1 = get_ue_golomb(&gb); + pps->uniform_tile_spacing_flag = get_bits(&gb, 1); + + if (!pps->uniform_tile_spacing_flag) { + for (int i = 0; i < pps->num_tile_columns_minus1; i++) + pps->tile_column_width_minus1[i] = get_ue_golomb(&gb); + + for (int i = 0; i < pps->num_tile_rows_minus1; i++) + pps->tile_row_height_minus1[i] = get_ue_golomb(&gb); + } + pps->loop_filter_across_tiles_enabled_flag = get_bits(&gb, 1); + pps->tile_offset_len_minus1 = get_ue_golomb(&gb); + } + + pps->tile_id_len_minus1 = get_ue_golomb(&gb); + pps->explicit_tile_id_flag = get_bits(&gb, 1); + + if (pps->explicit_tile_id_flag) { + for (int i = 0; i <= pps->num_tile_rows_minus1; i++) { + for (int j = 0; j <= pps->num_tile_columns_minus1; j++) + pps->tile_id_val[i][j] = get_bits(&gb, pps->tile_id_len_minus1 + 1); + } + } + + pps->pic_dra_enabled_flag = 0; + pps->pic_dra_enabled_flag = get_bits(&gb, 1); + + if (pps->pic_dra_enabled_flag) + pps->pic_dra_aps_id = get_bits(&gb, 5); + + pps->arbitrary_slice_present_flag = get_bits(&gb, 1); + pps->constrained_intra_pred_flag = get_bits(&gb, 1); + pps->cu_qp_delta_enabled_flag = get_bits(&gb, 1); + + if (pps->cu_qp_delta_enabled_flag) + pps->log2_cu_qp_delta_area_minus6 = get_ue_golomb(&gb); + + return pps; +} + +// @see ISO_IEC_23094-1 (7.3.2.6 Slice layer RBSP syntax) +EVCParserSliceHeader *ff_evc_parse_slice_header(EVCParserContext *ctx, const uint8_t *bs, int bs_size) +{ + GetBitContext gb; + EVCParserSliceHeader *sh; + EVCParserPPS *pps; + EVCParserSPS *sps; + + int num_tiles_in_slice = 0; + int slice_pic_parameter_set_id; + + if (init_get_bits8(&gb, bs, bs_size) < 0) + return NULL; + + slice_pic_parameter_set_id = get_ue_golomb(&gb); + + if (slice_pic_parameter_set_id < 0 || slice_pic_parameter_set_id >= EVC_MAX_PPS_COUNT) + return NULL; + + if(!ctx->slice_header[slice_pic_parameter_set_id]) { + if((ctx->slice_header[slice_pic_parameter_set_id] = av_malloc(sizeof(EVCParserSliceHeader))) == NULL) + return NULL; + } + + sh = ctx->slice_header[slice_pic_parameter_set_id]; + + pps = ctx->pps[slice_pic_parameter_set_id]; + if(!pps) + return NULL; + + sps = ctx->sps[slice_pic_parameter_set_id]; + if(!sps) + return NULL; + + sh->slice_pic_parameter_set_id = slice_pic_parameter_set_id; + + if (!pps->single_tile_in_pic_flag) { + sh->single_tile_in_slice_flag = get_bits(&gb, 1); + sh->first_tile_id = get_bits(&gb, pps->tile_id_len_minus1 + 1); + } else + sh->single_tile_in_slice_flag = 1; + + if (!sh->single_tile_in_slice_flag) { + if (pps->arbitrary_slice_present_flag) + sh->arbitrary_slice_flag = get_bits(&gb, 1); + + if (!sh->arbitrary_slice_flag) + sh->last_tile_id = get_bits(&gb, pps->tile_id_len_minus1 + 1); + else { + sh->num_remaining_tiles_in_slice_minus1 = get_ue_golomb(&gb); + num_tiles_in_slice = sh->num_remaining_tiles_in_slice_minus1 + 2; + for (int i = 0; i < num_tiles_in_slice - 1; ++i) + sh->delta_tile_id_minus1[i] = get_ue_golomb(&gb); + } + } + + sh->slice_type = get_ue_golomb(&gb); + + if (ctx->nalu_type == EVC_IDR_NUT) + sh->no_output_of_prior_pics_flag = get_bits(&gb, 1); + + if (sps->sps_mmvd_flag && ((sh->slice_type == EVC_SLICE_TYPE_B) || (sh->slice_type == EVC_SLICE_TYPE_P))) + sh->mmvd_group_enable_flag = get_bits(&gb, 1); + else + sh->mmvd_group_enable_flag = 0; + + if (sps->sps_alf_flag) { + int ChromaArrayType = sps->chroma_format_idc; + + sh->slice_alf_enabled_flag = get_bits(&gb, 1); + + if (sh->slice_alf_enabled_flag) { + sh->slice_alf_luma_aps_id = get_bits(&gb, 5); + sh->slice_alf_map_flag = get_bits(&gb, 1); + sh->slice_alf_chroma_idc = get_bits(&gb, 2); + + if ((ChromaArrayType == 1 || ChromaArrayType == 2) && sh->slice_alf_chroma_idc > 0) + sh->slice_alf_chroma_aps_id = get_bits(&gb, 5); + } + if (ChromaArrayType == 3) { + int sliceChromaAlfEnabledFlag = 0; + int sliceChroma2AlfEnabledFlag = 0; + + if (sh->slice_alf_chroma_idc == 1) { // @see ISO_IEC_23094-1 (7.4.5) + sliceChromaAlfEnabledFlag = 1; + sliceChroma2AlfEnabledFlag = 0; + } else if (sh->slice_alf_chroma_idc == 2) { + sliceChromaAlfEnabledFlag = 0; + sliceChroma2AlfEnabledFlag = 1; + } else if (sh->slice_alf_chroma_idc == 3) { + sliceChromaAlfEnabledFlag = 1; + sliceChroma2AlfEnabledFlag = 1; + } else { + sliceChromaAlfEnabledFlag = 0; + sliceChroma2AlfEnabledFlag = 0; + } + + if (!sh->slice_alf_enabled_flag) + sh->slice_alf_chroma_idc = get_bits(&gb, 2); + + if (sliceChromaAlfEnabledFlag) { + sh->slice_alf_chroma_aps_id = get_bits(&gb, 5); + sh->slice_alf_chroma_map_flag = get_bits(&gb, 1); + } + + if (sliceChroma2AlfEnabledFlag) { + sh->slice_alf_chroma2_aps_id = get_bits(&gb, 5); + sh->slice_alf_chroma2_map_flag = get_bits(&gb, 1); + } + } + } + + if (ctx->nalu_type != EVC_IDR_NUT) { + if (sps->sps_pocs_flag) + sh->slice_pic_order_cnt_lsb = get_bits(&gb, sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + } + + // @note + // If necessary, add the missing fields to the EVCParserSliceHeader structure + // and then extend parser implementation + + return sh; +} + +int ff_evc_parse_nal_unit(EVCParserContext *ctx, const uint8_t *buf, int buf_size, void *logctx) +{ + int nalu_type, nalu_size; + int tid; + const uint8_t *data = buf; + int data_size = buf_size; + + // ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME; + ctx->key_frame = -1; + + nalu_size = buf_size; + if (nalu_size <= 0) { + av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit size: (%d)\n", nalu_size); + return AVERROR_INVALIDDATA; + } + + // @see ISO_IEC_23094-1_2020, 7.4.2.2 NAL unit header semantic (Table 4 - NAL unit type codes and NAL unit type classes) + // @see enum EVCNALUnitType in evc.h + nalu_type = evc_get_nalu_type(data, data_size, logctx); + if (nalu_type < EVC_NOIDR_NUT || nalu_type > EVC_UNSPEC_NUT62) { + av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit type: (%d)\n", nalu_type); + return AVERROR_INVALIDDATA; + } + ctx->nalu_type = nalu_type; + + tid = ff_evc_get_temporal_id(data, data_size, logctx); + if (tid < 0) { + av_log(logctx, AV_LOG_ERROR, "Invalid temporial id: (%d)\n", tid); + return AVERROR_INVALIDDATA; + } + ctx->nuh_temporal_id = tid; + + data += EVC_NALU_HEADER_SIZE; + data_size -= EVC_NALU_HEADER_SIZE; + + switch(nalu_type) { + case EVC_SPS_NUT: { + EVCParserSPS *sps; + int SubGopLength; + int bit_depth; + + sps = ff_evc_parse_sps(ctx, data, nalu_size); + if (!sps) { + av_log(logctx, AV_LOG_ERROR, "SPS parsing error\n"); + return AVERROR_INVALIDDATA; + } + + ctx->coded_width = sps->pic_width_in_luma_samples; + ctx->coded_height = sps->pic_height_in_luma_samples; + + if(sps->picture_cropping_flag) { + ctx->width = sps->pic_width_in_luma_samples - sps->picture_crop_left_offset - sps->picture_crop_right_offset; + ctx->height = sps->pic_height_in_luma_samples - sps->picture_crop_top_offset - sps->picture_crop_bottom_offset; + } else { + ctx->width = sps->pic_width_in_luma_samples; + ctx->height = sps->pic_height_in_luma_samples; + } + + SubGopLength = (int)pow(2.0, sps->log2_sub_gop_length); + ctx->gop_size = SubGopLength; + + ctx->delay = (sps->sps_max_dec_pic_buffering_minus1) ? sps->sps_max_dec_pic_buffering_minus1 - 1 : SubGopLength + sps->max_num_tid0_ref_pics - 1; + + if (sps->profile_idc == 1) ctx->profile = FF_PROFILE_EVC_MAIN; + else ctx->profile = FF_PROFILE_EVC_BASELINE; + + if (sps->vui_parameters_present_flag) { + if (sps->vui_parameters.timing_info_present_flag) { + int64_t num = sps->vui_parameters.num_units_in_tick; + int64_t den = sps->vui_parameters.time_scale; + if (num != 0 && den != 0) + av_reduce(&ctx->framerate.den, &ctx->framerate.num, num, den, 1 << 30); + } + } + + bit_depth = sps->bit_depth_chroma_minus8 + 8; + ctx->format = AV_PIX_FMT_NONE; + + switch (bit_depth) { + case 8: + ctx->format = pix_fmts_8bit[sps->chroma_format_idc]; + break; + case 9: + ctx->format = pix_fmts_9bit[sps->chroma_format_idc]; + break; + case 10: + ctx->format = pix_fmts_10bit[sps->chroma_format_idc]; + break; + case 12: + ctx->format = pix_fmts_12bit[sps->chroma_format_idc]; + break; + case 14: + ctx->format = pix_fmts_14bit[sps->chroma_format_idc]; + break; + case 16: + ctx->format = pix_fmts_16bit[sps->chroma_format_idc]; + break; + } + av_assert0(ctx->format != AV_PIX_FMT_NONE); + + break; + } + case EVC_PPS_NUT: { + EVCParserPPS *pps; + + pps = ff_evc_parse_pps(ctx, data, nalu_size); + if (!pps) { + av_log(logctx, AV_LOG_ERROR, "PPS parsing error\n"); + return AVERROR_INVALIDDATA; + } + break; + } + case EVC_SEI_NUT: // Supplemental Enhancement Information + case EVC_APS_NUT: // Adaptation parameter set + case EVC_FD_NUT: // Filler data + break; + case EVC_IDR_NUT: // Coded slice of a IDR or non-IDR picture + case EVC_NOIDR_NUT: { + EVCParserSliceHeader *sh; + EVCParserSPS *sps; + int slice_pic_parameter_set_id; + + sh = ff_evc_parse_slice_header(ctx, data, nalu_size); + if (!sh) { + av_log(logctx, AV_LOG_ERROR, "Slice header parsing error\n"); + return AVERROR_INVALIDDATA; + } + + switch (sh->slice_type) { + case EVC_SLICE_TYPE_B: { + ctx->pict_type = AV_PICTURE_TYPE_B; + break; + } + case EVC_SLICE_TYPE_P: { + ctx->pict_type = AV_PICTURE_TYPE_P; + break; + } + case EVC_SLICE_TYPE_I: { + ctx->pict_type = AV_PICTURE_TYPE_I; + break; + } + default: { + ctx->pict_type = AV_PICTURE_TYPE_NONE; + } + } + + ctx->key_frame = (nalu_type == EVC_IDR_NUT) ? 1 : 0; + + // POC (picture order count of the current picture) derivation + // @see ISO/IEC 23094-1:2020(E) 8.3.1 Decoding process for picture order count + slice_pic_parameter_set_id = sh->slice_pic_parameter_set_id; + sps = ctx->sps[slice_pic_parameter_set_id]; + + if (sps && sps->sps_pocs_flag) { + + int PicOrderCntMsb = 0; + ctx->poc.prevPicOrderCntVal = ctx->poc.PicOrderCntVal; + + if (nalu_type == EVC_IDR_NUT) + PicOrderCntMsb = 0; + else { + int MaxPicOrderCntLsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + int prevPicOrderCntLsb = ctx->poc.PicOrderCntVal & (MaxPicOrderCntLsb - 1); + int prevPicOrderCntMsb = ctx->poc.PicOrderCntVal - prevPicOrderCntLsb; + + + if ((sh->slice_pic_order_cnt_lsb < prevPicOrderCntLsb) && + ((prevPicOrderCntLsb - sh->slice_pic_order_cnt_lsb) >= (MaxPicOrderCntLsb / 2))) + + PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; + + else if ((sh->slice_pic_order_cnt_lsb > prevPicOrderCntLsb) && + ((sh->slice_pic_order_cnt_lsb - prevPicOrderCntLsb) > (MaxPicOrderCntLsb / 2))) + + PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; + + else + PicOrderCntMsb = prevPicOrderCntMsb; + } + ctx->poc.PicOrderCntVal = PicOrderCntMsb + sh->slice_pic_order_cnt_lsb; + + } else { + if (nalu_type == EVC_IDR_NUT) { + ctx->poc.PicOrderCntVal = 0; + ctx->poc.DocOffset = -1; + } else { + int SubGopLength = (int)pow(2.0, sps->log2_sub_gop_length); + if (tid == 0) { + ctx->poc.PicOrderCntVal = ctx->poc.prevPicOrderCntVal + SubGopLength; + ctx->poc.DocOffset = 0; + ctx->poc.prevPicOrderCntVal = ctx->poc.PicOrderCntVal; + } else { + int ExpectedTemporalId; + int PocOffset; + int prevDocOffset = ctx->poc.DocOffset; + + ctx->poc.DocOffset = (prevDocOffset + 1) % SubGopLength; + if (ctx->poc.DocOffset == 0) { + ctx->poc.prevPicOrderCntVal += SubGopLength; + ExpectedTemporalId = 0; + } else + ExpectedTemporalId = 1 + (int)log2(ctx->poc.DocOffset); + while (tid != ExpectedTemporalId) { + ctx->poc.DocOffset = (ctx->poc.DocOffset + 1) % SubGopLength; + if (ctx->poc.DocOffset == 0) + ExpectedTemporalId = 0; + else + ExpectedTemporalId = 1 + (int)log2(ctx->poc.DocOffset); + } + PocOffset = (int)(SubGopLength * ((2.0 * ctx->poc.DocOffset + 1) / (int)pow(2.0, tid) - 2)); + ctx->poc.PicOrderCntVal = ctx->poc.prevPicOrderCntVal + PocOffset; + } + } + } + + ctx->output_picture_number = ctx->poc.PicOrderCntVal; + ctx->key_frame = (nalu_type == EVC_IDR_NUT) ? 1 : 0; + + break; + } + } + + return 0; +} + diff --git a/libavcodec/evc_parse.h b/libavcodec/evc_parse.h new file mode 100644 index 0000000000..d74a3b5159 --- /dev/null +++ b/libavcodec/evc_parse.h @@ -0,0 +1,357 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * EVC decoder/parser shared code + */ + +#ifndef AVCODEC_EVC_PARSE_H +#define AVCODEC_EVC_PARSE_H + +#define EVC_MAX_QP_TABLE_SIZE 58 +#define NUM_CPB 32 + +// rpl structure +typedef struct RefPicListStruct { + int poc; + int tid; + int ref_pic_num; + int ref_pic_active_num; + int ref_pics[EVC_MAX_NUM_REF_PICS]; + char pic_type; + +} RefPicListStruct; + +// chromaQP table structure to be signalled in SPS +typedef struct ChromaQpTable { + int chroma_qp_table_present_flag; // u(1) + int same_qp_table_for_chroma; // u(1) + int global_offset_flag; // u(1) + int num_points_in_qp_table_minus1[2]; // ue(v) + int delta_qp_in_val_minus1[2][EVC_MAX_QP_TABLE_SIZE]; // u(6) + int delta_qp_out_val[2][EVC_MAX_QP_TABLE_SIZE]; // se(v) +} ChromaQpTable; + +// Hypothetical Reference Decoder (HRD) parameters, part of VUI +typedef struct HRDParameters { + int cpb_cnt_minus1; // ue(v) + int bit_rate_scale; // u(4) + int cpb_size_scale; // u(4) + int bit_rate_value_minus1[NUM_CPB]; // ue(v) + int cpb_size_value_minus1[NUM_CPB]; // ue(v) + int cbr_flag[NUM_CPB]; // u(1) + int initial_cpb_removal_delay_length_minus1; // u(5) + int cpb_removal_delay_length_minus1; // u(5) + int dpb_output_delay_length_minus1; // u(5) + int time_offset_length; // u(5) +} HRDParameters; + +// video usability information (VUI) part of SPS +typedef struct VUIParameters { + int aspect_ratio_info_present_flag; // u(1) + int aspect_ratio_idc; // u(8) + int sar_width; // u(16) + int sar_height; // u(16) + int overscan_info_present_flag; // u(1) + int overscan_appropriate_flag; // u(1) + int video_signal_type_present_flag; // u(1) + int video_format; // u(3) + int video_full_range_flag; // u(1) + int colour_description_present_flag; // u(1) + int colour_primaries; // u(8) + int transfer_characteristics; // u(8) + int matrix_coefficients; // u(8) + int chroma_loc_info_present_flag; // u(1) + int chroma_sample_loc_type_top_field; // ue(v) + int chroma_sample_loc_type_bottom_field; // ue(v) + int neutral_chroma_indication_flag; // u(1) + int field_seq_flag; // u(1) + int timing_info_present_flag; // u(1) + int num_units_in_tick; // u(32) + int time_scale; // u(32) + int fixed_pic_rate_flag; // u(1) + int nal_hrd_parameters_present_flag; // u(1) + int vcl_hrd_parameters_present_flag; // u(1) + int low_delay_hrd_flag; // u(1) + int pic_struct_present_flag; // u(1) + int bitstream_restriction_flag; // u(1) + int motion_vectors_over_pic_boundaries_flag; // u(1) + int max_bytes_per_pic_denom; // ue(v) + int max_bits_per_mb_denom; // ue(v) + int log2_max_mv_length_horizontal; // ue(v) + int log2_max_mv_length_vertical; // ue(v) + int num_reorder_pics; // ue(v) + int max_dec_pic_buffering; // ue(v) + + HRDParameters hrd_parameters; +} VUIParameters; + +// The sturcture reflects SPS RBSP(raw byte sequence payload) layout +// @see ISO_IEC_23094-1 section 7.3.2.1 +// +// The following descriptors specify the parsing process of each element +// u(n) - unsigned integer using n bits +// ue(v) - unsigned integer 0-th order Exp_Golomb-coded syntax element with the left bit first +typedef struct EVCParserSPS { + int sps_seq_parameter_set_id; // ue(v) + int profile_idc; // u(8) + int level_idc; // u(8) + int toolset_idc_h; // u(32) + int toolset_idc_l; // u(32) + int chroma_format_idc; // ue(v) + int pic_width_in_luma_samples; // ue(v) + int pic_height_in_luma_samples; // ue(v) + int bit_depth_luma_minus8; // ue(v) + int bit_depth_chroma_minus8; // ue(v) + + int sps_btt_flag; // u(1) + int log2_ctu_size_minus5; // ue(v) + int log2_min_cb_size_minus2; // ue(v) + int log2_diff_ctu_max_14_cb_size; // ue(v) + int log2_diff_ctu_max_tt_cb_size; // ue(v) + int log2_diff_min_cb_min_tt_cb_size_minus2; // ue(v) + + int sps_suco_flag; // u(1) + int log2_diff_ctu_size_max_suco_cb_size; // ue(v) + int log2_diff_max_suco_min_suco_cb_size; // ue(v) + + int sps_admvp_flag; // u(1) + int sps_affine_flag; // u(1) + int sps_amvr_flag; // u(1) + int sps_dmvr_flag; // u(1) + int sps_mmvd_flag; // u(1) + int sps_hmvp_flag; // u(1) + + int sps_eipd_flag; // u(1) + int sps_ibc_flag; // u(1) + int log2_max_ibc_cand_size_minus2; // ue(v) + + int sps_cm_init_flag; // u(1) + int sps_adcc_flag; // u(1) + + int sps_iqt_flag; // u(1) + int sps_ats_flag; // u(1) + + int sps_addb_flag; // u(1) + int sps_alf_flag; // u(1) + int sps_htdf_flag; // u(1) + int sps_rpl_flag; // u(1) + int sps_pocs_flag; // u(1) + int sps_dquant_flag; // u(1) + int sps_dra_flag; // u(1) + + int log2_max_pic_order_cnt_lsb_minus4; // ue(v) + int log2_sub_gop_length; // ue(v) + int log2_ref_pic_gap_length; // ue(v) + + int max_num_tid0_ref_pics; // ue(v) + + int sps_max_dec_pic_buffering_minus1; // ue(v) + int long_term_ref_pic_flag; // u(1) + int rpl1_same_as_rpl0_flag; // u(1) + int num_ref_pic_list_in_sps[2]; // ue(v) + struct RefPicListStruct rpls[2][EVC_MAX_NUM_RPLS]; + + int picture_cropping_flag; // u(1) + int picture_crop_left_offset; // ue(v) + int picture_crop_right_offset; // ue(v) + int picture_crop_top_offset; // ue(v) + int picture_crop_bottom_offset; // ue(v) + + struct ChromaQpTable chroma_qp_table_struct; + + int vui_parameters_present_flag; // u(1) + + struct VUIParameters vui_parameters; + +} EVCParserSPS; + +typedef struct EVCParserPPS { + int pps_pic_parameter_set_id; // ue(v) + int pps_seq_parameter_set_id; // ue(v) + int num_ref_idx_default_active_minus1[2]; // ue(v) + int additional_lt_poc_lsb_len; // ue(v) + int rpl1_idx_present_flag; // u(1) + int single_tile_in_pic_flag; // u(1) + int num_tile_columns_minus1; // ue(v) + int num_tile_rows_minus1; // ue(v) + int uniform_tile_spacing_flag; // u(1) + int tile_column_width_minus1[EVC_MAX_TILE_ROWS]; // ue(v) + int tile_row_height_minus1[EVC_MAX_TILE_COLUMNS]; // ue(v) + int loop_filter_across_tiles_enabled_flag; // u(1) + int tile_offset_len_minus1; // ue(v) + int tile_id_len_minus1; // ue(v) + int explicit_tile_id_flag; // u(1) + int tile_id_val[EVC_MAX_TILE_ROWS][EVC_MAX_TILE_COLUMNS]; // u(v) + int pic_dra_enabled_flag; // u(1) + int pic_dra_aps_id; // u(5) + int arbitrary_slice_present_flag; // u(1) + int constrained_intra_pred_flag; // u(1) + int cu_qp_delta_enabled_flag; // u(1) + int log2_cu_qp_delta_area_minus6; // ue(v) + +} EVCParserPPS; + +// The sturcture reflects Slice Header RBSP(raw byte sequence payload) layout +// @see ISO_IEC_23094-1 section 7.3.2.6 +// +// The following descriptors specify the parsing process of each element +// u(n) - unsigned integer using n bits +// ue(v) - unsigned integer 0-th order Exp_Golomb-coded syntax element with the left bit first +// u(n) - unsigned integer using n bits. +// When n is "v" in the syntax table, the number of bits varies in a manner dependent on the value of other syntax elements. +typedef struct EVCParserSliceHeader { + int slice_pic_parameter_set_id; // ue(v) + int single_tile_in_slice_flag; // u(1) + int first_tile_id; // u(v) + int arbitrary_slice_flag; // u(1) + int last_tile_id; // u(v) + int num_remaining_tiles_in_slice_minus1; // ue(v) + int delta_tile_id_minus1[EVC_MAX_TILE_ROWS * EVC_MAX_TILE_COLUMNS]; // ue(v) + + int slice_type; // ue(v) + int no_output_of_prior_pics_flag; // u(1) + int mmvd_group_enable_flag; // u(1) + int slice_alf_enabled_flag; // u(1) + + int slice_alf_luma_aps_id; // u(5) + int slice_alf_map_flag; // u(1) + int slice_alf_chroma_idc; // u(2) + int slice_alf_chroma_aps_id; // u(5) + int slice_alf_chroma_map_flag; // u(1) + int slice_alf_chroma2_aps_id; // u(5) + int slice_alf_chroma2_map_flag; // u(1) + int slice_pic_order_cnt_lsb; // u(v) + + // @note + // Currently the structure does not reflect the entire Slice Header RBSP layout. + // It contains only the fields that are necessary to read from the NAL unit all the values + // necessary for the correct initialization of the AVCodecContext structure. + + // @note + // If necessary, add the missing fields to the structure to reflect + // the contents of the entire NAL unit of the SPS type + +} EVCParserSliceHeader; + +// picture order count of the current picture +typedef struct EVCParserPoc { + int PicOrderCntVal; // current picture order count value + int prevPicOrderCntVal; // the picture order count of the previous Tid0 picture + int DocOffset; // the decoding order count of the previous picture +} EVCParserPoc; + +typedef struct EVCParserContext { + //ParseContext pc; + EVCParserSPS *sps[EVC_MAX_SPS_COUNT]; + EVCParserPPS *pps[EVC_MAX_PPS_COUNT]; + EVCParserSliceHeader *slice_header[EVC_MAX_PPS_COUNT]; + + EVCParserPoc poc; + + int nuh_temporal_id; // the value of TemporalId (shall be the same for all VCL NAL units of an Access Unit) + int nalu_type; // the current NALU type + + // Dimensions of the decoded video intended for presentation. + int width; + int height; + + // Dimensions of the coded video. + int coded_width; + int coded_height; + + // The format of the coded data, corresponds to enum AVPixelFormat + int format; + + // AV_PICTURE_TYPE_I, EVC_SLICE_TYPE_P, AV_PICTURE_TYPE_B + int pict_type; + + // Set by parser to 1 for key frames and 0 for non-key frames + int key_frame; + + // Picture number incremented in presentation or output order. + // This corresponds to EVCEVCParserPoc::PicOrderCntVal + int output_picture_number; + + // profile + // 0: FF_PROFILE_EVC_BASELINE + // 1: FF_PROFILE_EVC_MAIN + int profile; + + // Framerate value in the compressed bitstream + AVRational framerate; + + // Number of pictures in a group of pictures + int gop_size; + + // Number of frames the decoded output will be delayed relative to the encoded input + int delay; + + int parsed_extradata; + +} EVCParserContext; + +static inline int evc_get_nalu_type(const uint8_t *bits, int bits_size, void *logctx) +{ + int unit_type_plus1 = 0; + + if (bits_size >= EVC_NALU_HEADER_SIZE) { + unsigned char *p = (unsigned char *)bits; + // forbidden_zero_bit + if ((p[0] & 0x80) != 0) { + av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit header\n"); + return -1; + } + + // nal_unit_type + unit_type_plus1 = (p[0] >> 1) & 0x3F; + } + + return unit_type_plus1 - 1; +} + +static inline uint32_t evc_read_nal_unit_length(const uint8_t *bits, int bits_size, void *logctx) +{ + uint32_t nalu_len = 0; + + if (bits_size < EVC_NALU_LENGTH_PREFIX_SIZE) { + av_log(logctx, AV_LOG_ERROR, "Can't read NAL unit length\n"); + return 0; + } + + nalu_len = AV_RB32(bits); + + return nalu_len; +} + +// nuh_temporal_id specifies a temporal identifier for the NAL unit +int ff_evc_get_temporal_id(const uint8_t *bits, int bits_size, void *logctx); + +// @see ISO_IEC_23094-1 (7.3.2.1 SPS RBSP syntax) +EVCParserSPS *ff_evc_parse_sps(EVCParserContext *ctx, const uint8_t *bs, int bs_size); + +// @see ISO_IEC_23094-1 (7.3.2.2 SPS RBSP syntax) +EVCParserPPS *ff_evc_parse_pps(EVCParserContext *ctx, const uint8_t *bs, int bs_size); + +// @see ISO_IEC_23094-1 (7.3.2.6 Slice layer RBSP syntax) +EVCParserSliceHeader *ff_evc_parse_slice_header(EVCParserContext *ctx, const uint8_t *bs, int bs_size); + +int ff_evc_parse_nal_unit(EVCParserContext *ctx, const uint8_t *buf, int buf_size, void *logctx); + +#endif /* AVCODEC_EVC_PARSE_H */ diff --git a/libavcodec/evc_parser.c b/libavcodec/evc_parser.c new file mode 100644 index 0000000000..7528fb13db --- /dev/null +++ b/libavcodec/evc_parser.c @@ -0,0 +1,235 @@ +/* + * EVC format parser + * + * Copyright (C) 2021 Dawid Kozinski <d.kozinski@samsung.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "parser.h" +#include "bytestream.h" +#include "evc.h" +#include "evc_parse.h" + +/** + * Parse NAL units of found picture and decode some basic information. + * + * @param s codec parser context + * @param avctx codec context + * @param buf buffer with field/frame data + * @param buf_size size of the buffer + */ +static int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx, const uint8_t *buf, int buf_size) +{ + EVCParserContext *ctx = s->priv_data; + const uint8_t *data = buf; + int data_size = buf_size; + int bytes_read = 0; + int nalu_size = 0; + + while (data_size > 0) { + + // Buffer size is not enough for buffer to store NAL unit 4-bytes prefix (length) + if (data_size < EVC_NALU_LENGTH_PREFIX_SIZE) + return AVERROR_INVALIDDATA; + + nalu_size = evc_read_nal_unit_length(data, data_size, avctx); + + bytes_read += EVC_NALU_LENGTH_PREFIX_SIZE; + + data += EVC_NALU_LENGTH_PREFIX_SIZE; + data_size -= EVC_NALU_LENGTH_PREFIX_SIZE; + + if (data_size < nalu_size) + return AVERROR_INVALIDDATA; + + if (ff_evc_parse_nal_unit(ctx, data, nalu_size, avctx) != 0) { + av_log(avctx, AV_LOG_ERROR, "Parsing of NAL unit failed\n"); + return AVERROR_INVALIDDATA; + } + + if(ctx->nalu_type == EVC_SPS_NUT) { + + s->coded_width = ctx->coded_width; + s->coded_height = ctx->coded_height; + s->width = ctx->width; + s->height = ctx->height; + + s->format = ctx->format; + + avctx->gop_size = ctx->gop_size; + avctx->delay = ctx->delay; + avctx->profile = ctx->profile; + + } else if(ctx->nalu_type == EVC_NOIDR_NUT || ctx->nalu_type == EVC_IDR_NUT) { + + s->pict_type = ctx->pict_type; + s->key_frame = ctx->key_frame; + s->output_picture_number = ctx->output_picture_number; + + } + + data += nalu_size; + data_size -= nalu_size; + } + return 0; +} + +// Decoding nal units from evcC (EVCDecoderConfigurationRecord) +// @see @see ISO/IEC 14496-15:2021 Coding of audio-visual objects - Part 15: section 12.3.3.2 +static int decode_extradata(EVCParserContext *ctx, const uint8_t *data, int size, void *logctx) +{ + int ret = 0; + GetByteContext gb; + + bytestream2_init(&gb, data, size); + + if (!data || size <= 0) + return -1; + + // extradata is encoded as evcC format. + if (data[0] == 1) { + int num_of_arrays; // indicates the number of arrays of NAL units of the indicated type(s) + + int nalu_length_field_size; // indicates the length in bytes of the NALUnitLenght field in EVC video stream sample in the stream + // The value of this field shall be one of 0, 1, or 3 corresponding to a length encoded with 1, 2, or 4 bytes, respectively. + + if (bytestream2_get_bytes_left(&gb) < 18) { + av_log(logctx, AV_LOG_ERROR, "evcC %d too short\n", size); + return AVERROR_INVALIDDATA; + } + + bytestream2_skip(&gb, 16); + + // @see ISO/IEC 14496-15:2021 Coding of audio-visual objects - Part 15: section 12.3.3.3 + // LengthSizeMinusOne plus 1 indicates the length in bytes of the NALUnitLength field in a EVC video stream sample in the stream to which this configuration record applies. For example, a size of one byte is indicated with a value of 0. + // The value of this field shall be one of 0, 1, or 3 corresponding to a length encoded with 1, 2, or 4 bytes, respectively. + nalu_length_field_size = (bytestream2_get_byte(&gb) & 3) + 1; + if( nalu_length_field_size != 1 && + nalu_length_field_size != 2 && + nalu_length_field_size != 4 ) { + av_log(logctx, AV_LOG_ERROR, "The length in bytes of the NALUnitLenght field in a EVC video stream has unsupported value of %d\n", nalu_length_field_size); + return AVERROR_INVALIDDATA; + } + + num_of_arrays = bytestream2_get_byte(&gb); + + /* Decode nal units from evcC. */ + for (int i = 0; i < num_of_arrays; i++) { + + // @see ISO/IEC 14496-15:2021 Coding of audio-visual objects - Part 15: section 12.3.3.3 + // NAL_unit_type indicates the type of the NAL units in the following array (which shall be all of that type); + // - it takes a value as defined in ISO/IEC 23094-1; + // - it is restricted to take one of the values indicating a SPS, PPS, APS, or SEI NAL unit. + int nal_unit_type = bytestream2_get_byte(&gb) & 0x3f; + int num_nalus = bytestream2_get_be16(&gb); + + for (int j = 0; j < num_nalus; j++) { + + int nal_unit_length = bytestream2_get_be16(&gb); + + if (bytestream2_get_bytes_left(&gb) < nal_unit_length) { + av_log(logctx, AV_LOG_ERROR, "Invalid NAL unit size in extradata.\n"); + return AVERROR_INVALIDDATA; + } + + if( nal_unit_type == EVC_SPS_NUT || + nal_unit_type == EVC_PPS_NUT || + nal_unit_type == EVC_APS_NUT || + nal_unit_type == EVC_SEI_NUT ) { + if (ff_evc_parse_nal_unit(ctx, gb.buffer, nal_unit_length, logctx) != 0) { + av_log(logctx, AV_LOG_ERROR, "Parsing of NAL unit failed\n"); + return AVERROR_INVALIDDATA; + } + } + + bytestream2_skip(&gb, nal_unit_length); + } + } + } else + return -1; + + return ret; +} + +static int evc_parse(AVCodecParserContext *s, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + int next; + int ret; + EVCParserContext *ctx = s->priv_data; + + if (avctx->extradata && !ctx->parsed_extradata) { + decode_extradata(ctx, avctx->extradata, avctx->extradata_size, avctx); + ctx->parsed_extradata = 1; + } + + next = buf_size; + + ret = parse_nal_units(s, avctx, buf, buf_size); + if(ret < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + + s->picture_structure = AV_PICTURE_STRUCTURE_FRAME; + + // poutbuf contains just one Access Unit + *poutbuf = buf; + *poutbuf_size = buf_size; + + return next; +} + +static int evc_parser_init(AVCodecParserContext *s) +{ + EVCParserContext *ctx = s->priv_data; + + memset(ctx->sps, 0, sizeof(EVCParserSPS *)*EVC_MAX_SPS_COUNT); + memset(ctx->pps, 0, sizeof(EVCParserPPS *)*EVC_MAX_PPS_COUNT); + memset(ctx->slice_header, 0, sizeof(EVCParserSliceHeader *)*EVC_MAX_PPS_COUNT); + + return 0; +} + +static void evc_parser_close(AVCodecParserContext *s) +{ + EVCParserContext *ctx = s->priv_data; + + for(int i = 0; i < EVC_MAX_SPS_COUNT; i++) { + EVCParserSPS *sps = ctx->sps[i]; + av_freep(&sps); + } + + for(int i = 0; i < EVC_MAX_PPS_COUNT; i++) { + EVCParserPPS *pps = ctx->pps[i]; + EVCParserSliceHeader *sh = ctx->slice_header[i]; + + av_freep(&pps); + av_freep(&sh); + } +} + +const AVCodecParser ff_evc_parser = { + .codec_ids = { AV_CODEC_ID_EVC }, + .priv_data_size = sizeof(EVCParserContext), + .parser_init = evc_parser_init, + .parser_parse = evc_parse, + .parser_close = evc_parser_close, +}; diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c index d355808018..2c077ec3ae 100644 --- a/libavcodec/parsers.c +++ b/libavcodec/parsers.c @@ -41,6 +41,7 @@ extern const AVCodecParser ff_dvaudio_parser; extern const AVCodecParser ff_dvbsub_parser; extern const AVCodecParser ff_dvdsub_parser; extern const AVCodecParser ff_dvd_nav_parser; +extern const AVCodecParser ff_evc_parser; extern const AVCodecParser ff_flac_parser; extern const AVCodecParser ff_ftr_parser; extern const AVCodecParser ff_g723_1_parser; |