diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-05-10 22:41:29 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-05-10 23:30:42 +0200 |
commit | 015903294ca983f007ab5cae098a54013e77f2f6 (patch) | |
tree | 66838f53dca82964270a1938692489c36e1fb1b0 /libavcodec | |
parent | 2a793ff2bf2197f36db3bf296668d44915142d03 (diff) | |
parent | 110d0cdc9d1ec414a658f841a3fbefbf6f796d61 (diff) | |
download | ffmpeg-015903294ca983f007ab5cae098a54013e77f2f6.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (25 commits)
rv40dsp x86: MMX/MMX2/3DNow/SSE2/SSSE3 implementations of MC
ape: Use unsigned integer maths
arm: dsputil: fix overreads in put/avg_pixels functions
h264: K&R formatting cosmetics for header files (part II/II)
h264: K&R formatting cosmetics for header files (part I/II)
rtmp: Implement check bandwidth notification.
rtmp: Support 'rtmp_swfurl', an option which specifies the URL of the SWF player.
rtmp: Support 'rtmp_flashver', an option which overrides the version of the Flash plugin.
rtmp: Support 'rtmp_tcurl', an option which overrides the URL of the target stream.
cmdutils: Add fallback case to switch in check_stream_specifier().
sctp: be consistent with socket option level
configure: Add _XOPEN_SOURCE=600 to Solaris preprocessor flags.
vcr1enc: drop pointless empty encode_init() wrapper function
vcr1: drop pointless write-only AVCodecContext member from VCR1Context
vcr1: group encoder code together to save #ifdefs
vcr1: cosmetics: K&R prettyprinting, typos, parentheses, dead code, comments
mov: make one comment slightly more specific
lavr: replace the SSE version of ff_conv_fltp_to_flt_6ch() with SSE4 and AVX
lavfi: move audio-related functions to a separate file.
lavfi: remove some audio-related function from public API.
...
Conflicts:
cmdutils.c
libavcodec/h264.h
libavcodec/h264_mvpred.h
libavcodec/vcr1.c
libavfilter/avfilter.c
libavfilter/avfilter.h
libavfilter/defaults.c
libavfilter/internal.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/apedec.c | 4 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_neon.S | 92 | ||||
-rw-r--r-- | libavcodec/h264.h | 496 | ||||
-rw-r--r-- | libavcodec/h264_mvpred.h | 1005 | ||||
-rw-r--r-- | libavcodec/h264data.h | 369 | ||||
-rw-r--r-- | libavcodec/h264dsp.h | 96 | ||||
-rw-r--r-- | libavcodec/h264pred.h | 81 | ||||
-rw-r--r-- | libavcodec/vcr1.c | 217 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 16 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.h | 5 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp.asm | 316 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp_init.c | 146 |
12 files changed, 1756 insertions, 1087 deletions
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index 05639f0b33..c0064208ad 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -393,7 +393,7 @@ static inline int range_get_symbol(APEContext *ctx, } /** @} */ // group rangecoder -static inline void update_rice(APERice *rice, int x) +static inline void update_rice(APERice *rice, unsigned int x) { int lim = rice->k ? (1 << (rice->k + 4)) : 0; rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); @@ -406,7 +406,7 @@ static inline void update_rice(APERice *rice, int x) static inline int ape_decode_value(APEContext *ctx, APERice *rice) { - int x, overflow; + unsigned int x, overflow; if (ctx->fileversion < 3990) { int tmpk; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index c660cb0d4c..7b301707d9 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -95,6 +95,7 @@ endfunc .endm .macro pixels16_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 1: subs r3, r3, #2 @@ -114,10 +115,25 @@ endfunc vst1.64 {q2}, [r0,:128], r2 vst1.64 {q3}, [r0,:128], r2 bne 1b + + avg q2, q0, q1 + vld1.64 {q0}, [r1], r2 + avg q3, q0, q1 + .if \avg + vld1.8 {q8}, [r0,:128], r2 + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q2, q2, q8 + vrhadd.u8 q3, q3, q9 + sub r0, r0, r2 + .endif + vst1.64 {q2}, [r0,:128], r2 + vst1.64 {q3}, [r0,:128], r2 + bx lr .endm .macro pixels16_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0-d2}, [r1], r2 vld1.64 {d4-d6}, [r1], r2 .ifeq \rnd @@ -173,6 +189,42 @@ endfunc vaddl.u8 q11, d3, d5 vst1.64 {q15}, [r0,:128], r2 bgt 1b + + vld1.64 {d0-d2}, [r1], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vext.8 q15, q0, q1, #1 + vadd.u16 q1 , q10, q11 + shrn d28, q12, #2 + .ifeq \rnd + vadd.u16 q1, q1, q13 + .endif + shrn d29, q1, #2 + .if \avg + vld1.8 {q8}, [r0,:128] + vrhadd.u8 q14, q14, q8 + .endif + vaddl.u8 q8, d0, d30 + vaddl.u8 q10, d1, d31 + vst1.64 {q14}, [r0,:128], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vadd.u16 q0, q10, q11 + shrn d30, q12, #2 + .ifeq \rnd + vadd.u16 q0, q0, q13 + .endif + shrn d31, q0, #2 + .if \avg + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q15, q15, q9 + .endif + vst1.64 {q15}, [r0,:128], r2 + bx lr .endm @@ -228,6 +280,7 @@ endfunc .endm .macro pixels8_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0}, [r1], r2 vld1.64 {d1}, [r1], r2 1: subs r3, r3, #2 @@ -246,10 +299,24 @@ endfunc vst1.64 {d4}, [r0,:64], r2 vst1.64 {d5}, [r0,:64], r2 bne 1b + + avg d4, d0, d1 + vld1.64 {d0}, [r1], r2 + avg d5, d0, d1 + .if \avg + vld1.8 {d2}, [r0,:64], r2 + vld1.8 {d3}, [r0,:64] + vrhadd.u8 q2, q2, q1 + sub r0, r0, r2 + .endif + vst1.64 {d4}, [r0,:64], r2 + vst1.64 {d5}, [r0,:64], r2 + bx lr .endm .macro pixels8_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 .ifeq \rnd @@ -291,6 +358,31 @@ endfunc vaddl.u8 q9, d2, d6 vst1.64 {d7}, [r0,:64], r2 bgt 1b + + vld1.64 {q0}, [r1], r2 + vadd.u16 q10, q8, q9 + vext.8 d4, d0, d1, #1 + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vaddl.u8 q8, d0, d4 + shrn d5, q10, #2 + vadd.u16 q10, q8, q9 + .if \avg + vld1.8 {d7}, [r0,:64] + vrhadd.u8 d5, d5, d7 + .endif + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vst1.64 {d5}, [r0,:64], r2 + shrn d7, q10, #2 + .if \avg + vld1.8 {d5}, [r0,:64] + vrhadd.u8 d7, d7, d5 + .endif + vst1.64 {d7}, [r0,:64], r2 + bx lr .endm diff --git a/libavcodec/h264.h b/libavcodec/h264.h index f5542bfb1d..0e11e304ee 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -37,14 +37,14 @@ #include "rectangle.h" #define interlaced_dct interlaced_dct_is_a_bad_name -#define mb_intra mb_intra_is_not_initialized_see_mb_type +#define mb_intra mb_intra_is_not_initialized_see_mb_type -#define MAX_SPS_COUNT 32 -#define MAX_PPS_COUNT 256 +#define MAX_SPS_COUNT 32 +#define MAX_PPS_COUNT 256 -#define MAX_MMCO_COUNT 66 +#define MAX_MMCO_COUNT 66 -#define MAX_DELAYED_PIC_COUNT 16 +#define MAX_DELAYED_PIC_COUNT 16 #define MAX_MBPAIR_SIZE (256*1024) // a tighter bound could be calculated if someone cares about a few bytes @@ -61,25 +61,25 @@ #define MAX_SLICES 16 #ifdef ALLOW_INTERLACE -#define MB_MBAFF h->mb_mbaff -#define MB_FIELD h->mb_field_decoding_flag +#define MB_MBAFF h->mb_mbaff +#define MB_FIELD h->mb_field_decoding_flag #define FRAME_MBAFF h->mb_aff_frame #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) #define LEFT_MBS 2 -#define LTOP 0 -#define LBOT 1 -#define LEFT(i) (i) +#define LTOP 0 +#define LBOT 1 +#define LEFT(i) (i) #else -#define MB_MBAFF 0 -#define MB_FIELD 0 -#define FRAME_MBAFF 0 +#define MB_MBAFF 0 +#define MB_FIELD 0 +#define FRAME_MBAFF 0 #define FIELD_PICTURE 0 #undef IS_INTERLACED #define IS_INTERLACED(mb_type) 0 #define LEFT_MBS 1 -#define LTOP 0 -#define LBOT 0 -#define LEFT(i) 0 +#define LTOP 0 +#define LBOT 0 +#define LEFT(i) 0 #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) @@ -91,9 +91,9 @@ #define CHROMA422 (h->sps.chroma_format_idc == 2) #define CHROMA444 (h->sps.chroma_format_idc == 3) -#define EXTENDED_SAR 255 +#define EXTENDED_SAR 255 -#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit +#define MB_TYPE_REF0 MB_TYPE_ACPRED // dirty but it fits in 16 bit #define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a) & MB_TYPE_REF0) #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT) @@ -108,7 +108,7 @@ /* NAL unit types */ enum { - NAL_SLICE=1, + NAL_SLICE = 1, NAL_DPA, NAL_DPB, NAL_DPC, @@ -121,17 +121,17 @@ enum { NAL_END_STREAM, NAL_FILLER_DATA, NAL_SPS_EXT, - NAL_AUXILIARY_SLICE=19 + NAL_AUXILIARY_SLICE = 19 }; /** * SEI message types */ typedef enum { - SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) - SEI_TYPE_PIC_TIMING = 1, ///< picture timing - SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data - SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) + SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) + SEI_TYPE_PIC_TIMING = 1, ///< picture timing + SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data + SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) } SEI_Type; /** @@ -152,8 +152,7 @@ typedef enum { /** * Sequence parameter set */ -typedef struct SPS{ - +typedef struct SPS { int profile_idc; int level_idc; int chroma_format_idc; @@ -170,9 +169,9 @@ typedef struct SPS{ int mb_width; ///< pic_width_in_mbs_minus1 + 1 int mb_height; ///< pic_height_in_map_units_minus1 + 1 int frame_mbs_only_flag; - int mb_aff; ///<mb_adaptive_frame_field_flag + int mb_aff; ///< mb_adaptive_frame_field_flag int direct_8x8_inference_flag; - int crop; ///< frame_cropping_flag + int crop; ///< frame_cropping_flag unsigned int crop_left; ///< frame_cropping_rect_left_offset unsigned int crop_right; ///< frame_cropping_rect_right_offset unsigned int crop_top; ///< frame_cropping_rect_top_offset @@ -189,7 +188,7 @@ typedef struct SPS{ uint32_t num_units_in_tick; uint32_t time_scale; int fixed_frame_rate_flag; - short offset_for_ref_frame[256]; //FIXME dyn aloc? + short offset_for_ref_frame[256]; // FIXME dyn aloc? int bitstream_restriction_flag; int num_reorder_frames; int scaling_matrix_present; @@ -199,20 +198,20 @@ typedef struct SPS{ int vcl_hrd_parameters_present_flag; int pic_struct_present_flag; int time_offset_length; - int cpb_cnt; ///< See H.264 E.1.2 - int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 +1 - int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 - int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 - int bit_depth_luma; ///< bit_depth_luma_minus8 + 8 - int bit_depth_chroma; ///< bit_depth_chroma_minus8 + 8 - int residual_color_transform_flag; ///< residual_colour_transform_flag - int constraint_set_flags; ///< constraint_set[0-3]_flag -}SPS; + int cpb_cnt; ///< See H.264 E.1.2 + int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1 + int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 + int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 + int bit_depth_luma; ///< bit_depth_luma_minus8 + 8 + int bit_depth_chroma; ///< bit_depth_chroma_minus8 + 8 + int residual_color_transform_flag; ///< residual_colour_transform_flag + int constraint_set_flags; ///< constraint_set[0-3]_flag +} SPS; /** * Picture parameter set */ -typedef struct PPS{ +typedef struct PPS { unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag int pic_order_present; ///< pic_order_present_flag @@ -225,20 +224,20 @@ typedef struct PPS{ int init_qs; ///< pic_init_qs_minus26 + 26 int chroma_qp_index_offset[2]; int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag - int constrained_intra_pred; ///< constrained_intra_pred_flag - int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag - int transform_8x8_mode; ///< transform_8x8_mode_flag + int constrained_intra_pred; ///< constrained_intra_pred_flag + int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag + int transform_8x8_mode; ///< transform_8x8_mode_flag uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table int chroma_qp_diff; -}PPS; +} PPS; /** * Memory management control operation opcode. */ -typedef enum MMCOOpcode{ - MMCO_END=0, +typedef enum MMCOOpcode { + MMCO_END = 0, MMCO_SHORT2UNUSED, MMCO_LONG2UNUSED, MMCO_SHORT2LONG, @@ -250,7 +249,7 @@ typedef enum MMCOOpcode{ /** * Memory management control operation. */ -typedef struct MMCO{ +typedef struct MMCO { MMCOOpcode opcode; int short_pic_num; ///< pic_num without wrapping (pic_num & max_pic_num) int long_arg; ///< index, pic_num, or num long refs depending on opcode @@ -259,18 +258,18 @@ typedef struct MMCO{ /** * H264Context */ -typedef struct H264Context{ +typedef struct H264Context { MpegEncContext s; H264DSPContext h264dsp; int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 - int chroma_qp[2]; //QPc + int chroma_qp[2]; // QPc int qp_thresh; ///< QP threshold to skip loopfilter int prev_mb_skipped; int next_mb_skipped; - //prediction stuff + // prediction stuff int chroma_pred_mode; int intra16x16_pred_mode; @@ -284,32 +283,32 @@ typedef struct H264Context{ int topright_type; int left_type[LEFT_MBS]; - const uint8_t * left_block; + const uint8_t *left_block; int topleft_partition; - int8_t intra4x4_pred_mode_cache[5*8]; - int8_t (*intra4x4_pred_mode); + int8_t intra4x4_pred_mode_cache[5 * 8]; + int8_t(*intra4x4_pred_mode); H264PredContext hpc; unsigned int topleft_samples_available; unsigned int top_samples_available; unsigned int topright_samples_available; unsigned int left_samples_available; - uint8_t (*top_borders[2])[(16*3)*2]; + uint8_t (*top_borders[2])[(16 * 3) * 2]; /** * non zero coeff count cache. * is 64 if not available. */ - DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8]; + DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8]; uint8_t (*non_zero_count)[48]; /** * Motion vector cache. */ - DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2]; - DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8]; -#define LIST_NOT_USED -1 //FIXME rename? + DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2]; + DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8]; +#define LIST_NOT_USED -1 // FIXME rename? #define PART_NOT_AVAILABLE -2 /** @@ -321,13 +320,13 @@ typedef struct H264Context{ * block_offset[ 0..23] for frame macroblocks * block_offset[24..47] for field macroblocks */ - int block_offset[2*(16*3)]; + int block_offset[2 * (16 * 3)]; - uint32_t *mb2b_xy; //FIXME are these 4 a good idea? + uint32_t *mb2b_xy; // FIXME are these 4 a good idea? uint32_t *mb2br_xy; - int b_stride; //FIXME use s->b4_stride + int b_stride; // FIXME use s->b4_stride - int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff + int mb_linesize; ///< may be equal to s->linesize or s->linesize * 2, for mbaff int mb_uvlinesize; int emu_edge_width; @@ -338,32 +337,32 @@ typedef struct H264Context{ /** * current pps */ - PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? + PPS pps; // FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; - uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[6])[64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down? + uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64]; + uint32_t(*dequant4_coeff[6])[16]; + uint32_t(*dequant8_coeff[6])[64]; int slice_num; - uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 + uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 int slice_type; - int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) + int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) int slice_type_fixed; - //interlacing specific flags + // interlacing specific flags int mb_aff_frame; int mb_field_decoding_flag; - int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag + int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - //Weighted pred stuff + // Weighted pred stuff int use_weight; int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; - //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss int luma_weight[48][2][2]; int chroma_weight[48][2][2][2]; int implicit_weight[48][48][2]; @@ -373,48 +372,48 @@ typedef struct H264Context{ int col_fieldoff; int dist_scale_factor[16]; int dist_scale_factor_field[2][32]; - int map_col_to_list0[2][16+32]; - int map_col_to_list0_field[2][2][16+32]; + int map_col_to_list0[2][16 + 32]; + int map_col_to_list0_field[2][2][16 + 32]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ - uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type - unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode + unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode unsigned int list_count; - Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. - Reordered version of default_ref_list - according to picture reordering in slice header */ - int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 + uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type + Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. + * Reordered version of default_ref_list + * according to picture reordering in slice header */ + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 - //data partitioning + // data partitioning GetBitContext intra_gb; GetBitContext inter_gb; GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. - DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; - DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb + DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2]; + DCTELEM mb_padding[256 * 2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[1024]; + uint8_t cabac_state[1024]; - /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ - uint16_t *cbp_table; + /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */ + uint16_t *cbp_table; int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ - uint8_t *chroma_pred_mode_table; - int last_qscale_diff; - uint8_t (*mvd_table[2])[2]; - DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; - uint8_t *direct_table; - uint8_t direct_cache[5*8]; + uint8_t *chroma_pred_mode_table; + int last_qscale_diff; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2]; + uint8_t *direct_table; + uint8_t direct_cache[5 * 8]; uint8_t zigzag_scan[16]; uint8_t zigzag_scan8x8[64]; @@ -435,13 +434,13 @@ typedef struct H264Context{ int is_complex; - //deblock - int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 + // deblock + int deblocking_filter; ///< disable_deblocking_filter_idc with 1 <-> 0 int slice_alpha_c0_offset; int slice_beta_offset; -//============================================================= - //Things below are not used in the MB or more inner code + // ============================================================= + // Things below are not used in the MB or more inner code int nal_ref_idc; int nal_unit_type; @@ -451,37 +450,36 @@ typedef struct H264Context{ /** * Used to parse AVC variant of h264 */ - int is_avc; ///< this flag is != 0 if codec is avc1 - int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) - int got_first; ///< this flag is != 0 if we've parsed a frame + int is_avc; ///< this flag is != 0 if codec is avc1 + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + int got_first; ///< this flag is != 0 if we've parsed a frame SPS *sps_buffers[MAX_SPS_COUNT]; PPS *pps_buffers[MAX_PPS_COUNT]; - int dequant_coeff_pps; ///< reinit tables when pps changes + int dequant_coeff_pps; ///< reinit tables when pps changes uint16_t *slice_table_base; - - //POC stuff + // POC stuff int poc_lsb; int poc_msb; int delta_poc_bottom; int delta_poc[2]; int frame_num; - int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 - int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 - int frame_num_offset; ///< for POC type 2 - int prev_frame_num_offset; ///< for POC type 2 - int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 + int frame_num_offset; ///< for POC type 2 + int prev_frame_num_offset; ///< for POC type 2 + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 /** - * frame_num for frames or 2*frame_num+1 for field pics. + * frame_num for frames or 2 * frame_num + 1 for field pics. */ int curr_pic_num; /** - * max_frame_num or 2*max_frame_num for field pics. + * max_frame_num or 2 * max_frame_num for field pics. */ int max_pic_num; @@ -490,7 +488,7 @@ typedef struct H264Context{ Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture - Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size? int last_pocs[MAX_DELAYED_PIC_COUNT]; Picture *next_output_pic; int outputed_poc; @@ -503,10 +501,10 @@ typedef struct H264Context{ int mmco_index; int mmco_reset; - int long_ref_count; ///< number of actual long term references - int short_ref_count; ///< number of actual short term references + int long_ref_count; ///< number of actual long term references + int short_ref_count; ///< number of actual short term references - int cabac_init_idc; + int cabac_init_idc; /** * @name Members for slice based multithreading @@ -582,12 +580,12 @@ typedef struct H264Context{ */ int recovery_frame; - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff - int sei_buffering_period_present; ///< Buffering period SEI flag - int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs + int sei_buffering_period_present; ///< Buffering period SEI flag + int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int cur_chroma_format_idc; @@ -598,10 +596,9 @@ typedef struct H264Context{ uint8_t parse_history[4]; int parse_history_count; int parse_last_mb; -}H264Context; - +} H264Context; -extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM+1]; ///< One chroma qp table for each possible bit depth (8-12). +extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM + 1]; ///< One chroma qp table for each possible bit depth (8-12). extern const uint16_t ff_h264_mb_sizes[4]; /** @@ -628,13 +625,16 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); * Decode a network abstraction layer unit. * @param consumed is the number of bytes used as input * @param length is the length of the array - * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? + * @param dst_length is the number of decoded bytes FIXME here + * or a decode rbsp tailing? * @return decoded bytes, might be src+1 if no escapes */ -const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); +const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, + int *dst_length, int *consumed, int length); /** - * Free any data that may have been allocated in the H264 context like SPS, PPS etc. + * Free any data that may have been allocated in the H264 context + * like SPS, PPS etc. */ av_cold void ff_h264_free_context(H264Context *h); @@ -667,12 +667,16 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); void ff_generate_sliding_window_mmcos(H264Context *h); - /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra4x4_pred_mode(H264Context *h); +/** + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. + */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); void ff_h264_hl_decode_mb(H264Context *h); @@ -683,24 +687,28 @@ av_cold void ff_h264_decode_init_vlc(void); /** * Decode a macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cavlc(H264Context *h); /** * Decode a CABAC coded macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cabac(H264Context *h); void ff_h264_init_cabac_states(H264Context *h); -void ff_h264_direct_dist_scale_factor(H264Context * const h); -void ff_h264_direct_ref_list_init(H264Context * const h); -void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); +void ff_h264_direct_dist_scale_factor(H264Context *const h); +void ff_h264_direct_ref_list_init(H264Context *const h); +void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type); -void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); /** * Reset SEI values at the beginning of the frame. @@ -709,16 +717,15 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint */ void ff_h264_reset_sei(H264Context *h); - /* -o-o o-o - / / / -o-o o-o - ,---' -o-o o-o - / / / -o-o o-o -*/ + * o-o o-o + * / / / + * o-o o-o + * ,---' + * o-o o-o + * / / / + * o-o o-o + */ /* Scan8 organization: * 0 1 2 3 4 5 6 7 @@ -743,156 +750,173 @@ o-o o-o #define LUMA_DC_BLOCK_INDEX 48 #define CHROMA_DC_BLOCK_INDEX 49 -//This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16*3 + 3]={ - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, - 4+11*8, 5+11*8, 4+12*8, 5+12*8, - 6+11*8, 7+11*8, 6+12*8, 7+12*8, - 4+13*8, 5+13*8, 4+14*8, 5+14*8, - 6+13*8, 7+13*8, 6+14*8, 7+14*8, - 0+ 0*8, 0+ 5*8, 0+10*8 +// This table must be here because scan8[constant] must be known at compiletime +static const uint8_t scan8[16 * 3 + 3] = { + 4 + 1 * 8, 5 + 1 * 8, 4 + 2 * 8, 5 + 2 * 8, + 6 + 1 * 8, 7 + 1 * 8, 6 + 2 * 8, 7 + 2 * 8, + 4 + 3 * 8, 5 + 3 * 8, 4 + 4 * 8, 5 + 4 * 8, + 6 + 3 * 8, 7 + 3 * 8, 6 + 4 * 8, 7 + 4 * 8, + 4 + 6 * 8, 5 + 6 * 8, 4 + 7 * 8, 5 + 7 * 8, + 6 + 6 * 8, 7 + 6 * 8, 6 + 7 * 8, 7 + 7 * 8, + 4 + 8 * 8, 5 + 8 * 8, 4 + 9 * 8, 5 + 9 * 8, + 6 + 8 * 8, 7 + 8 * 8, 6 + 9 * 8, 7 + 9 * 8, + 4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8, + 6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8, + 4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8, + 6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8, + 0 + 0 * 8, 0 + 5 * 8, 0 + 10 * 8 }; -static av_always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFFFF) + (a<<16); + return (b & 0xFFFF) + (a << 16); #else - return (a&0xFFFF) + (b<<16); + return (a & 0xFFFF) + (b << 16); #endif } -static av_always_inline uint16_t pack8to16(int a, int b){ +static av_always_inline uint16_t pack8to16(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFF) + (a<<8); + return (b & 0xFF) + (a << 8); #else - return (a&0xFF) + (b<<8); + return (a & 0xFF) + (b << 8); #endif } /** * Get the chroma qp. */ -static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){ +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale) +{ return h->pps.chroma_qp_table[t][qscale]; } /** * Get the predicted intra4x4 prediction mode. */ -static av_always_inline int pred_intra_mode(H264Context *h, int n){ - const int index8= scan8[n]; - const int left= h->intra4x4_pred_mode_cache[index8 - 1]; - const int top = h->intra4x4_pred_mode_cache[index8 - 8]; - const int min= FFMIN(left, top); +static av_always_inline int pred_intra_mode(H264Context *h, int n) +{ + const int index8 = scan8[n]; + const int left = h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min = FFMIN(left, top); - tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); + tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min); - if(min<0) return DC_PRED; - else return min; + if (min < 0) + return DC_PRED; + else + return min; } -static av_always_inline void write_back_intra_pred_mode(H264Context *h){ - int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; - int8_t *i4x4_cache= h->intra4x4_pred_mode_cache; +static av_always_inline void write_back_intra_pred_mode(H264Context *h) +{ + int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + int8_t *i4x4_cache = h->intra4x4_pred_mode_cache; - AV_COPY32(i4x4, i4x4_cache + 4 + 8*4); - i4x4[4]= i4x4_cache[7+8*3]; - i4x4[5]= i4x4_cache[7+8*2]; - i4x4[6]= i4x4_cache[7+8*1]; + AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4); + i4x4[4] = i4x4_cache[7 + 8 * 3]; + i4x4[5] = i4x4_cache[7 + 8 * 2]; + i4x4[6] = i4x4_cache[7 + 8 * 1]; } -static av_always_inline void write_back_non_zero_count(H264Context *h){ - const int mb_xy= h->mb_xy; - uint8_t *nnz = h->non_zero_count[mb_xy]; +static av_always_inline void write_back_non_zero_count(H264Context *h) +{ + const int mb_xy = h->mb_xy; + uint8_t *nnz = h->non_zero_count[mb_xy]; uint8_t *nnz_cache = h->non_zero_count_cache; - AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]); - AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]); - AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]); - AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]); - AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]); - AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]); - AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); - AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); - - if(!h->s.chroma_y_shift){ - AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); - AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); - AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); - AV_COPY32(&nnz[44], &nnz_cache[4+8*14]); + AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]); + AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]); + AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]); + AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]); + AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]); + AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]); + AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]); + AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]); + + if (!h->s.chroma_y_shift) { + AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]); + AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]); + AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]); + AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]); } } -static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride, - int b_xy, int b8_xy, int mb_type, int list ) +static av_always_inline void write_back_motion_list(H264Context *h, + MpegEncContext *const s, + int b_stride, + int b_xy, int b8_xy, + int mb_type, int list) { - int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; - int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; - AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); - AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1); - AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2); - AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3); - if( CABAC ) { - uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; - uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; - if(IS_SKIP(mb_type)) + int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; + int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]]; + AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0); + AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1); + AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2); + AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3); + if (CABAC) { + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy + : h->mb2br_xy[h->mb_xy]]; + uint8_t(*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + if (IS_SKIP(mb_type)) { AV_ZERO128(mvd_dst); - else{ - AV_COPY64(mvd_dst, mvd_src + 8*3); - AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); - AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); - AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); + } else { + AV_COPY64(mvd_dst, mvd_src + 8 * 3); + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0); + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1); + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2); } } { int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy]; int8_t *ref_cache = h->ref_cache[list]; - ref_index[0+0*2]= ref_cache[scan8[0]]; - ref_index[1+0*2]= ref_cache[scan8[4]]; - ref_index[0+1*2]= ref_cache[scan8[8]]; - ref_index[1+1*2]= ref_cache[scan8[12]]; + ref_index[0 + 0 * 2] = ref_cache[scan8[0]]; + ref_index[1 + 0 * 2] = ref_cache[scan8[4]]; + ref_index[0 + 1 * 2] = ref_cache[scan8[8]]; + ref_index[1 + 1 * 2] = ref_cache[scan8[12]]; } } -static av_always_inline void write_back_motion(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int b_stride = h->b_stride; - const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy - const int b8_xy= 4*h->mb_xy; +static av_always_inline void write_back_motion(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int b_stride = h->b_stride; + const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy + const int b8_xy = 4 * h->mb_xy; - if(USES_LIST(mb_type, 0)){ + if (USES_LIST(mb_type, 0)) { write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0); - }else{ + } else { fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); } - if(USES_LIST(mb_type, 1)){ + if (USES_LIST(mb_type, 1)) write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1); - } - if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){ - if(IS_8X8(mb_type)){ - uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; - direct_table[1] = h->sub_mb_type[1]>>1; - direct_table[2] = h->sub_mb_type[2]>>1; - direct_table[3] = h->sub_mb_type[3]>>1; + if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) { + if (IS_8X8(mb_type)) { + uint8_t *direct_table = &h->direct_table[4 * h->mb_xy]; + direct_table[1] = h->sub_mb_type[1] >> 1; + direct_table[2] = h->sub_mb_type[2] >> 1; + direct_table[3] = h->sub_mb_type[3] >> 1; } } } -static av_always_inline int get_dct8x8_allowed(H264Context *h){ - if(h->sps.direct_8x8_inference_flag) - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); +static av_always_inline int get_dct8x8_allowed(H264Context *h) +{ + if (h->sps.direct_8x8_inference_flag) + return !(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) * + 0x0001000100010001ULL)); else - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); + return !(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) * + 0x0001000100010001ULL)); } #endif /* AVCODEC_H264_H */ diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h index 85405c1542..12064c81e7 100644 --- a/libavcodec/h264_mvpred.h +++ b/libavcodec/h264_mvpred.h @@ -35,53 +35,53 @@ //#undef NDEBUG #include <assert.h> -static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ - const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; - MpegEncContext *s = &h->s; +static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, + int i, int list, int part_width) +{ + const int topright_ref = h->ref_cache[list][i - 8 + part_width]; + MpegEncContext *s = &h->s; /* there is no consistent mapping of mvs to neighboring locations that will * make mbaff happy, so we can't move all this logic to fill_caches */ - if(FRAME_MBAFF){ - -#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ - const int xy = XY, y4 = Y4;\ - const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ - if(!USES_LIST(mb_type,list))\ - return LIST_NOT_USED;\ - mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4*h->b_stride];\ - h->mv_cache[list][scan8[0]-2][0] = mv[0];\ - h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ - return s->current_picture_ptr->f.ref_index[list][4*xy + 1 + (y4 & ~1)] REF_OP; - - if(topright_ref == PART_NOT_AVAILABLE - && i >= scan8[0]+8 && (i&7)==4 - && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ + if (FRAME_MBAFF) { +#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4) \ + const int xy = XY, y4 = Y4; \ + const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride]; \ + if (!USES_LIST(mb_type, list)) \ + return LIST_NOT_USED; \ + mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \ + h->mv_cache[list][scan8[0] - 2][0] = mv[0]; \ + h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP; \ + return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP; + + if (topright_ref == PART_NOT_AVAILABLE + && i >= scan8[0] + 8 && (i & 7) == 4 + && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) { const uint32_t *mb_types = s->current_picture_ptr->f.mb_type; const int16_t *mv; - AV_ZERO32(h->mv_cache[list][scan8[0]-2]); - *C = h->mv_cache[list][scan8[0]-2]; + AV_ZERO32(h->mv_cache[list][scan8[0] - 2]); + *C = h->mv_cache[list][scan8[0] - 2]; - if(!MB_FIELD - && IS_INTERLACED(h->left_type[0])){ - SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); + if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) { + SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride, + (s->mb_y & 1) * 2 + (i >> 5)); } - if(MB_FIELD - && !IS_INTERLACED(h->left_type[0])){ + if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) { // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. - SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); + SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3); } } #undef SET_DIAG_MV } - if(topright_ref != PART_NOT_AVAILABLE){ - *C= h->mv_cache[list][ i - 8 + part_width ]; + if (topright_ref != PART_NOT_AVAILABLE) { + *C = h->mv_cache[list][i - 8 + part_width]; return topright_ref; - }else{ + } else { tprintf(s->avctx, "topright MV not available\n"); - *C= h->mv_cache[list][ i - 8 - 1 ]; - return h->ref_cache[list][ i - 8 - 1 ]; + *C = h->mv_cache[list][i - 8 - 1]; + return h->ref_cache[list][i - 8 - 1]; } } @@ -92,53 +92,61 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ - const int index8= scan8[n]; - const int top_ref= h->ref_cache[list][ index8 - 8 ]; - const int left_ref= h->ref_cache[list][ index8 - 1 ]; - const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; - const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; - const int16_t * C; +static av_always_inline void pred_motion(H264Context *const h, int n, + int part_width, int list, int ref, + int *const mx, int *const my) +{ + const int index8 = scan8[n]; + const int top_ref = h->ref_cache[list][index8 - 8]; + const int left_ref = h->ref_cache[list][index8 - 1]; + const int16_t *const A = h->mv_cache[list][index8 - 1]; + const int16_t *const B = h->mv_cache[list][index8 - 8]; + const int16_t *C; int diagonal_ref, match_count; - assert(part_width==1 || part_width==2 || part_width==4); + assert(part_width == 1 || part_width == 2 || part_width == 4); /* mv_cache - B . . A T T T T - U . . L . . , . - U . . L . . . . - U . . L . . , . - . . . L . . . . -*/ - - diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); - match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + * B . . A T T T T + * U . . L . . , . + * U . . L . . . . + * U . . L . . , . + * . . . L . . . . + */ + + diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width); + match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref); tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); - if(match_count > 1){ //most common - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(left_ref==ref){ - *mx= A[0]; - *my= A[1]; - }else if(top_ref==ref){ - *mx= B[0]; - *my= B[1]; - }else{ - *mx= C[0]; - *my= C[1]; + if (match_count > 1) { //most common + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); + } else if (match_count == 1) { + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; + } else if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; + } else { + *mx = C[0]; + *my = C[1]; } - }else{ - if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ - *mx= A[0]; - *my= A[1]; - }else{ - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); + } else { + if (top_ref == PART_NOT_AVAILABLE && + diagonal_ref == PART_NOT_AVAILABLE && + left_ref != PART_NOT_AVAILABLE) { + *mx = A[0]; + *my = A[1]; + } else { + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); } } - tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, + "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, + A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); } /** @@ -147,27 +155,32 @@ static av_always_inline void pred_motion(H264Context * const h, int n, int part_ * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; - const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; - - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); - - if(top_ref == ref){ - *mx= B[0]; - *my= B[1]; +static av_always_inline void pred_16x8_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int top_ref = h->ref_cache[list][scan8[0] - 8]; + const int16_t *const B = h->mv_cache[list][scan8[0] - 8]; + + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + + if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; return; } - }else{ - const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; + } else { + const int left_ref = h->ref_cache[list][scan8[8] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[8] - 1]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } } @@ -182,29 +195,34 @@ static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; - - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; +static av_always_inline void pred_8x16_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int left_ref = h->ref_cache[list][scan8[0] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[0] - 1]; + + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } - }else{ - const int16_t * C; + } else { + const int16_t *C; int diagonal_ref; - diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); + diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2); - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); - if(diagonal_ref == ref){ - *mx= C[0]; - *my= C[1]; + if (diagonal_ref == ref) { + *mx = C[0]; + *my = C[1]; return; } } @@ -213,168 +231,174 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int pred_motion(h, n, 2, list, ref, mx, my); } -#define FIX_MV_MBAFF(type, refn, mvn, idx)\ - if(FRAME_MBAFF){\ - if(MB_FIELD){\ - if(!IS_INTERLACED(type)){\ - refn <<= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] /= 2;\ - mvn = mvbuf[idx];\ - }\ - }else{\ - if(IS_INTERLACED(type)){\ - refn >>= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] <<= 1;\ - mvn = mvbuf[idx];\ - }\ - }\ +#define FIX_MV_MBAFF(type, refn, mvn, idx) \ + if (FRAME_MBAFF) { \ + if (MB_FIELD) { \ + if (!IS_INTERLACED(type)) { \ + refn <<= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] /= 2; \ + mvn = mvbuf[idx]; \ + } \ + } else { \ + if (IS_INTERLACED(type)) { \ + refn >>= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] <<= 1; \ + mvn = mvbuf[idx]; \ + } \ + } \ } -static av_always_inline void pred_pskip_motion(H264Context * const h){ - DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0}; +static av_always_inline void pred_pskip_motion(H264Context *const h) +{ + DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 }; DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2]; - MpegEncContext * const s = &h->s; - int8_t *ref = s->current_picture.f.ref_index[0]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[0]; + MpegEncContext *const s = &h->s; + int8_t *ref = s->current_picture.f.ref_index[0]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[0]; int top_ref, left_ref, diagonal_ref, match_count, mx, my; const int16_t *A, *B, *C; int b_stride = h->b_stride; fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. - * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's - * faster this way. Is there a way to avoid this duplication? + /* To avoid doing an entire fill_decode_caches, we inline the relevant + * parts here. + * FIXME: this is a partial duplicate of the logic in fill_decode_caches, + * but it's faster this way. Is there a way to avoid this duplication? */ - if(USES_LIST(h->left_type[LTOP], 0)){ - left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)]; - A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]]; + if (USES_LIST(h->left_type[LTOP], 0)) { + left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)]; + A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]]; FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0); - if(!(left_ref | AV_RN32A(A))){ + if (!(left_ref | AV_RN32A(A))) goto zeromv; - } - }else if(h->left_type[LTOP]){ + } else if (h->left_type[LTOP]) { left_ref = LIST_NOT_USED; - A = zeromv; - }else{ + A = zeromv; + } else { goto zeromv; } - if(USES_LIST(h->top_type, 0)){ - top_ref = ref[4*h->top_mb_xy + 2]; - B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride]; + if (USES_LIST(h->top_type, 0)) { + top_ref = ref[4 * h->top_mb_xy + 2]; + B = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->top_type, top_ref, B, 1); - if(!(top_ref | AV_RN32A(B))){ + if (!(top_ref | AV_RN32A(B))) goto zeromv; - } - }else if(h->top_type){ + } else if (h->top_type) { top_ref = LIST_NOT_USED; - B = zeromv; - }else{ + B = zeromv; + } else { goto zeromv; } - tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); + tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", + top_ref, left_ref, h->s.mb_x, h->s.mb_y); - if(USES_LIST(h->topright_type, 0)){ - diagonal_ref = ref[4*h->topright_mb_xy + 2]; - C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride]; + if (USES_LIST(h->topright_type, 0)) { + diagonal_ref = ref[4 * h->topright_mb_xy + 2]; + C = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2); - }else if(h->topright_type){ + } else if (h->topright_type) { diagonal_ref = LIST_NOT_USED; C = zeromv; - }else{ - if(USES_LIST(h->topleft_type, 0)){ - diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)]; - C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)]; + } else { + if (USES_LIST(h->topleft_type, 0)) { + diagonal_ref = ref[4 * h->topleft_mb_xy + 1 + + (h->topleft_partition & 2)]; + C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride)]; FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2); - }else if(h->topleft_type){ + } else if (h->topleft_type) { diagonal_ref = LIST_NOT_USED; - C = zeromv; - }else{ + C = zeromv; + } else { diagonal_ref = PART_NOT_AVAILABLE; - C = zeromv; + C = zeromv; } } - match_count= !diagonal_ref + !top_ref + !left_ref; + match_count = !diagonal_ref + !top_ref + !left_ref; tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count); - if(match_count > 1){ + if (match_count > 1) { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(!left_ref){ + } else if (match_count == 1) { + if (!left_ref) { mx = A[0]; my = A[1]; - }else if(!top_ref){ + } else if (!top_ref) { mx = B[0]; my = B[1]; - }else{ + } else { mx = C[0]; my = C[1]; } - }else{ + } else { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); } - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4); return; + zeromv: - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); return; } -static void fill_decode_neighbors(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; +static void fill_decode_neighbors(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; - static const uint8_t left_block_options[4][32]={ - {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, - {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, - {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}, - {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4} + static const uint8_t left_block_options[4][32] = { + { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }, + { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 } }; - h->topleft_partition= -1; + h->topleft_partition = -1; - top_xy = mb_xy - (s->mb_stride << MB_FIELD); + top_xy = mb_xy - (s->mb_stride << MB_FIELD); /* Wow, what a mess, why didn't they simplify the interlacing & intra * stuff, I can't imagine that these complex rules are worth it. */ - topleft_xy = top_xy - 1; - topright_xy= top_xy + 1; - left_xy[LBOT] = left_xy[LTOP] = mb_xy-1; + topleft_xy = top_xy - 1; + topright_xy = top_xy + 1; + left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1; h->left_block = left_block_options[0]; - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]); const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ + if (s->mb_y & 1) { if (left_mb_field_flag != curr_mb_field_flag) { left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1; if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { topleft_xy += s->mb_stride; - // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition + /* take top left mv from the middle of the mb, as opposed + * to all other modes which use the bottom right partition */ h->topleft_partition = 0; - h->left_block = left_block_options[1]; + h->left_block = left_block_options[1]; } } - }else{ - if(curr_mb_field_flag){ + } else { + if (curr_mb_field_flag) { topleft_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1); topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1); - top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy ] >> 7) & 1) - 1); + top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1); } if (left_mb_field_flag != curr_mb_field_flag) { if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { h->left_block = left_block_options[2]; } @@ -382,9 +406,9 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ } } - h->topleft_mb_xy = topleft_xy; - h->top_mb_xy = top_xy; - h->topright_mb_xy= topright_xy; + h->topleft_mb_xy = topleft_xy; + h->top_mb_xy = top_xy; + h->topright_mb_xy = topright_xy; h->left_mb_xy[LTOP] = left_xy[LTOP]; h->left_mb_xy[LBOT] = left_xy[LBOT]; //FIXME do we need all in the context? @@ -395,351 +419,372 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]]; h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]]; - if(FMO){ - if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; - }else{ - if(h->slice_table[topleft_xy ] != h->slice_num){ + if (FMO) { + if (h->slice_table[topleft_xy] != h->slice_num) + h->topleft_type = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; + } else { + if (h->slice_table[topleft_xy] != h->slice_num) { h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; } } - if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; + if (h->slice_table[topright_xy] != h->slice_num) + h->topright_type = 0; } -static void fill_decode_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; +static void fill_decode_caches(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; int topleft_type, top_type, topright_type, left_type[LEFT_MBS]; - const uint8_t * left_block= h->left_block; + const uint8_t *left_block = h->left_block; int i; uint8_t *nnz; uint8_t *nnz_cache; - topleft_xy = h->topleft_mb_xy; - top_xy = h->top_mb_xy; - topright_xy = h->topright_mb_xy; - left_xy[LTOP] = h->left_mb_xy[LTOP]; - left_xy[LBOT] = h->left_mb_xy[LBOT]; - topleft_type = h->topleft_type; - top_type = h->top_type; - topright_type = h->topright_type; - left_type[LTOP]= h->left_type[LTOP]; - left_type[LBOT]= h->left_type[LBOT]; - - if(!IS_SKIP(mb_type)){ - if(IS_INTRA(mb_type)){ - int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; - h->topleft_samples_available= - h->top_samples_available= - h->left_samples_available= 0xFFFF; - h->topright_samples_available= 0xEEEA; - - if(!(top_type & type_mask)){ - h->topleft_samples_available= 0xB3FF; - h->top_samples_available= 0x33FF; - h->topright_samples_available= 0x26EA; + topleft_xy = h->topleft_mb_xy; + top_xy = h->top_mb_xy; + topright_xy = h->topright_mb_xy; + left_xy[LTOP] = h->left_mb_xy[LTOP]; + left_xy[LBOT] = h->left_mb_xy[LBOT]; + topleft_type = h->topleft_type; + top_type = h->top_type; + topright_type = h->topright_type; + left_type[LTOP] = h->left_type[LTOP]; + left_type[LBOT] = h->left_type[LBOT]; + + if (!IS_SKIP(mb_type)) { + if (IS_INTRA(mb_type)) { + int type_mask = h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; + h->topleft_samples_available = + h->top_samples_available = + h->left_samples_available = 0xFFFF; + h->topright_samples_available = 0xEEEA; + + if (!(top_type & type_mask)) { + h->topleft_samples_available = 0xB3FF; + h->top_samples_available = 0x33FF; + h->topright_samples_available = 0x26EA; } - if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){ - if(IS_INTERLACED(mb_type)){ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDFFF; - h->left_samples_available&= 0x5FFF; + if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) { + if (IS_INTERLACED(mb_type)) { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDFFF; + h->left_samples_available &= 0x5FFF; } - if(!(left_type[LBOT] & type_mask)){ - h->topleft_samples_available&= 0xFF5F; - h->left_samples_available&= 0xFF5F; + if (!(left_type[LBOT] & type_mask)) { + h->topleft_samples_available &= 0xFF5F; + h->left_samples_available &= 0xFF5F; } - }else{ + } else { int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride]; assert(left_xy[LTOP] == left_xy[LBOT]); - if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - }else{ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + } else { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - if(!(topleft_type & type_mask)) - h->topleft_samples_available&= 0x7FFF; + if (!(topleft_type & type_mask)) + h->topleft_samples_available &= 0x7FFF; - if(!(topright_type & type_mask)) - h->topright_samples_available&= 0xFBFF; + if (!(topright_type & type_mask)) + h->topright_samples_available &= 0xFBFF; - if(IS_INTRA4x4(mb_type)){ - if(IS_INTRA4x4(top_type)){ - AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); - }else{ - h->intra4x4_pred_mode_cache[4+8*0]= - h->intra4x4_pred_mode_cache[5+8*0]= - h->intra4x4_pred_mode_cache[6+8*0]= - h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); + if (IS_INTRA4x4(mb_type)) { + if (IS_INTRA4x4(top_type)) { + AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); + } else { + h->intra4x4_pred_mode_cache[4 + 8 * 0] = + h->intra4x4_pred_mode_cache[5 + 8 * 0] = + h->intra4x4_pred_mode_cache[6 + 8 * 0] = + h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask); } - for(i=0; i<2; i++){ - if(IS_INTRA4x4(left_type[LEFT(i)])){ - int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; - }else{ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask); + for (i = 0; i < 2; i++) { + if (IS_INTRA4x4(left_type[LEFT(i)])) { + int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]]; + h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]]; + } else { + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = + h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask); } } } } - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - nnz_cache = h->non_zero_count_cache; - if(top_type){ - nnz = h->non_zero_count[top_xy]; - AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); - if(!s->chroma_y_shift){ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); - }else{ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]); + /* + * 0 . T T. T T T T + * 1 L . .L . . . . + * 2 L . .L . . . . + * 3 . T TL . . . . + * 4 L . .L . . . . + * 5 L . .. . . . . + */ + /* FIXME: constraint_intra_pred & partitioning & nnz + * (let us hope this is just a typo in the spec) */ + nnz_cache = h->non_zero_count_cache; + if (top_type) { + nnz = h->non_zero_count[top_xy]; + AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]); + if (!s->chroma_y_shift) { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]); + } else { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]); + } + } else { + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; + AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty); } - }else{ - uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; - AV_WN32A(&nnz_cache[4+8* 0], top_empty); - AV_WN32A(&nnz_cache[4+8* 5], top_empty); - AV_WN32A(&nnz_cache[4+8*10], top_empty); - } - for (i=0; i<2; i++) { - if(left_type[LEFT(i)]){ - nnz = h->non_zero_count[left_xy[LEFT(i)]]; - nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]]; - nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]]; - if(CHROMA444){ - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; - }else if(CHROMA422) { - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; - }else{ - nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; - nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; + for (i = 0; i < 2; i++) { + if (left_type[LEFT(i)]) { + nnz = h->non_zero_count[left_xy[LEFT(i)]]; + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]]; + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]]; + if (CHROMA444) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4]; + } else if (CHROMA422) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4]; + } else { + nnz_cache[3 + 8 * 6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]]; + nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]]; + } + } else { + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64; } - }else{ - nnz_cache[3+8* 1 + 2*8*i]= - nnz_cache[3+8* 2 + 2*8*i]= - nnz_cache[3+8* 6 + 2*8*i]= - nnz_cache[3+8* 7 + 2*8*i]= - nnz_cache[3+8*11 + 2*8*i]= - nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; } - } - if( CABAC ) { - // top_cbp - if(top_type) { - h->top_cbp = h->cbp_table[top_xy]; - } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; - } - // left_cbp - if (left_type[LTOP]) { - h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) - | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2) - | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2); - } else { - h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + if (CABAC) { + // top_cbp + if (top_type) + h->top_cbp = h->cbp_table[top_xy]; + else + h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + // left_cbp + if (left_type[LTOP]) { + h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) | + ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) | + (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2); + } else { + h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + } } } - } - if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ + if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) { int list; int b_stride = h->b_stride; - for(list=0; list<h->list_count; list++){ + for (list = 0; list < h->list_count; list++) { int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; int8_t *ref = s->current_picture.f.ref_index[list]; - int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[list]; - if(!USES_LIST(mb_type, list)){ + int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[list]; + if (!USES_LIST(mb_type, list)) continue; - } assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; - AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]); - ref_cache[0 - 1*8]= - ref_cache[1 - 1*8]= ref[4*top_xy + 2]; - ref_cache[2 - 1*8]= - ref_cache[3 - 1*8]= ref[4*top_xy + 3]; - }else{ - AV_ZERO128(mv_cache[0 - 1*8]); - AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u); + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; + AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]); + ref_cache[0 - 1 * 8] = + ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2]; + ref_cache[2 - 1 * 8] = + ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3]; + } else { + AV_ZERO128(mv_cache[0 - 1 * 8]); + AV_WN32A(&ref_cache[0 - 1 * 8], + ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u); } - if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ - for(i=0; i<2; i++){ - int cache_idx = -1 + i*2*8; - if(USES_LIST(left_type[LEFT(i)], list)){ - const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3; - const int b8_xy= 4*left_xy[LEFT(i)] + 1; - AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]); - AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]); - ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)]; - ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)]; - }else{ - AV_ZERO32(mv_cache[cache_idx ]); - AV_ZERO32(mv_cache[cache_idx+8]); - ref_cache[cache_idx ]= - ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) { + for (i = 0; i < 2; i++) { + int cache_idx = -1 + i * 2 * 8; + if (USES_LIST(left_type[LEFT(i)], list)) { + const int b_xy = h->mb2b_xy[left_xy[LEFT(i)]] + 3; + const int b8_xy = 4 * left_xy[LEFT(i)] + 1; + AV_COPY32(mv_cache[cache_idx], + mv[b_xy + b_stride * left_block[0 + i * 2]]); + AV_COPY32(mv_cache[cache_idx + 8], + mv[b_xy + b_stride * left_block[1 + i * 2]]); + ref_cache[cache_idx] = ref[b8_xy + (left_block[0 + i * 2] & ~1)]; + ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)]; + } else { + AV_ZERO32(mv_cache[cache_idx]); + AV_ZERO32(mv_cache[cache_idx + 8]); + ref_cache[cache_idx] = + ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? LIST_NOT_USED + : PART_NOT_AVAILABLE; + } } - } - }else{ - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; - const int b8_xy= 4*left_xy[LTOP] + 1; - AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]); - ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)]; - }else{ + } else { + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; + const int b8_xy = 4 * left_xy[LTOP] + 1; + AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]); + ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)]; + } else { AV_ZERO32(mv_cache[-1]); - ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if(USES_LIST(topright_type, list)){ - const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride; - AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]); - ref_cache[4 - 1*8]= ref[4*topright_xy + 2]; - }else{ - AV_ZERO32(mv_cache[4 - 1*8]); - ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topright_type, list)) { + const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride; + AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]); + ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2]; + } else { + AV_ZERO32(mv_cache[4 - 1 * 8]); + ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } if(ref_cache[2 - 1*8] < 0 || ref_cache[4 - 1*8] < 0){ - if(USES_LIST(topleft_type, list)){ - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride); - const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); - AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]); - ref_cache[-1 - 1*8]= ref[b8_xy]; - }else{ - AV_ZERO32(mv_cache[-1 - 1*8]); - ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topleft_type, list)) { + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride); + const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2); + AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]); + ref_cache[-1 - 1 * 8] = ref[b8_xy]; + } else { + AV_ZERO32(mv_cache[-1 - 1 * 8]); + ref_cache[-1 - 1 * 8] = topleft_type ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) + if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF) continue; - if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){ - uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; - uint8_t (*mvd)[2] = h->mvd_table[list]; - ref_cache[2+8*0] = - ref_cache[2+8*2] = PART_NOT_AVAILABLE; - AV_ZERO32(mv_cache[2+8*0]); - AV_ZERO32(mv_cache[2+8*2]); - - if( CABAC ) { - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2br_xy[top_xy]; - AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]); - }else{ - AV_ZERO64(mvd_cache[0 - 1*8]); + if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) { + uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t(*mvd)[2] = h->mvd_table[list]; + ref_cache[2 + 8 * 0] = + ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE; + AV_ZERO32(mv_cache[2 + 8 * 0]); + AV_ZERO32(mv_cache[2 + 8 * 2]); + + if (CABAC) { + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2br_xy[top_xy]; + AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]); + } else { + AV_ZERO64(mvd_cache[0 - 1 * 8]); } - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6; - AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]); - AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 0*8]); - AV_ZERO16(mvd_cache[-1 + 1*8]); + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6; + AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]); + AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]); + } else { + AV_ZERO16(mvd_cache[-1 + 0 * 8]); + AV_ZERO16(mvd_cache[-1 + 1 * 8]); } - if(USES_LIST(left_type[LBOT], list)){ - const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6; - AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]); - AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 2*8]); - AV_ZERO16(mvd_cache[-1 + 3*8]); + if (USES_LIST(left_type[LBOT], list)) { + const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6; + AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]); + AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]); + } else { + AV_ZERO16(mvd_cache[-1 + 2 * 8]); + AV_ZERO16(mvd_cache[-1 + 3 * 8]); } - AV_ZERO16(mvd_cache[2+8*0]); - AV_ZERO16(mvd_cache[2+8*2]); - if(h->slice_type_nos == AV_PICTURE_TYPE_B){ + AV_ZERO16(mvd_cache[2 + 8 * 0]); + AV_ZERO16(mvd_cache[2 + 8 * 2]); + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { uint8_t *direct_cache = &h->direct_cache[scan8[0]]; uint8_t *direct_table = h->direct_table; - fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1); - - if(IS_DIRECT(top_type)){ - AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1)); - }else if(IS_8X8(top_type)){ - int b8_xy = 4*top_xy; - direct_cache[0 - 1*8]= direct_table[b8_xy + 2]; - direct_cache[2 - 1*8]= direct_table[b8_xy + 3]; - }else{ - AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1)); + fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1); + + if (IS_DIRECT(top_type)) { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101u * (MB_TYPE_DIRECT2 >> 1)); + } else if (IS_8X8(top_type)) { + int b8_xy = 4 * top_xy; + direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2]; + direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3]; + } else { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101 * (MB_TYPE_16x16 >> 1)); } - if(IS_DIRECT(left_type[LTOP])) - direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LTOP])) - direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)]; + if (IS_DIRECT(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)]; else - direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1; - if(IS_DIRECT(left_type[LBOT])) - direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LBOT])) - direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)]; + if (IS_DIRECT(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)]; else - direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1; } } } - if(FRAME_MBAFF){ -#define MAP_MVS\ - MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ - MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ - MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\ - MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT]) - if(MB_FIELD){ -#define MAP_F2F(idx, mb_type)\ - if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] <<= 1;\ - h->mv_cache[list][idx][1] /= 2;\ - h->mvd_cache[list][idx][1] >>=1;\ - } + +#define MAP_MVS \ + MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type) \ + MAP_F2F(scan8[0] + 0 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 1 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 2 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 3 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type) \ + MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT]) \ + MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT]) + + if (FRAME_MBAFF) { + if (MB_FIELD) { + +#define MAP_F2F(idx, mb_type) \ + if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] <<= 1; \ + h->mv_cache[list][idx][1] /= 2; \ + h->mvd_cache[list][idx][1] >>= 1; \ + } + MAP_MVS + } else { + #undef MAP_F2F - }else{ -#define MAP_F2F(idx, mb_type)\ - if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] >>= 1;\ - h->mv_cache[list][idx][1] <<= 1;\ - h->mvd_cache[list][idx][1] <<= 1;\ - } +#define MAP_F2F(idx, mb_type) \ + if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] >>= 1; \ + h->mv_cache[list][idx][1] <<= 1; \ + h->mvd_cache[list][idx][1] <<= 1; \ + } + MAP_MVS #undef MAP_F2F } @@ -747,36 +792,34 @@ static void fill_decode_caches(H264Context *h, int mb_type){ } } - h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); + h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); } /** * decodes a P_SKIP or B_SKIP macroblock */ -static void av_unused decode_mb_skip(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int mb_type=0; +static void av_unused decode_mb_skip(H264Context *h) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; + int mb_type = 0; memset(h->non_zero_count[mb_xy], 0, 48); - if(MB_FIELD) - mb_type|= MB_TYPE_INTERLACED; + if (MB_FIELD) + mb_type |= MB_TYPE_INTERLACED; - if( h->slice_type_nos == AV_PICTURE_TYPE_B ) - { + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; - if(h->direct_spatial_mv_pred){ + mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP; + if (h->direct_spatial_mv_pred) { fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... + fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... } ff_h264_pred_direct_motion(h, &mb_type); - mb_type|= MB_TYPE_SKIP; - } - else - { - mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + mb_type |= MB_TYPE_SKIP; + } else { + mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP; fill_decode_neighbors(h, mb_type); pred_pskip_motion(h); @@ -785,8 +828,8 @@ static void av_unused decode_mb_skip(H264Context *h){ write_back_motion(h, mb_type); s->current_picture.f.mb_type[mb_xy] = mb_type; s->current_picture.f.qscale_table[mb_xy] = s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - h->prev_mb_skipped= 1; + h->slice_table[mb_xy] = h->slice_num; + h->prev_mb_skipped = 1; } #endif /* AVCODEC_H264_MVPRED_H */ diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index a5ed069e94..60df532658 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -30,240 +30,243 @@ #define AVCODEC_H264DATA_H #include <stdint.h> + #include "libavutil/rational.h" #include "mpegvideo.h" #include "h264.h" +static const uint8_t golomb_to_pict_type[5] = { + AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, + AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI +}; -static const uint8_t golomb_to_pict_type[5]= -{AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI}; - -static const uint8_t golomb_to_intra4x4_cbp[48]={ - 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, - 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, - 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 +static const uint8_t golomb_to_intra4x4_cbp[48] = { + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 }; -static const uint8_t golomb_to_inter_cbp[48]={ - 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, - 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, - 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 +static const uint8_t golomb_to_inter_cbp[48] = { + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 }; -static const uint8_t zigzag_scan[16]={ - 0+0*4, 1+0*4, 0+1*4, 0+2*4, - 1+1*4, 2+0*4, 3+0*4, 2+1*4, - 1+2*4, 0+3*4, 1+3*4, 2+2*4, - 3+1*4, 3+2*4, 2+3*4, 3+3*4, +static const uint8_t zigzag_scan[16] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, }; -static const uint8_t field_scan[16]={ - 0+0*4, 0+1*4, 1+0*4, 0+2*4, - 0+3*4, 1+1*4, 1+2*4, 1+3*4, - 2+0*4, 2+1*4, 2+2*4, 2+3*4, - 3+0*4, 3+1*4, 3+2*4, 3+3*4, +static const uint8_t field_scan[16] = { + 0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4, + 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4, + 2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4, + 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_zigzag_scan[16]={ - 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, - 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, - 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, - 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64, +static const uint8_t luma_dc_zigzag_scan[16] = { + 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64, + 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64, + 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t luma_dc_field_scan[16]={ - 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, - 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, - 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, - 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64, +static const uint8_t luma_dc_field_scan[16] = { + 0 * 16 + 0 * 64, 2 * 16 + 0 * 64, 1 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 2 * 16 + 2 * 64, 3 * 16 + 0 * 64, 1 * 16 + 2 * 64, 3 * 16 + 2 * 64, + 0 * 16 + 1 * 64, 2 * 16 + 1 * 64, 0 * 16 + 3 * 64, 2 * 16 + 3 * 64, + 1 * 16 + 1 * 64, 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma_dc_scan[4]={ - (0+0*2)*16, (1+0*2)*16, - (0+1*2)*16, (1+1*2)*16, +static const uint8_t chroma_dc_scan[4] = { + (0 + 0 * 2) * 16, (1 + 0 * 2) * 16, + (0 + 1 * 2) * 16, (1 + 1 * 2) * 16, }; -static const uint8_t chroma422_dc_scan[8]={ - (0+0*2)*16, (0+1*2)*16, - (1+0*2)*16, (0+2*2)*16, - (0+3*2)*16, (1+1*2)*16, - (1+2*2)*16, (1+3*2)*16, +static const uint8_t chroma422_dc_scan[8] = { + (0 + 0 * 2) * 16, (0 + 1 * 2) * 16, + (1 + 0 * 2) * 16, (0 + 2 * 2) * 16, + (0 + 3 * 2) * 16, (1 + 1 * 2) * 16, + (1 + 2 * 2) * 16, (1 + 3 * 2) * 16, }; // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] -static const uint8_t zigzag_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 1+2*8, 2+2*8, - 4+1*8, 0+5*8, 3+3*8, 7+0*8, - 3+4*8, 1+7*8, 5+3*8, 6+3*8, - 2+7*8, 6+4*8, 5+6*8, 7+5*8, - 1+0*8, 2+0*8, 0+3*8, 3+1*8, - 3+2*8, 0+6*8, 4+2*8, 6+1*8, - 2+5*8, 2+6*8, 6+2*8, 5+4*8, - 3+7*8, 7+3*8, 4+7*8, 7+6*8, - 0+1*8, 3+0*8, 0+4*8, 4+0*8, - 2+3*8, 1+5*8, 5+1*8, 5+2*8, - 1+6*8, 3+5*8, 7+1*8, 4+5*8, - 4+6*8, 7+4*8, 5+7*8, 6+7*8, - 0+2*8, 2+1*8, 1+3*8, 5+0*8, - 1+4*8, 2+4*8, 6+0*8, 4+3*8, - 0+7*8, 4+4*8, 7+2*8, 3+6*8, - 5+5*8, 6+5*8, 6+6*8, 7+7*8, +static const uint8_t zigzag_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8, + 4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8, + 3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8, + 2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8, + 1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8, + 3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8, + 2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8, + 3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8, + 0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8, + 2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8, + 1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8, + 4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8, + 0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8, + 1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8, + 0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8, + 5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8[64]={ - 0+0*8, 0+1*8, 0+2*8, 1+0*8, - 1+1*8, 0+3*8, 0+4*8, 1+2*8, - 2+0*8, 1+3*8, 0+5*8, 0+6*8, - 0+7*8, 1+4*8, 2+1*8, 3+0*8, - 2+2*8, 1+5*8, 1+6*8, 1+7*8, - 2+3*8, 3+1*8, 4+0*8, 3+2*8, - 2+4*8, 2+5*8, 2+6*8, 2+7*8, - 3+3*8, 4+1*8, 5+0*8, 4+2*8, - 3+4*8, 3+5*8, 3+6*8, 3+7*8, - 4+3*8, 5+1*8, 6+0*8, 5+2*8, - 4+4*8, 4+5*8, 4+6*8, 4+7*8, - 5+3*8, 6+1*8, 6+2*8, 5+4*8, - 5+5*8, 5+6*8, 5+7*8, 6+3*8, - 7+0*8, 7+1*8, 6+4*8, 6+5*8, - 6+6*8, 6+7*8, 7+2*8, 7+3*8, - 7+4*8, 7+5*8, 7+6*8, 7+7*8, +static const uint8_t field_scan8x8[64] = { + 0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8, + 1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8, + 2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8, + 0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8, + 2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8, + 2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8, + 2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8, + 3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8, + 3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8, + 4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8, + 4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8, + 5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8, + 5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8, + 7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8, + 6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8, + 7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 2+0*8, 0+7*8, - 2+2*8, 2+3*8, 2+4*8, 3+3*8, - 3+4*8, 4+3*8, 4+4*8, 5+3*8, - 5+5*8, 7+0*8, 6+6*8, 7+4*8, - 0+1*8, 0+3*8, 1+3*8, 1+4*8, - 1+5*8, 3+1*8, 2+5*8, 4+1*8, - 3+5*8, 5+1*8, 4+5*8, 6+1*8, - 5+6*8, 7+1*8, 6+7*8, 7+5*8, - 0+2*8, 0+4*8, 0+5*8, 2+1*8, - 1+6*8, 4+0*8, 2+6*8, 5+0*8, - 3+6*8, 6+0*8, 4+6*8, 6+2*8, - 5+7*8, 6+4*8, 7+2*8, 7+6*8, - 1+0*8, 1+2*8, 0+6*8, 3+0*8, - 1+7*8, 3+2*8, 2+7*8, 4+2*8, - 3+7*8, 5+2*8, 4+7*8, 5+4*8, - 6+3*8, 6+5*8, 7+3*8, 7+7*8, +static const uint8_t field_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8, + 2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8, + 3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8, + 5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8, + 0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8, + 1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8, + 3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8, + 5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8, + 0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8, + 1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8, + 3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8, + 5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8, + 1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8, + 1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8, + 3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8, + 6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8, }; -typedef struct IMbInfo{ +typedef struct IMbInfo { uint16_t type; uint8_t pred_mode; uint8_t cbp; } IMbInfo; -static const IMbInfo i_mb_type_info[26]={ -{MB_TYPE_INTRA4x4 , -1, -1}, -{MB_TYPE_INTRA16x16, 2, 0}, -{MB_TYPE_INTRA16x16, 1, 0}, -{MB_TYPE_INTRA16x16, 0, 0}, -{MB_TYPE_INTRA16x16, 3, 0}, -{MB_TYPE_INTRA16x16, 2, 16}, -{MB_TYPE_INTRA16x16, 1, 16}, -{MB_TYPE_INTRA16x16, 0, 16}, -{MB_TYPE_INTRA16x16, 3, 16}, -{MB_TYPE_INTRA16x16, 2, 32}, -{MB_TYPE_INTRA16x16, 1, 32}, -{MB_TYPE_INTRA16x16, 0, 32}, -{MB_TYPE_INTRA16x16, 3, 32}, -{MB_TYPE_INTRA16x16, 2, 15+0}, -{MB_TYPE_INTRA16x16, 1, 15+0}, -{MB_TYPE_INTRA16x16, 0, 15+0}, -{MB_TYPE_INTRA16x16, 3, 15+0}, -{MB_TYPE_INTRA16x16, 2, 15+16}, -{MB_TYPE_INTRA16x16, 1, 15+16}, -{MB_TYPE_INTRA16x16, 0, 15+16}, -{MB_TYPE_INTRA16x16, 3, 15+16}, -{MB_TYPE_INTRA16x16, 2, 15+32}, -{MB_TYPE_INTRA16x16, 1, 15+32}, -{MB_TYPE_INTRA16x16, 0, 15+32}, -{MB_TYPE_INTRA16x16, 3, 15+32}, -{MB_TYPE_INTRA_PCM , -1, -1}, +static const IMbInfo i_mb_type_info[26] = { + { MB_TYPE_INTRA4x4, -1, -1 }, + { MB_TYPE_INTRA16x16, 2, 0 }, + { MB_TYPE_INTRA16x16, 1, 0 }, + { MB_TYPE_INTRA16x16, 0, 0 }, + { MB_TYPE_INTRA16x16, 3, 0 }, + { MB_TYPE_INTRA16x16, 2, 16 }, + { MB_TYPE_INTRA16x16, 1, 16 }, + { MB_TYPE_INTRA16x16, 0, 16 }, + { MB_TYPE_INTRA16x16, 3, 16 }, + { MB_TYPE_INTRA16x16, 2, 32 }, + { MB_TYPE_INTRA16x16, 1, 32 }, + { MB_TYPE_INTRA16x16, 0, 32 }, + { MB_TYPE_INTRA16x16, 3, 32 }, + { MB_TYPE_INTRA16x16, 2, 15 + 0 }, + { MB_TYPE_INTRA16x16, 1, 15 + 0 }, + { MB_TYPE_INTRA16x16, 0, 15 + 0 }, + { MB_TYPE_INTRA16x16, 3, 15 + 0 }, + { MB_TYPE_INTRA16x16, 2, 15 + 16 }, + { MB_TYPE_INTRA16x16, 1, 15 + 16 }, + { MB_TYPE_INTRA16x16, 0, 15 + 16 }, + { MB_TYPE_INTRA16x16, 3, 15 + 16 }, + { MB_TYPE_INTRA16x16, 2, 15 + 32 }, + { MB_TYPE_INTRA16x16, 1, 15 + 32 }, + { MB_TYPE_INTRA16x16, 0, 15 + 32 }, + { MB_TYPE_INTRA16x16, 3, 15 + 32 }, + { MB_TYPE_INTRA_PCM, -1, -1 }, }; -typedef struct PMbInfo{ +typedef struct PMbInfo { uint16_t type; uint8_t partition_count; } PMbInfo; -static const PMbInfo p_mb_type_info[5]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, +static const PMbInfo p_mb_type_info[5] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_REF0, 4 }, }; -static const PMbInfo p_sub_mb_type_info[4]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0 , 4}, +static const PMbInfo p_sub_mb_type_info[4] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0, 4 }, }; -static const PMbInfo b_mb_type_info[23]={ -{MB_TYPE_DIRECT2|MB_TYPE_L0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_mb_type_info[23] = { + { MB_TYPE_DIRECT2 | MB_TYPE_L0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const PMbInfo b_sub_mb_type_info[13]={ -{MB_TYPE_DIRECT2 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_sub_mb_type_info[13] = { + { MB_TYPE_DIRECT2, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const uint8_t dequant4_coeff_init[6][3]={ - {10,13,16}, - {11,14,18}, - {13,16,20}, - {14,18,23}, - {16,20,25}, - {18,23,29}, +static const uint8_t dequant4_coeff_init[6][3] = { + { 10, 13, 16 }, + { 11, 14, 18 }, + { 13, 16, 20 }, + { 14, 18, 23 }, + { 16, 20, 25 }, + { 18, 23, 29 }, }; static const uint8_t dequant8_coeff_init_scan[16] = { - 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 + 0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1 }; -static const uint8_t dequant8_coeff_init[6][6]={ - {20,18,32,19,25,24}, - {22,19,35,21,28,26}, - {26,23,42,24,33,31}, - {28,25,45,26,35,33}, - {32,28,51,30,40,38}, - {36,32,58,34,46,43}, + +static const uint8_t dequant8_coeff_init[6][6] = { + { 20, 18, 32, 19, 25, 24 }, + { 22, 19, 35, 21, 28, 26 }, + { 26, 23, 42, 24, 33, 31 }, + { 28, 25, 45, 26, 35, 33 }, + { 32, 28, 51, 30, 40, 38 }, + { 36, 32, 58, 34, 46, 43 }, }; #endif /* AVCODEC_H264DATA_H */ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index d7c192012a..45f81a09c8 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -28,56 +28,90 @@ #define AVCODEC_H264DSP_H #include <stdint.h> + #include "dsputil.h" typedef void (*h264_weight_func)(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, - int log2_denom, int weightd, int weights, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, + int stride, int height, int log2_denom, + int weightd, int weights, int offset); /** * Context for storing H.264 DSP functions */ -typedef struct H264DSPContext{ +typedef struct H264DSPContext { /* weighted MC */ h264_weight_func weight_h264_pixels_tab[4]; h264_biweight_func biweight_h264_pixels_tab[4]; /* loop filter */ - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, + int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta, + int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], + int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, + int mask_mv0, int mask_mv1, int field); /* IDCT */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); + void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, + int stride, const uint8_t nnzc[15 * 8]); + void (*h264_luma_dc_dequant_idct)(DCTELEM *output, + DCTELEM *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); -}H264DSPContext; +} H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); #endif /* AVCODEC_H264DSP_H */ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 599cdb228b..d68f39bf8c 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -35,18 +35,18 @@ * Prediction types */ //@{ -#define VERT_PRED 0 -#define HOR_PRED 1 -#define DC_PRED 2 -#define DIAG_DOWN_LEFT_PRED 3 -#define DIAG_DOWN_RIGHT_PRED 4 -#define VERT_RIGHT_PRED 5 -#define HOR_DOWN_PRED 6 -#define VERT_LEFT_PRED 7 -#define HOR_UP_PRED 8 +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 // DC edge (not for VP8) -#define LEFT_DC_PRED 9 +#define LEFT_DC_PRED 9 #define TOP_DC_PRED 10 #define DC_128_PRED 11 @@ -56,7 +56,7 @@ #define VERT_LEFT_PRED_RV40_NODOWN 14 // VP8 specific -#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane +#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane #define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of ///< (left col+cur col x2+right col) / 4; ///< this is the "unaveraged" one @@ -65,44 +65,53 @@ #define DC_127_PRED 12 #define DC_129_PRED 13 -#define DC_PRED8x8 0 -#define HOR_PRED8x8 1 -#define VERT_PRED8x8 2 -#define PLANE_PRED8x8 3 +#define DC_PRED8x8 0 +#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 // DC edge -#define LEFT_DC_PRED8x8 4 -#define TOP_DC_PRED8x8 5 -#define DC_128_PRED8x8 6 +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 // H264/SVQ3 (8x8) specific -#define ALZHEIMER_DC_L0T_PRED8x8 7 -#define ALZHEIMER_DC_0LT_PRED8x8 8 -#define ALZHEIMER_DC_L00_PRED8x8 9 +#define ALZHEIMER_DC_L0T_PRED8x8 7 +#define ALZHEIMER_DC_0LT_PRED8x8 8 +#define ALZHEIMER_DC_L00_PRED8x8 9 #define ALZHEIMER_DC_0L0_PRED8x8 10 // VP8 specific -#define DC_127_PRED8x8 7 -#define DC_129_PRED8x8 8 +#define DC_127_PRED8x8 7 +#define DC_129_PRED8x8 8 //@} /** * Context for storing H.264 prediction functions */ -typedef struct H264PredContext{ - void (*pred4x4 [9+3+3])(uint8_t *src, const uint8_t *topright, int stride);//FIXME move to dsp? - void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); - void (*pred8x8 [4+3+4])(uint8_t *src, int stride); - void (*pred16x16[4+3+2])(uint8_t *src, int stride); +typedef struct H264PredContext { + void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? + void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); + void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); + void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); - void (*pred4x4_add [2])(uint8_t *pix/*align 4*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8l_add [2])(uint8_t *pix/*align 8*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8_add [3])(uint8_t *pix/*align 8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); - void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); -}H264PredContext; + void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); + void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); +} H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 2a25982531..13aded9122 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -21,39 +21,57 @@ /** * @file - * ati vcr1 codec. + * ATI VCR1 codec */ #include "avcodec.h" #include "dsputil.h" -//#undef NDEBUG -//#include <assert.h> - -/* Disable the encoder. */ -#undef CONFIG_VCR1_ENCODER -#define CONFIG_VCR1_ENCODER 0 - -typedef struct VCR1Context{ - AVCodecContext *avctx; +typedef struct VCR1Context { AVFrame picture; int delta[16]; int offset[4]; } VCR1Context; -static int decode_frame(AVCodecContext *avctx, - void *data, int *data_size, - AVPacket *avpkt) +static av_cold void common_init(AVCodecContext *avctx) +{ + VCR1Context *const a = avctx->priv_data; + + avctx->coded_frame = &a->picture; + avcodec_get_frame_defaults(&a->picture); +} + +static av_cold int decode_init(AVCodecContext *avctx) +{ + common_init(avctx); + + avctx->pix_fmt = PIX_FMT_YUV410P; + + return 0; +} + +static av_cold int decode_end(AVCodecContext *avctx) +{ + VCR1Context *s = avctx->priv_data; + + if (s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, void *data, + int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - VCR1Context * const a = avctx->priv_data; - AVFrame *picture = data; - AVFrame * const p = &a->picture; - const uint8_t *bytestream= buf; + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + VCR1Context *const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame *const p = &a->picture; + const uint8_t *bytestream = buf; int i, x, y; - if(p->data[0]) + if (p->data[0]) avctx->release_buffer(avctx, p); if(buf_size < 16 + avctx->height + avctx->width*avctx->height*5/8){ @@ -61,57 +79,57 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR(EINVAL); } - p->reference= 0; - if(avctx->get_buffer(avctx, p) < 0){ + p->reference = 0; + if (avctx->get_buffer(avctx, p) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; - for(i=0; i<16; i++){ - a->delta[i]= *(bytestream++); + for (i = 0; i < 16; i++) { + a->delta[i] = *bytestream++; bytestream++; } - for(y=0; y<avctx->height; y++){ + for (y = 0; y < avctx->height; y++) { int offset; - uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ]; + uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]]; - if((y&3) == 0){ - uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ]; - uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ]; + if ((y & 3) == 0) { + uint8_t *cb = &a->picture.data[1][(y >> 2) * a->picture.linesize[1]]; + uint8_t *cr = &a->picture.data[2][(y >> 2) * a->picture.linesize[2]]; - for(i=0; i<4; i++) - a->offset[i]= *(bytestream++); + for (i = 0; i < 4; i++) + a->offset[i] = *bytestream++; - offset= a->offset[0] - a->delta[ bytestream[2]&0xF ]; - for(x=0; x<avctx->width; x+=4){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[0]>>4 ]); - luma += 4; + offset = a->offset[0] - a->delta[bytestream[2] & 0xF]; + for (x = 0; x < avctx->width; x += 4) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[0] & 0xF]; + luma[3] = offset += a->delta[bytestream[0] >> 4]; + luma += 4; - *(cb++) = bytestream[3]; - *(cr++) = bytestream[1]; + *cb++ = bytestream[3]; + *cr++ = bytestream[1]; - bytestream+= 4; + bytestream += 4; } - }else{ - offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ]; - - for(x=0; x<avctx->width; x+=8){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[3]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[3]>>4 ]); - luma[4]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[5]=( offset += a->delta[ bytestream[0]>>4 ]); - luma[6]=( offset += a->delta[ bytestream[1]&0xF ]); - luma[7]=( offset += a->delta[ bytestream[1]>>4 ]); - luma += 8; - bytestream+= 4; + } else { + offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF]; + + for (x = 0; x < avctx->width; x += 8) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[3] & 0xF]; + luma[3] = offset += a->delta[bytestream[3] >> 4]; + luma[4] = offset += a->delta[bytestream[0] & 0xF]; + luma[5] = offset += a->delta[bytestream[0] >> 4]; + luma[6] = offset += a->delta[bytestream[1] & 0xF]; + luma[7] = offset += a->delta[bytestream[1] >> 4]; + luma += 8; + bytestream += 4; } } } @@ -122,62 +140,6 @@ static int decode_frame(AVCodecContext *avctx, return buf_size; } -#if CONFIG_VCR1_ENCODER -static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ - VCR1Context * const a = avctx->priv_data; - AVFrame *pict = data; - AVFrame * const p = &a->picture; - int size; - - *p = *pict; - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; - - avpriv_align_put_bits(&a->pb); - while(get_bit_count(&a->pb)&31) - put_bits(&a->pb, 8, 0); - - size= get_bit_count(&a->pb)/32; - - return size*4; -} -#endif - -static av_cold void common_init(AVCodecContext *avctx){ - VCR1Context * const a = avctx->priv_data; - - avctx->coded_frame = &a->picture; - avcodec_get_frame_defaults(&a->picture); - a->avctx= avctx; -} - -static av_cold int decode_init(AVCodecContext *avctx){ - - common_init(avctx); - - avctx->pix_fmt= PIX_FMT_YUV410P; - - return 0; -} - -static av_cold int decode_end(AVCodecContext *avctx){ - VCR1Context *s = avctx->priv_data; - - if (s->picture.data[0]) - avctx->release_buffer(avctx, &s->picture); - - return 0; -} - -#if CONFIG_VCR1_ENCODER -static av_cold int encode_init(AVCodecContext *avctx){ - - common_init(avctx); - - return 0; -} -#endif - AVCodec ff_vcr1_decoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, @@ -190,14 +152,39 @@ AVCodec ff_vcr1_decoder = { .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; +/* Disable the encoder. */ +#undef CONFIG_VCR1_ENCODER +#define CONFIG_VCR1_ENCODER 0 + #if CONFIG_VCR1_ENCODER +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, + int buf_size, void *data) +{ + VCR1Context *const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame *const p = &a->picture; + int size; + + *p = *pict; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; + + avpriv_align_put_bits(&a->pb); + while (get_bit_count(&a->pb) & 31) + put_bits(&a->pb, 8, 0); + + size = get_bit_count(&a->pb) / 32; + + return size * 4; +} + AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_VCR1, .priv_data_size = sizeof(VCR1Context), - .init = encode_init, + .init = common_init, .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; -#endif +#endif /* CONFIG_VCR1_ENCODER */ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 68df954897..e6cd161bfd 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1792,6 +1792,22 @@ QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) QPEL_2TAP(avg_, 8, 3dnow) +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels16_xy2_mmx(dst, src, stride, 16); +} +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels16_xy2_mmx(dst, src, stride, 16); +} #if HAVE_YASM typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src, diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 6ba5ea83bc..91940f6ee8 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -199,6 +199,11 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); + void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index e8acfb25fe..9b50940d4d 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -1,5 +1,7 @@ ;****************************************************************************** ;* MMX/SSE2-optimized functions for the RV40 decoder +;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> +;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com> ;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com> ;* ;* This file is part of Libav. @@ -25,11 +27,319 @@ SECTION_RODATA align 16 -shift_round: times 8 dw 1 << (16 - 6) -cextern pw_16 +pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024 + +sixtap_filter_hb_m: times 8 db 1, -5 + times 8 db 52, 20 + ; multiplied by 2 to have the same shift + times 8 db 2, -10 + times 8 db 40, 40 + ; back to normal + times 8 db 1, -5 + times 8 db 20, 52 + +sixtap_filter_v_m: times 8 dw 1 + times 8 dw -5 + times 8 dw 52 + times 8 dw 20 + ; multiplied by 2 to have the same shift + times 8 dw 2 + times 8 dw -10 + times 8 dw 40 + times 8 dw 40 + ; back to normal + times 8 dw 1 + times 8 dw -5 + times 8 dw 20 + times 8 dw 52 + +%ifdef PIC +%define sixtap_filter_hw picregq +%define sixtap_filter_hb picregq +%define sixtap_filter_v picregq +%define npicregs 1 +%else +%define sixtap_filter_hw sixtap_filter_hw_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define sixtap_filter_v sixtap_filter_v_m +%define npicregs 0 +%endif + +filter_h6_shuf1: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +filter_h6_shuf2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11 + +cextern pw_32 +cextern pw_16 +cextern pw_512 SECTION .text +;----------------------------------------------------------------------------- +; subpel MC functions: +; +; void [put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride, +; uint8_t *src, int srcstride, +; int len, int m); +;---------------------------------------------------------------------- +%macro LOAD 2 +%if WIN64 + movsxd %1q, %1d +%endif +%ifdef PIC + add %1q, picregq +%else + add %1q, %2 +%endif +%endmacro + +%macro STORE 3 +%ifidn %3, avg + movh %2, [dstq] +%endif + packuswb %1, %1 +%ifidn %3, avg +%if cpuflag(3dnow) + pavgusb %1, %2 +%else + pavgb %1, %2 +%endif +%endif + movh [dstq], %1 +%endmacro + +%macro FILTER_V 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD my, sixtap_filter_v + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + +%ifdef m8 + mova m8, [myq+ 0] + mova m9, [myq+16] + mova m10, [myq+32] + mova m11, [myq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [myq+ 0] +%define COEFF14 [myq+16] +%define COEFF2 [myq+32] +%define COEFF3 [myq+48] +%endif +.nextrow: + mova m6, m1 + movh m5, [srcq+2*srcstrideq] ; read new row + paddw m6, m4 + punpcklbw m5, m7 + pmullw m6, COEFF14 + paddw m0, m5 + pmullw m0, COEFF05 + paddw m6, m0 + mova m0, m1 + paddw m6, [pw_32] + mova m1, m2 + pmullw m2, COEFF2 + paddw m6, m2 + mova m2, m3 + pmullw m3, COEFF3 + paddw m6, m3 + + ; round/clip/store + mova m3, m4 + psraw m6, 6 + mova m4, m5 + STORE m6, m5, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%macro FILTER_H 1 +cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD mx, sixtap_filter_v + mova m6, [pw_32] +%ifdef m8 + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [mxq+ 0] +%define COEFF14 [mxq+16] +%define COEFF2 [mxq+32] +%define COEFF3 [mxq+48] +%endif +.nextrow: + movq m0, [srcq-2] + movq m5, [srcq+3] + movq m1, [srcq-1] + movq m4, [srcq+2] + punpcklbw m0, m7 + punpcklbw m5, m7 + punpcklbw m1, m7 + punpcklbw m4, m7 + movq m2, [srcq-0] + movq m3, [srcq+1] + paddw m0, m5 + paddw m1, m4 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, COEFF05 + pmullw m1, COEFF14 + pmullw m2, COEFF2 + pmullw m3, COEFF3 + paddw m0, m6 + paddw m1, m2 + paddw m0, m3 + paddw m0, m1 + psraw m0, 6 + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +FILTER_V put +FILTER_H put + +INIT_MMX mmx2 +FILTER_V avg +FILTER_H avg + +INIT_MMX 3dnow +FILTER_V avg +FILTER_H avg +%endif + +INIT_XMM sse2 +FILTER_H put +FILTER_H avg +FILTER_V put +FILTER_V avg + +%macro FILTER_SSSE3 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + + ; read 5 lines + sub srcq, srcstrideq + LOAD my, sixtap_filter_hb + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + mova m5, [myq] + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + lea srcq, [srcq+2*srcstrideq] + +.nextrow: + mova m6, m2 + punpcklbw m0, m1 + punpcklbw m6, m3 + pmaddubsw m0, m5 + pmaddubsw m6, [myq+16] + movh m7, [srcq] ; read new row + paddw m6, m0 + mova m0, m1 + mova m1, m2 + mova m2, m3 + mova m3, m4 + mova m4, m7 + punpcklbw m7, m3 + pmaddubsw m7, m5 + paddw m6, m7 + pmulhrsw m6, [pw_512] + STORE m6, m7, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + mova m3, [filter_h6_shuf2] + mova m4, [filter_h6_shuf3] + LOAD mx, sixtap_filter_hb + mova m5, [mxq] ; set up 6tap filter in bytes + mova m6, [mxq+16] + mova m7, [filter_h6_shuf1] + +.nextrow: + movu m0, [srcq-2] + mova m1, m0 + mova m2, m0 + pshufb m0, m7 + pshufb m1, m3 + pshufb m2, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + pmaddubsw m2, m5 + paddw m0, m1 + paddw m0, m2 + pmulhrsw m0, [pw_512] + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_XMM ssse3 +FILTER_SSSE3 put +FILTER_SSSE3 avg + ; %1=5bits weights?, %2=dst %3=src1 %4=src3 %5=stride if sse2 %macro RV40_WCORE 4-5 movh m4, [%3 + r6 + 0] @@ -143,7 +453,7 @@ SECTION .text %macro RV40_WEIGHT 3 cglobal rv40_weight_func_%1_%2, 6, 7, 8 %if cpuflag(ssse3) - mova m1, [shift_round] + mova m1, [pw_1024] %else mova m1, [pw_16] %endif diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index df468aa9e5..3f42363e4e 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -22,8 +22,11 @@ /** * @file * RV40 decoder motion compensation functions x86-optimised + * 2,0 and 0,2 have h264 equivalents. + * 3,3 is bugged in the rv40 format and maps to _xy2 version */ +#include "libavcodec/x86/dsputil_mmx.h" #include "libavcodec/rv34dsp.h" void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, @@ -53,6 +56,132 @@ DECLARE_WEIGHT(mmx) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) +/** @{ */ +/** + * Define one qpel function. + * LOOPSIZE must be already set to the number of pixels processed per + * iteration in the inner loop of the called functions. + * COFF(x) must be already defined so as to provide the offset into any + * array of coeffs used by the called function for the qpel position x. + */ +#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \ +static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ + uint8_t *src, \ + int stride) \ +{ \ + int i; \ + if (PH && PV) { \ + DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)]; \ + uint8_t *tmpptr = tmp + SIZE * 2; \ + src -= stride * 2; \ + \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride, \ + SIZE + 5, HCOFF(PH)); \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i, \ + SIZE, SIZE, VCOFF(PV)); \ + } else if (PV) { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i, \ + stride, SIZE, VCOFF(PV)); \ + } else { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i, \ + stride, SIZE, HCOFF(PH)); \ + } \ +}; + +/** Declare functions for sizes 8 and 16 and given operations + * and qpel position. */ +#define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 16, PH, PV, OPT) + +/** Declare all functions for all sizes and qpel positions */ +#define QPEL_MC_DECL(OP, OPT) \ +void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +QPEL_FUNCS_DECL(OP, 0, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 0, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 2, OPT) +/** @} */ + +#define LOOPSIZE 8 +#define HCOFF(x) (32 * (x - 1)) +#define VCOFF(x) (32 * (x - 1)) +QPEL_MC_DECL(put_, _ssse3) +QPEL_MC_DECL(avg_, _ssse3) + +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 8 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) +QPEL_MC_DECL(put_, _sse2) +QPEL_MC_DECL(avg_, _sse2) + +#if ARCH_X86_32 +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 4 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) + +QPEL_MC_DECL(put_, _mmx) + +#define ff_put_rv40_qpel_h_mmx2 ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_mmx2 ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _mmx2) + +#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _3dnow) +#endif + +/** @{ */ +/** Set one function */ +#define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT) \ + c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT; + +/** Set functions put and avg for sizes 8 and 16 and a given qpel position */ +#define QPEL_FUNCS_SET(OP, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 16, PH, PV, OPT) + +/** Set all functions for all sizes and qpel positions */ +#define QPEL_MC_SET(OP, OPT) \ +QPEL_FUNCS_SET (OP, 0, 1, OPT) \ +QPEL_FUNCS_SET (OP, 0, 3, OPT) \ +QPEL_FUNCS_SET (OP, 1, 0, OPT) \ +QPEL_FUNCS_SET (OP, 1, 1, OPT) \ +QPEL_FUNCS_SET (OP, 1, 2, OPT) \ +QPEL_FUNCS_SET (OP, 1, 3, OPT) \ +QPEL_FUNCS_SET (OP, 2, 1, OPT) \ +QPEL_FUNCS_SET (OP, 2, 2, OPT) \ +QPEL_FUNCS_SET (OP, 2, 3, OPT) \ +QPEL_FUNCS_SET (OP, 3, 0, OPT) \ +QPEL_FUNCS_SET (OP, 3, 1, OPT) \ +QPEL_FUNCS_SET (OP, 3, 2, OPT) +/** @} */ + void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) { #if HAVE_YASM @@ -65,25 +194,42 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx; +#if ARCH_X86_32 + QPEL_MC_SET(put_, _mmx) +#endif } if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _mmx2) +#endif } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _3dnow) +#endif } if (mm_flags & AV_CPU_FLAG_SSE2) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2; + QPEL_MC_SET(put_, _sse2) + QPEL_MC_SET(avg_, _sse2) } if (mm_flags & AV_CPU_FLAG_SSSE3) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3; + QPEL_MC_SET(put_, _ssse3) + QPEL_MC_SET(avg_, _ssse3) } #endif } |