aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kostya Shishkov <kostya.shishkov@gmail.com> 2023-08-11 18:27:48 +0200
committer: Kostya Shishkov <kostya.shishkov@gmail.com> 2023-08-11 18:27:48 +0200
commit: fe64781def821c3900abf44bdfbb38f3b3d21345 (patch)
tree: 579eca436786160b581e877394a05e24098dd7a1
parent: 5f223cdb5a7834fe58bf05d4dd0da36325f4f11c (diff)
download: nihav-fe64781def821c3900abf44bdfbb38f3b3d21345.tar.gz
h264: more micro-optimisations
* split IDCT function so it has only two parameters
* avoid instantiating frame references for each MB or even sub-block
* other small code changes
-rw-r--r-- nihav-itu/src/codecs/h264/decoder_mt.rs | 56
-rw-r--r-- nihav-itu/src/codecs/h264/decoder_st.rs | 121
-rw-r--r-- nihav-itu/src/codecs/h264/dsp/mc/mod.rs | 16
-rw-r--r-- nihav-itu/src/codecs/h264/dsp/mod.rs | 25
-rw-r--r-- nihav-itu/src/codecs/h264/types.rs | 5
5 files changed, 143 insertions, 80 deletions
diff --git a/nihav-itu/src/codecs/h264/decoder_mt.rs b/nihav-itu/src/codecs/h264/decoder_mt.rs
index 1824042..f66ce2f 100644
--- a/nihav-itu/src/codecs/h264/decoder_mt.rs
+++ b/nihav-itu/src/codecs/h264/decoder_mt.rs
@@ -39,17 +39,19 @@ impl FrameDecoder {
let sslice_refs = SimplifiedSliceRefs::new(refs);
let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE);
+ let mut dst_pic = self.cur_pic.clone();
+ let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap();
if !self.pps.entropy_coding_mode {
br.skip((hdr_size & 7) as u32)?;
- self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, &sslice_refs)
+ self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, &sslice_refs, &mut dst_frm)
} else {
let csrc = &nal[(hdr_size + 7) / 8..];
validate!(csrc.len() >= 2);
let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?;
- self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs)
+ self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs, &mut dst_frm)
}
}
- fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult<usize> {
+ fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u8>) -> DecoderResult<usize> {
const INTRA_CBP: [u8; 48] = [
47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
@@ -79,7 +81,7 @@ impl FrameDecoder {
validate!(mb_idx + mb_skip_run <= self.num_mbs);
mb_info.mb_type = skip_type;
for _ in 0..mb_skip_run {
- self.handle_macroblock(slice_hdr, &mut mb_info, refs)?;
+ self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
mb_idx += 1;
}
if mb_idx == self.num_mbs || br.tell() >= full_size {
@@ -145,7 +147,7 @@ impl FrameDecoder {
decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
}
}
- self.handle_macroblock(slice_hdr, &mut mb_info, refs)?;
+ self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
}
mb_idx += 1;
if let Ok(disp) = self.dispatch.read() {
@@ -154,7 +156,7 @@ impl FrameDecoder {
}
Ok(mb_idx)
}
- fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult<usize> {
+ fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u8>) -> DecoderResult<usize> {
let mut mb_idx = slice_hdr.first_mb_in_slice;
let mut prev_mb_skipped = false;
let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
@@ -250,7 +252,7 @@ impl FrameDecoder {
mb_info.transform_size_8x8 = false;
last_qp_diff = false;
}
- self.handle_macroblock(slice_hdr, &mut mb_info, refs)?;
+ self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
prev_mb_skipped = mb_skip;
if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() {
if let Ok(disp) = self.dispatch.read() {
@@ -266,7 +268,7 @@ impl FrameDecoder {
Err(DecoderError::InvalidData)
}
#[allow(clippy::cognitive_complexity)]
- fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs) -> DecoderResult<()> {
+ fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u8>) -> DecoderResult<()> {
let qp_y = mb_info.qp_y;
let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).max(0).min(51) as usize;
let qp_u = CHROMA_QUANTS[qpr];
@@ -291,16 +293,31 @@ impl FrameDecoder {
}
if !mb_info.transform_size_8x8 {
let quant_dc = !mb_info.mb_type.is_intra16x16();
- for i in 0..16 {
- if mb_info.coded[i] {
- if !tx_bypass {
- idct(&mut mb_info.coeffs[i], qp_y, quant_dc);
+ if quant_dc {
+ for i in 0..16 {
+ if mb_info.coded[i] {
+ if !tx_bypass {
+ idct(&mut mb_info.coeffs[i], qp_y);
+ }
+ } else if has_dc {
+ if !tx_bypass {
+ idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc);
+ }
+ mb_info.coded[i] = true;
}
- } else if has_dc {
- if !tx_bypass {
- idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc);
+ }
+ } else {
+ for i in 0..16 {
+ if mb_info.coded[i] {
+ if !tx_bypass {
+ idct_skip_dc(&mut mb_info.coeffs[i], qp_y);
+ }
+ } else if has_dc {
+ if !tx_bypass {
+ idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc);
+ }
+ mb_info.coded[i] = true;
}
- mb_info.coded[i] = true;
}
}
} else {
@@ -320,7 +337,7 @@ impl FrameDecoder {
let blk_no = 16 + chroma * 4 + i;
mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i];
if mb_info.coded[blk_no] {
- idct(&mut mb_info.coeffs[blk_no], qp_c, false);
+ idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c);
} else if mb_info.coeffs[blk_no][0] != 0 {
idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false);
mb_info.coded[blk_no] = true;
@@ -347,7 +364,6 @@ impl FrameDecoder {
let xpos = self.sstate.mb_x * 16;
let ypos = self.sstate.mb_y * 16;
- let mut frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_pic.buf).unwrap();
if mb_info.mb_type != MBType::PCM {
let weight_mode = if self.pps.weighted_pred && slice_hdr.slice_type.is_p() {
1
@@ -356,7 +372,7 @@ impl FrameDecoder {
} else {
0
};
- recon_mb_mt(&mut frm, slice_hdr, mb_info, &mut self.sstate, refs, &mut self.mc_dsp, weight_mode, &self.dispatch)?;
+ recon_mb_mt(frm, slice_hdr, mb_info, &mut self.sstate, refs, &mut self.mc_dsp, weight_mode, &self.dispatch)?;
} else {
for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
dline[..16].copy_from_slice(src);
@@ -368,7 +384,7 @@ impl FrameDecoder {
dline[..8].copy_from_slice(src);
}
}
- self.sstate.save_ipred_context(&frm);
+ self.sstate.save_ipred_context(frm);
let mv_info = &mut self.cur_pic.mv_info;
let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride;
diff --git a/nihav-itu/src/codecs/h264/decoder_st.rs b/nihav-itu/src/codecs/h264/decoder_st.rs
index b9b7308..94b3977 100644
--- a/nihav-itu/src/codecs/h264/decoder_st.rs
+++ b/nihav-itu/src/codecs/h264/decoder_st.rs
@@ -218,15 +218,23 @@ println!("PAFF?");
self.transform_8x8_mode = pps.transform_8x8_mode;
self.sstate.reset(sps.pic_width_in_mbs, sps.pic_height_in_mbs, slice_hdr.first_mb_in_slice);
+
+ let mut dst_pic = if let Some(ref pic) = self.cur_pic {
+ pic.clone()
+ } else {
+ return Err(DecoderError::InvalidData);
+ };
+ let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap();
+ let dst_mv_info = &mut dst_pic.mv_info;
if !pps.entropy_coding_mode {
- self.has_pic = self.decode_slice_cavlc(&mut br, &slice_hdr, full_size)?;
+ self.has_pic = self.decode_slice_cavlc(&mut br, &slice_hdr, full_size, &mut dst_frm, dst_mv_info)?;
} else {
br.align();
let start = br.tell() / 8;
let csrc = &src[start..];
validate!(csrc.len() >= 2);
let mut cabac = CABAC::new(csrc, slice_hdr.slice_type, slice_hdr.slice_qp, slice_hdr.cabac_init_idc as usize)?;
- self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr)?;
+ self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr, &mut dst_frm, dst_mv_info)?;
}
},
2 => { // slice data partition A
@@ -337,7 +345,7 @@ println!("PAFF?");
}
}
#[allow(clippy::cognitive_complexity)]
- fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs) {
+ fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u8>, mv_info: &mut FrameMV) {
let pps = &self.pps[self.cur_pps];
let qp_y = mb_info.qp_y;
@@ -365,12 +373,23 @@ println!("PAFF?");
if !tx_bypass {
if !mb_info.transform_size_8x8 {
let quant_dc = !mb_info.mb_type.is_intra16x16();
- for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) {
- if *coded {
- idct(coeffs, qp_y, quant_dc);
- } else if has_dc {
- idct_dc(coeffs, qp_y, quant_dc);
- *coded = true;
+ if quant_dc {
+ for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) {
+ if *coded {
+ idct(coeffs, qp_y);
+ } else if has_dc {
+ idct_dc(coeffs, qp_y, quant_dc);
+ *coded = true;
+ }
+ }
+ } else {
+ for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) {
+ if *coded {
+ idct_skip_dc(coeffs, qp_y);
+ } else if has_dc {
+ idct_dc(coeffs, qp_y, quant_dc);
+ *coded = true;
+ }
}
}
} else {
@@ -397,7 +416,7 @@ println!("PAFF?");
let blk_no = 16 + chroma * 4 + i;
mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i];
if mb_info.coded[blk_no] {
- idct(&mut mb_info.coeffs[blk_no], qp_c, false);
+ idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c);
} else if mb_info.coeffs[blk_no][0] != 0 {
idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false);
mb_info.coded[blk_no] = true;
@@ -416,28 +435,27 @@ println!("PAFF?");
let xpos = self.sstate.mb_x * 16;
let ypos = self.sstate.mb_y * 16;
- if let Some(ref mut pic) = self.cur_pic {
- let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap();
- if mb_info.mb_type != MBType::PCM {
- let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() {
- 1
- } else if slice_hdr.slice_type.is_b() {
- self.pps[self.cur_pps].weighted_bipred_idc
- } else {
- 0
- };
- recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode);
- } else {
- for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
- dline[..16].copy_from_slice(src);
- }
- for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) {
- dline[..8].copy_from_slice(src);
- }
- for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) {
- dline[..8].copy_from_slice(src);
- }
+
+ if mb_info.mb_type != MBType::PCM {
+ let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() {
+ 1
+ } else if slice_hdr.slice_type.is_b() {
+ self.pps[self.cur_pps].weighted_bipred_idc
+ } else {
+ 0
+ };
+ recon_mb(frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode);
+ } else {
+ for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
+ dline[..16].copy_from_slice(src);
+ }
+ for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) {
+ dline[..8].copy_from_slice(src);
+ }
+ for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) {
+ dline[..8].copy_from_slice(src);
}
+ }
/*match mb_info.mb_type {
MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => {
let dstride = frm.stride[0];
@@ -449,32 +467,27 @@ MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBTyp
},
_ => {},
};*/
- self.sstate.save_ipred_context(&frm);
+ self.sstate.save_ipred_context(frm);
+
+ let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride;
+ let mut mb = FrameMBInfo::new();
+ mb.mb_type = mb_info.mb_type.into();
+ for blk4 in 0..16 {
+ mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv;
}
- if let Some(ref mut pic) = self.cur_pic {
- let mv_info = &mut pic.mv_info;
- let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride;
- let mut mb = FrameMBInfo::new();
- mb.mb_type = mb_info.mb_type.into();
- for blk4 in 0..16 {
- mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv;
- }
- for blk8 in 0..4 {
- mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
- mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx;
- }
- mv_info.mbs[mb_pos] = mb;
+ for blk8 in 0..4 {
+ mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
+ mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx;
}
+ mv_info.mbs[mb_pos] = mb;
+
if !self.deblock_skip && self.deblock_mode != 1 {
self.sstate.fill_deblock(slice_refs, self.deblock_mode, self.is_s);
- if let Some(ref mut pic) = self.cur_pic {
- let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap();
- loop_filter_mb(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta);
- }
+ loop_filter_mb(frm, &self.sstate, self.lf_alpha, self.lf_beta);
}
self.sstate.next_mb();
}
- fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize) -> DecoderResult<bool> {
+ fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize, frm: &mut NASimpleVideoFrame<u8>, mv_info: &mut FrameMV) -> DecoderResult<bool> {
const INTRA_CBP: [u8; 48] = [
47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
@@ -508,7 +521,7 @@ _ => {},
validate!(mb_idx + mb_skip_run <= self.num_mbs);
mb_info.mb_type = skip_type;
for _ in 0..mb_skip_run {
- self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
mb_idx += 1;
}
if mb_idx == self.num_mbs || br.tell() >= full_size {
@@ -574,7 +587,7 @@ _ => {},
decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
}
}
- self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
}
mb_idx += 1;
}
@@ -583,7 +596,7 @@ _ => {},
}
Ok(mb_idx == self.num_mbs)
}
- fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader) -> DecoderResult<bool> {
+ fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, frm: &mut NASimpleVideoFrame<u8>, mv_info: &mut FrameMV) -> DecoderResult<bool> {
let mut mb_idx = slice_hdr.first_mb_in_slice;
let mut prev_mb_skipped = false;
let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
@@ -682,7 +695,7 @@ _ => {},
mb_info.transform_size_8x8 = false;
last_qp_diff = false;
}
- self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
prev_mb_skipped = mb_skip;
if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() {
if let Some(ref mut pic) = self.cur_pic {
diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
index 5845d92..615563e 100644
--- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
+++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
@@ -199,7 +199,21 @@ impl H264MC {
pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame<u8>, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
let mut abuf = self.avg_buf.clone();
- let mut afrm = NASimpleVideoFrame::from_video_buf(&mut abuf).unwrap();
+ let stride_y = abuf.get_stride(0);
+ let stride_c = abuf.get_stride(1);
+ let off_y = abuf.get_offset(0);
+ let off_u = abuf.get_offset(1);
+ let off_v = abuf.get_offset(2);
+ let data = abuf.get_data_mut().unwrap();
+ let mut afrm = NASimpleVideoFrame {
+ width: [64, 32, 32, 0],
+ height: [64, 32, 32, 0],
+ flip: false,
+ stride: [stride_y, stride_c, stride_c, 0],
+ offset: [off_y, off_u, off_v, 0],
+ components: 3,
+ data,
+ };
let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 };
self.do_mc(&mut afrm, refpic, 0, 0, w, h, amv);
let wsize = match w {
diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs
index 76936ad..a2a58a4 100644
--- a/nihav-itu/src/codecs/h264/dsp/mod.rs
+++ b/nihav-itu/src/codecs/h264/dsp/mod.rs
@@ -135,7 +135,7 @@ pub fn idct_luma_dc(blk: &mut [i16; 16], qp: u8) {
}
}
-pub fn idct(blk: &mut [i16; 16], qp: u8, quant_dc: bool) {
+pub fn idct_skip_dc(blk: &mut [i16; 16], qp: u8) {
const BLK_INDEX: [usize; 16] = [
0, 2, 0, 2,
2, 1, 2, 1,
@@ -144,8 +144,27 @@ pub fn idct(blk: &mut [i16; 16], qp: u8, quant_dc: bool) {
];
let qidx = (qp % 6) as usize;
let shift = qp / 6;
- let start = if quant_dc { 0 } else { 1 };
- for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(start) {
+ for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(1) {
+ *el = (*el * LEVEL_SCALE[idx][qidx]) << shift;
+ }
+ for row in blk.chunks_exact_mut(4) {
+ transform!(row[0], row[1], row[2], row[3], 0);
+ }
+ for i in 0..4 {
+ transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6);
+ }
+}
+
+pub fn idct(blk: &mut [i16; 16], qp: u8) {
+ const BLK_INDEX: [usize; 16] = [
+ 0, 2, 0, 2,
+ 2, 1, 2, 1,
+ 0, 2, 0, 2,
+ 2, 1, 2, 1
+ ];
+ let qidx = (qp % 6) as usize;
+ let shift = qp / 6;
+ for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()) {
*el = (*el * LEVEL_SCALE[idx][qidx]) << shift;
}
for row in blk.chunks_exact_mut(4) {
diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs
index 1310daa..4bcdb49 100644
--- a/nihav-itu/src/codecs/h264/types.rs
+++ b/nihav-itu/src/codecs/h264/types.rs
@@ -383,8 +383,9 @@ pub struct MBData {
}
pub fn blk4_to_blk8(blk4: usize) -> usize {
- const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
- MAP[blk4 & 0xF]
+ /*const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
+ MAP[blk4 & 0xF]*/
+ ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2)
}
#[derive(Clone,Copy)]