about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kostya Shishkov <kostya.shishkov@gmail.com> 2023-08-08 14:39:17 +0200
committer Kostya Shishkov <kostya.shishkov@gmail.com> 2023-08-08 18:16:24 +0200
commit 5f223cdb5a7834fe58bf05d4dd0da36325f4f11c (patch)
tree b1654e2a4f1159dfa1c2a90f9500c6759baf6f50
parent 754ab49a62c862e8c6e66ec88bb7ad626247140e (diff)
download nihav-5f223cdb5a7834fe58bf05d4dd0da36325f4f11c.tar.gz
h264: prepare data references before decoding
This speeds up decoding by eliminating the need for refcounted accesses.
-rw-r--r-- nihav-itu/src/codecs/h264/decoder_mt.rs 14
-rw-r--r-- nihav-itu/src/codecs/h264/decoder_st.rs 25
-rw-r--r-- nihav-itu/src/codecs/h264/dsp/mc/mod.rs 78
-rw-r--r-- nihav-itu/src/codecs/h264/mb_recon.rs 38
-rw-r--r-- nihav-itu/src/codecs/h264/pic_ref.rs 134
-rw-r--r-- nihav-itu/src/codecs/h264/types.rs 34
6 files changed, 256 insertions, 67 deletions
diff --git a/nihav-itu/src/codecs/h264/decoder_mt.rs b/nihav-itu/src/codecs/h264/decoder_mt.rs
index fac66c5..1824042 100644
--- a/nihav-itu/src/codecs/h264/decoder_mt.rs
+++ b/nihav-itu/src/codecs/h264/decoder_mt.rs
@@ -36,18 +36,20 @@ impl FrameDecoder {
}
validate!(full_size > 0);
+ let sslice_refs = SimplifiedSliceRefs::new(refs);
+
let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE);
if !self.pps.entropy_coding_mode {
br.skip((hdr_size & 7) as u32)?;
- self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, refs)
+ self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, &sslice_refs)
} else {
let csrc = &nal[(hdr_size + 7) / 8..];
validate!(csrc.len() >= 2);
let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?;
- self.decode_slice_cabac(&mut cabac, hdr, refs)
+ self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs)
}
}
- fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult<usize> {
+ fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult<usize> {
const INTRA_CBP: [u8; 48] = [
47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
@@ -152,7 +154,7 @@ impl FrameDecoder {
}
Ok(mb_idx)
}
- fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult<usize> {
+ fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult<usize> {
let mut mb_idx = slice_hdr.first_mb_in_slice;
let mut prev_mb_skipped = false;
let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
@@ -264,7 +266,7 @@ impl FrameDecoder {
Err(DecoderError::InvalidData)
}
#[allow(clippy::cognitive_complexity)]
- fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SliceRefs) -> DecoderResult<()> {
+ fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs) -> DecoderResult<()> {
let qp_y = mb_info.qp_y;
let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).max(0).min(51) as usize;
let qp_u = CHROMA_QUANTS[qpr];
@@ -394,7 +396,7 @@ impl FrameDecoder {
Ok(())
}
- fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
+ fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
let mb_type = mb_info.mb_type;
if !mb_type.is_4x4() {
let (pw, ph) = mb_type.size();
diff --git a/nihav-itu/src/codecs/h264/decoder_st.rs b/nihav-itu/src/codecs/h264/decoder_st.rs
index 7e8b83a..b9b7308 100644
--- a/nihav-itu/src/codecs/h264/decoder_st.rs
+++ b/nihav-itu/src/codecs/h264/decoder_st.rs
@@ -277,7 +277,7 @@ println!("PAFF?");
Ok(())
}
- fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
+ fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
let mb_type = mb_info.mb_type;
if !mb_type.is_4x4() {
let (pw, ph) = mb_type.size();
@@ -337,7 +337,7 @@ println!("PAFF?");
}
}
#[allow(clippy::cognitive_complexity)]
- fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo) {
+ fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs) {
let pps = &self.pps[self.cur_pps];
let qp_y = mb_info.qp_y;
@@ -408,7 +408,7 @@ println!("PAFF?");
self.sstate.reset_mb_mv();
}
if !mb_info.mb_type.is_intra() {
- Self::pred_mv(&mut self.sstate, &self.frame_refs.cur_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference);
+ Self::pred_mv(&mut self.sstate, slice_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference);
}
if !pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
self.sstate.fill_ipred(IntraPredMode::DC);
@@ -426,7 +426,7 @@ println!("PAFF?");
} else {
0
};
- recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, &self.frame_refs.cur_refs, &mut self.mc_dsp, weight_mode);
+ recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode);
} else {
for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
dline[..16].copy_from_slice(src);
@@ -460,13 +460,13 @@ _ => {},
mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv;
}
for blk8 in 0..4 {
- mb.ref_poc[blk8] = self.frame_refs.cur_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
+ mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx;
}
mv_info.mbs[mb_pos] = mb;
}
if !self.deblock_skip && self.deblock_mode != 1 {
- self.sstate.fill_deblock(&self.frame_refs.cur_refs, self.deblock_mode, self.is_s);
+ self.sstate.fill_deblock(slice_refs, self.deblock_mode, self.is_s);
if let Some(ref mut pic) = self.cur_pic {
let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap();
loop_filter_mb(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta);
@@ -489,6 +489,10 @@ _ => {},
let mut mb_idx = slice_hdr.first_mb_in_slice;
let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
+
+ let slice_refs = self.frame_refs.cur_refs.clone();
+ let sslice_refs = SimplifiedSliceRefs::new(&slice_refs);
+
while br.tell() < full_size && mb_idx < self.num_mbs {
mb_info.coded = [false; 25];
mb_info.ref_l0 = [ZERO_REF; 4];
@@ -504,7 +508,7 @@ _ => {},
validate!(mb_idx + mb_skip_run <= self.num_mbs);
mb_info.mb_type = skip_type;
for _ in 0..mb_skip_run {
- self.handle_macroblock(slice_hdr, &mut mb_info);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
mb_idx += 1;
}
if mb_idx == self.num_mbs || br.tell() >= full_size {
@@ -570,7 +574,7 @@ _ => {},
decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
}
}
- self.handle_macroblock(slice_hdr, &mut mb_info);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
}
mb_idx += 1;
}
@@ -587,6 +591,9 @@ _ => {},
let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
+ let slice_refs = self.frame_refs.cur_refs.clone();
+ let sslice_refs = SimplifiedSliceRefs::new(&slice_refs);
+
while mb_idx < self.num_mbs {
mb_info.coded = [false; 25];
mb_info.ref_l0 = [ZERO_REF; 4];
@@ -675,7 +682,7 @@ _ => {},
mb_info.transform_size_8x8 = false;
last_qp_diff = false;
}
- self.handle_macroblock(slice_hdr, &mut mb_info);
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs);
prev_mb_skipped = mb_skip;
if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() {
if let Some(ref mut pic) = self.cur_pic {
diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
index f558441..5845d92 100644
--- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
+++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs
@@ -1,6 +1,6 @@
use nihav_core::frame::*;
use nihav_codec_support::codecs::MV;
-use nihav_codec_support::codecs::blockdsp::*;
+use super::super::SimpleFrame;
macro_rules! module_selector {
($( ($cond:meta, $module:ident) ),*) => {
@@ -81,7 +81,7 @@ impl H264MC {
self.width = width;
self.height = height;
}
- pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame<u8>, refpic: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+ pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame<u8>, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
let mut ebuf = [0u8; 22 * 22];
let mvx = mv.x >> 2;
let mvy = mv.y >> 2;
@@ -89,16 +89,16 @@ impl H264MC {
let pre = if mode != 0 { 2isize } else { 0 };
let post = if mode != 0 { 3isize } else { 0 };
let (yw, yh) = (self.width, self.height);
- let src = refpic.get_data();
- let systride = refpic.get_stride(0);
+ let src = refpic.data;
+ let systride = refpic.stride[0];
let src_x = (xpos as isize) + (mvx as isize);
let src_y = (ypos as isize) + (mvy as isize);
let (ysrc, ystride) = if (src_x - pre < 0) || (src_x + (w as isize) + post > (yw as isize)) || (src_y - pre < 0) || (src_y + (h as isize) + post > (yh as isize)) {
let add = (pre + post) as usize;
- edge_emu(&refpic, src_x - pre, src_y - pre, w + add, h + add, &mut ebuf, 22, 0, 0);
+ edge_emu_sf(refpic, src_x - pre, src_y - pre, yw, yh, w + add, h + add, &mut ebuf, 22, 0);
(&ebuf[..], 22)
} else {
- (&src[refpic.get_offset(0) + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride)
+ (&src[refpic.offset[0] + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride)
};
let wmode = match w {
4 => 0,
@@ -114,15 +114,17 @@ impl H264MC {
let dy = (mv.y & 7) as u16;
let src_x = ((xpos >> 1) as isize) + (mvx as isize);
let src_y = ((ypos >> 1) as isize) + (mvy as isize);
- let suoff = refpic.get_offset(1);
- let svoff = refpic.get_offset(2);
- let sustride = refpic.get_stride(1);
- let svstride = refpic.get_stride(2);
+ let suoff = refpic.offset[1];
+ let svoff = refpic.offset[2];
+ let sustride = refpic.stride[1];
+ let svstride = refpic.stride[2];
let cbw = w / 2;
let cbh = h / 2;
let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) {
- edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4);
- edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4);
+ let aw = (cw + 7) & !7;
+ let ah = (ch + 7) & !7;
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1);
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2);
([&ebuf, &ebuf[9..]], [18, 18])
} else {
([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..],
@@ -135,7 +137,7 @@ impl H264MC {
}
}
- pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+ pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize;
let pre = if mode != 0 { 2 } else { 0 };
@@ -155,14 +157,13 @@ impl H264MC {
if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) ||
(sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) {
let edge = (pre + post) as usize;
- edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge,
- &mut ebuf, EBUF_STRIDE, 0, 0);
+ edge_emu_sf(refpic, sx - pre, sy - pre, width, height, w + edge, h + edge,
+ &mut ebuf, EBUF_STRIDE, 0);
(H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h);
} else {
- let sstride = refpic.get_stride(0);
- let soff = refpic.get_offset(0);
- let sdta = refpic.get_data();
- let sbuf: &[u8] = sdta.as_slice();
+ let sstride = refpic.stride[0];
+ let soff = refpic.offset[0];
+ let sbuf = refpic.data;
let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride;
(H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h);
}
@@ -174,16 +175,18 @@ impl H264MC {
let dy = (mv.y & 7) as u16;
let src_x = ((xpos >> 1) as isize) + (mvx as isize);
let src_y = ((ypos >> 1) as isize) + (mvy as isize);
- let suoff = refpic.get_offset(1);
- let svoff = refpic.get_offset(2);
- let sustride = refpic.get_stride(1);
- let svstride = refpic.get_stride(2);
- let src = refpic.get_data();
+ let suoff = refpic.offset[1];
+ let svoff = refpic.offset[2];
+ let sustride = refpic.stride[1];
+ let svstride = refpic.stride[2];
+ let src = refpic.data;
let cbw = w / 2;
let cbh = h / 2;
let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) {
- edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4);
- edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4);
+ let aw = (cw + 7) & !7;
+ let ah = (ch + 7) & !7;
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1);
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2);
([&ebuf, &ebuf[9..]], [18, 18])
} else {
([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..],
@@ -194,7 +197,7 @@ impl H264MC {
(self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh);
}
- pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame<u8>, refpic: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+ pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame<u8>, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
let mut abuf = self.avg_buf.clone();
let mut afrm = NASimpleVideoFrame::from_video_buf(&mut abuf).unwrap();
let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 };
@@ -233,6 +236,27 @@ impl H264MC {
}
}
+fn edge_emu_sf(src: &SimpleFrame, xpos: isize, ypos: isize, w: usize, h: usize, bw: usize, bh: usize, dst: &mut [u8], dstride: usize, comp: usize) { // Fills a bw x bh block of dst (line stride dstride) from plane comp of src, starting at (xpos, ypos) and clamping every source coordinate into the w x h area.
+ let stride = src.stride[comp]; // line stride of the selected plane
+ let offs = src.offset[comp]; // start of the selected plane inside src.data
+ let framebuf = src.data;
+
+ for y in 0..bh {
+ let srcy; // source row for this output row, clamped to 0..=h-1
+ if (y as isize) + ypos < 0 { srcy = 0; }
+ else if (y as isize) + ypos >= (h as isize) { srcy = h - 1; }
+ else { srcy = ((y as isize) + ypos) as usize; }
+
+ for x in 0..bw {
+ let srcx; // source column for this output pixel, clamped to 0..=w-1
+ if (x as isize) + xpos < 0 { srcx = 0; }
+ else if (x as isize) + xpos >= (w as isize) { srcx = w - 1; }
+ else { srcx = ((x as isize) + xpos) as usize; }
+ dst[x + y * dstride] = framebuf[offs + srcx + srcy * stride]; // out-of-area positions therefore repeat the nearest edge pixel
+ }
+ }
+}
+
fn avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
for (dst, src) in dline.iter_mut().zip(sline.iter()).take(bw) {
diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs
index 5a204f3..d8e51f3 100644
--- a/nihav-itu/src/codecs/h264/mb_recon.rs
+++ b/nihav-itu/src/codecs/h264/mb_recon.rs
@@ -4,7 +4,7 @@ use nihav_codec_support::codecs::{MV, ZERO_MV};
use super::{CurrentMBInfo, I4X4_SCAN, Shareable};
use super::dispatch::{ThreadDispatcher, FrameDecodingStatus};
use super::dsp::*;
-use super::pic_ref::SliceRefs;
+use super::pic_ref::SimplifiedSliceRefs;
use super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO};
use super::types::*;
@@ -205,7 +205,7 @@ fn add_chroma(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &C
}
}
-fn do_p_mc(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<NAVideoBufferRef<u8>>, weight: &WeightInfo, mc_dsp: &mut H264MC) {
+fn do_p_mc(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<&SimpleFrame>, weight: &WeightInfo, mc_dsp: &mut H264MC) {
if let Some(buf) = ref_pic {
if !weight.is_weighted() {
mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv);
@@ -245,7 +245,7 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, w: usize,
}
#[allow(clippy::match_like_matches_macro)]
-fn do_b_mc(frm: &mut NASimpleVideoFrame<u8>, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<NAVideoBufferRef<u8>>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option<NAVideoBufferRef<u8>>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
+fn do_b_mc(frm: &mut NASimpleVideoFrame<u8>, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) {
(BMode::L0, true, _) => true,
(BMode::L1, _, true) => true,
@@ -364,7 +364,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame<u8>, mode: BMode, xpos: usize, ypos: usi
}
}
-fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option<NAVideoBufferRef<u8>>, weight0: &WeightInfo, ref_pic1: Option<NAVideoBufferRef<u8>>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
+fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
if !weight0.is_weighted() || !weight1.is_weighted() {
match (ref_pic0, ref_pic1) {
(Some(buf0), Some(buf1)) => {
@@ -423,7 +423,7 @@ fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, mv:
}
}
-fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) {
+fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SimplifiedSliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) {
let idx_l0 = ref_l0.index();
let idx_l1 = ref_l1.index();
if mode != BMode::Bi || weight_mode != 2 {
@@ -432,7 +432,7 @@ fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, wei
let r0_poc = pic0.full_id as u16;
let r1_poc = pic1.full_id as u16;
let cur_id = frame_refs.cur_id as u16;
- if (r0_poc == r1_poc) || pic0.long_term.is_some() || pic1.long_term.is_some() {
+ if (r0_poc == r1_poc) || pic0.long_term || pic1.long_term {
return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
}
@@ -473,7 +473,7 @@ fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, wei
}
}
-pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) {
+pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) {
let xpos = sstate.mb_x * 16;
let ypos = sstate.mb_y * 16;
@@ -525,12 +525,12 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_in
do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp);
},
SubMBType::P8x4 => {
- do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp);
let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp);
},
SubMBType::P4x8 => {
- do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
},
@@ -540,7 +540,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_in
let sypos = ypos + by + (sb_no & 2) * 2;
let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4;
let mv = sstate.get_cur_blk4(sblk_no).mv[0];
- do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp);
}
},
_ => unreachable!(),
@@ -631,7 +631,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_in
let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
let (pw, ph) = subtype.size();
let mv = sstate.get_cur_blk4(blk8).mv;
- do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp);
+ do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
let mv = sstate.get_cur_blk4(addr2).mv;
do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
@@ -641,7 +641,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_in
for i in 0..4 {
let addr2 = blk8 + (i & 1) + (i & 2) * 2;
let mv = sstate.get_cur_blk4(addr2).mv;
- do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp);
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
bx += 4;
if i == 1 {
bx -= 8;
@@ -680,7 +680,7 @@ pub fn wait_for_mb(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, xpos
}
}
-fn wait_b_mc(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, frame_refs: &SliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> {
+fn wait_b_mc(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, frame_refs: &SimplifiedSliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> {
if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) {
wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[0], ref_id)?;
}
@@ -690,7 +690,7 @@ fn wait_b_mc(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, frame_refs
Ok(())
}
-pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable<ThreadDispatcher>) -> DecoderResult<()> {
+pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable<ThreadDispatcher>) -> DecoderResult<()> {
let xpos = sstate.mb_x * 16;
let ypos = sstate.mb_y * 16;
@@ -760,7 +760,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb
if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 4, mv, ref_id)?;
}
- do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp);
let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?;
@@ -771,7 +771,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb
if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
wait_for_mb(disp, sstate, xpos + bx + 4, ypos + by + 8, mv, ref_id)?;
}
- do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?;
@@ -787,7 +787,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb
if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
wait_for_mb(disp, sstate, sxpos + 4, sypos + 4, mv, ref_id)?;
}
- do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp);
+ do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp);
}
},
_ => unreachable!(),
@@ -900,7 +900,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb
let (pw, ph) = subtype.size();
let mv = sstate.get_cur_blk4(blk8).mv;
wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, pw, ph)?;
- do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp);
+ do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
let mv = sstate.get_cur_blk4(addr2).mv;
wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph)?;
@@ -912,7 +912,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb
let addr2 = blk8 + (i & 1) + (i & 2) * 2;
let mv = sstate.get_cur_blk4(addr2).mv;
wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 4, 4)?;
- do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp);
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
bx += 4;
if i == 1 {
bx -= 8;
diff --git a/nihav-itu/src/codecs/h264/pic_ref.rs b/nihav-itu/src/codecs/h264/pic_ref.rs
index a24e9d0..b027d06 100644
--- a/nihav-itu/src/codecs/h264/pic_ref.rs
+++ b/nihav-itu/src/codecs/h264/pic_ref.rs
@@ -56,6 +56,7 @@ pub struct SliceRefs {
pub cur_id: u32,
}
+#[allow(dead_code)]
impl SliceRefs {
pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> {
let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
@@ -150,6 +151,139 @@ impl SliceRefs {
}
}
+#[derive(Clone)]
+pub struct SimplePictureInfo<'a> { // Borrowed description of one reference picture, as filled in by SimplifiedSliceRefs::new.
+ pub full_id: u32, // copied from the source picture's full_id
+ pub buf: SimpleFrame<'a>, // SimpleFrame built over the source picture's buffer
+ pub long_term: bool, // true when the source picture's long_term option is Some
+ pub mv_info: &'a FrameMV, // borrowed from the source picture's mv_info
+}
+
+#[derive(Clone)]
+pub struct SimplifiedSliceRefs<'a> { // Borrowed counterpart of SliceRefs: the same two reference lists, but holding SimplePictureInfo entries.
+ pub ref_list0: Vec<Option<SimplePictureInfo<'a>>>, // one entry per SliceRefs::ref_list0 slot, None where the source slot is None
+ pub ref_list1: Vec<Option<SimplePictureInfo<'a>>>, // one entry per SliceRefs::ref_list1 slot, None where the source slot is None
+ pub cur_id: u32, // copied from SliceRefs::cur_id
+}
+
+impl<'a> SimplifiedSliceRefs<'a> {
+ pub fn new(srefs: &'a SliceRefs) -> Self { // Builds the borrowed view of both reference lists of srefs; the result cannot outlive srefs.
+ let mut ref_list0 = Vec::with_capacity(srefs.ref_list0.len());
+ let mut ref_list1 = Vec::with_capacity(srefs.ref_list1.len());
+ for entry in srefs.ref_list0.iter() { // list 0: convert each present picture, keep None slots as None
+ ref_list0.push(entry.as_ref().map(|pic| SimplePictureInfo {
+ full_id: pic.full_id,
+ buf: SimpleFrame::new(&pic.buf),
+ long_term: pic.long_term.is_some(),
+ mv_info: &pic.mv_info,
+ }));
+ }
+ for entry in srefs.ref_list1.iter() { // list 1: same conversion as for list 0
+ ref_list1.push(entry.as_ref().map(|pic| SimplePictureInfo {
+ full_id: pic.full_id,
+ buf: SimpleFrame::new(&pic.buf),
+ long_term: pic.long_term.is_some(),
+ mv_info: &pic.mv_info,
+ }));
+ }
+ Self {
+ cur_id: srefs.cur_id,
+ ref_list0, ref_list1
+ }
+ }
+ pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> { // Returns the full_id of entry ref_id in list 0 (list_id == 0) or list 1 (any other value); None if the index is out of range or the slot is empty.
+ let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+ if ref_list.len() > ref_id {
+ ref_list[ref_id].as_ref().map(|pic| pic.full_id)
+ } else {
+ None
+ }
+ }
+ pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<&SimpleFrame> { // Same lookup as get_ref_id, but returns a reference to the entry's SimpleFrame.
+ let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+ if ref_list.len() > ref_id {
+ ref_list[ref_id].as_ref().map(|pic| &pic.buf)
+ } else {
+ None
+ }
+ }
+ pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) { // Returns (macroblock info at mb_x, mb_y; full_id truncated to u16; long-term flag) taken from the first list 1 entry.
+ if let Some(ref ref_pic) = &self.ref_list1[0] { // NOTE(review): unchecked index, panics if ref_list1 is empty; presumably callers guarantee a non-empty list 1 - confirm
+ let mv_info = ref_pic.mv_info;
+ let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride];
+ (mb, ref_pic.full_id as u16, ref_pic.long_term)
+ } else {
+ (FrameMBInfo::default(), 0, false) // empty slot: default macroblock info, id 0, not long-term
+ }
+ }
+ pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) { // Finds the first list 0 entry whose full_id (as u16) equals ref0_id; returns its index as a PicRef plus its long-term flag.
+ let mut r0_idx = 0; // stays 0 when no entry matches
+ let mut long = false; // stays false when no entry matches
+ for (i, rpic0) in self.ref_list0.iter().enumerate() {
+ if let Some(ref pic) = rpic0 {
+ if (pic.full_id as u16) == ref0_id {
+ r0_idx = i as u8;
+ long = pic.long_term;
+ break;
+ }
+ }
+ }
+ (PicRef::new(r0_idx), long)
+ }
+ pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] { // Maps a (list 0, list 1) pair of reference indices to the matching full_id values truncated to u16.
+ let r0 = ref_idx[0].index();
+ let r1 = ref_idx[1].index();
+ let ref0 = if r0 < self.ref_list0.len() {
+ if let Some(ref pic) = self.ref_list0[r0] {
+ pic.full_id as u16
+ } else {
+ MISSING_POC // slot exists but holds no picture
+ }
+ } else {
+ MISSING_POC // index beyond the end of list 0
+ };
+ let ref1 = if r1 < self.ref_list1.len() {
+ if let Some(ref pic) = self.ref_list1[r1] {
+ pic.full_id as u16
+ } else {
+ MISSING_POC // slot exists but holds no picture
+ }
+ } else {
+ MISSING_POC // index beyond the end of list 1
+ };
+ [ref0, ref1]
+ }
+ pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool { // True when the two reference pairs are equal, or when both per-list cmp_ref comparisons succeed.
+ if ref1 != ref2 {
+ self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1)
+ } else {
+ true
+ }
+ }
+ fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool { // Compares two references into one list (list 0 when list == 0, list 1 otherwise).
+ if ref1 == ref2 {
+ true
+ } else {
+ let idx0 = ref1.index();
+ let idx1 = ref2.index();
+ if idx0 == idx1 { // different PicRef values with the same index are treated as equal
+ return true;
+ }
+ let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 };
+ if idx0 >= src.len() || idx1 >= src.len() { // an out-of-range index never matches
+//panic!("wrong refs");
+ return false;
+ }
+ if let (Some(ref pic0), Some(ref pic1)) = (&src[idx0], &src[idx1]) {
+ pic0.full_id == pic1.full_id // different indices match when both slots carry the same full_id
+ } else {
+//panic!("missing pics");
+ false
+ }
+ }
+ }
+}
+
pub struct FrameRefs {
pub ref_pics: Vec<PictureInfo>,
pub cur_refs: SliceRefs,
diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs
index 4cc1fca..1310daa 100644
--- a/nihav-itu/src/codecs/h264/types.rs
+++ b/nihav-itu/src/codecs/h264/types.rs
@@ -1,9 +1,31 @@
-use nihav_core::frame::NASimpleVideoFrame;
+use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
use nihav_codec_support::codecs::{MV, ZERO_MV};
use nihav_codec_support::data::GenericCache;
-use super::SliceRefs;
+use super::SimplifiedSliceRefs;
use super::pic_ref::FrameMBInfo;
+#[derive(Clone,Copy)]
+pub struct SimpleFrame<'a> { // Copyable borrowed view of a video buffer: one data slice plus per-plane offsets and strides.
+ pub data: &'a [u8], // slice returned by the buffer's get_data()
+ pub offset: [usize; 3], // offset[i] is the buffer's get_offset(i) for planes 0..3
+ pub stride: [usize; 3], // stride[i] is the buffer's get_stride(i) for planes 0..3
+}
+
+impl<'a> SimpleFrame<'a> {
+ pub fn new(buf: &'a NAVideoBuffer<u8>) -> Self { // Captures the data slice and the offsets/strides of three planes of buf; the result borrows buf.
+ let mut offset = [0; 3];
+ let mut stride = [0; 3];
+ for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() { // plane runs over 0, 1, 2
+ *offs = buf.get_offset(plane);
+ *strd = buf.get_stride(plane);
+ }
+ Self {
+ data: buf.get_data(),
+ offset, stride
+ }
+ }
+}
+
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
@@ -478,7 +500,7 @@ impl SliceState {
}
}
}
- pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) {
+ pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
if deblock_mode == 1 {
return;
}
@@ -773,7 +795,7 @@ impl SliceState {
self.fill_mv (0, 0, 16, 16, 0, mv);
self.fill_ref(0, 0, 16, 16, 0, ref_idx);
}
- pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
+ pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
if direct_8x8 {
for blk4 in 0..16 {
@@ -793,7 +815,7 @@ impl SliceState {
}
}
}
- pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
+ pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
@@ -801,7 +823,7 @@ impl SliceState {
self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
}
#[allow(clippy::nonminimal_bool)]
- pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
+ pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
let blk8 = blk4_to_blk8(blk4);
let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
(ZERO_MV, MISSING_POC, MISSING_REF)