diff options
author | Mickaƫl Raulet <mraulet@insa-rennes.fr> | 2014-07-15 10:23:20 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-15 13:11:33 +0200 |
commit | 1241eb88704f75fe9e7d1de3663aa24d4318cdab (patch) | |
tree | 4e44e5bc5455b86b4fe3f8a586455a5d301550b2 /libavcodec/hevc_filter.c | |
parent | d59536159379a1b8c5f7631025edfc4a7d40b048 (diff) | |
download | ffmpeg-1241eb88704f75fe9e7d1de3663aa24d4318cdab.tar.gz |
hevc: simplify SAO computation, delay from one row its computation
(cherry picked from commit f2c5f647cec786df26f442a85e6d685a131a50c9)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevc_filter.c')
-rw-r--r-- | libavcodec/hevc_filter.c | 225 |
1 files changed, 106 insertions, 119 deletions
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index ec7a21187d..f1ba1a3997 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -153,129 +153,97 @@ static void copy_CTB(uint8_t *dst, uint8_t *src, static void sao_filter_CTB(HEVCContext *s, int x, int y) { - // TODO: This should be easily parallelizable - // TODO: skip CBs when (cu_transquant_bypass_flag || (pcm_loop_filter_disable_flag && pcm_flag)) - int c_idx = 0; - int class = 1, class_index; + int c_idx; int edges[4]; // 0 left 1 top 2 right 3 bottom - SAOParams *sao[4]; - int classes[4]; - int x_shift = 0, y_shift = 0; - int x_ctb = x >> s->sps->log2_ctb_size; - int y_ctb = y >> s->sps->log2_ctb_size; - int ctb_addr_rs = y_ctb * s->sps->ctb_width + x_ctb; - int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs]; - + int x_ctb = x >> s->sps->log2_ctb_size; + int y_ctb = y >> s->sps->log2_ctb_size; + int ctb_addr_rs = y_ctb * s->sps->ctb_width + x_ctb; + int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs]; + SAOParams *sao = &CTB(s->sao, x_ctb, y_ctb); // flags indicating unfilterable edges - uint8_t vert_edge[] = { 0, 0, 0, 0 }; - uint8_t horiz_edge[] = { 0, 0, 0, 0 }; - uint8_t diag_edge[] = { 0, 0, 0, 0 }; - uint8_t lfase[3]; // current, above, left - uint8_t no_tile_filter = s->pps->tiles_enabled_flag && - !s->pps->loop_filter_across_tiles_enabled_flag; - uint8_t left_tile_edge = 0; - uint8_t up_tile_edge = 0; - - sao[0] = &CTB(s->sao, x_ctb, y_ctb); + uint8_t vert_edge[] = { 0, 0 }; + uint8_t horiz_edge[] = { 0, 0 }; + uint8_t diag_edge[] = { 0, 0, 0, 0 }; + uint8_t lfase = CTB(s->filter_slice_edges, x_ctb, y_ctb); + uint8_t no_tile_filter = s->pps->tiles_enabled_flag && + !s->pps->loop_filter_across_tiles_enabled_flag; + uint8_t restore = no_tile_filter || !lfase; + uint8_t left_tile_edge = 0; + uint8_t right_tile_edge = 0; + uint8_t up_tile_edge = 0; + uint8_t bottom_tile_edge = 0; + edges[0] = x_ctb == 0; edges[1] = y_ctb == 0; edges[2] = x_ctb == s->sps->ctb_width - 1; edges[3] = y_ctb == s->sps->ctb_height - 1; - lfase[0] = CTB(s->filter_slice_edges, x_ctb, y_ctb); - classes[0] = 0; - - if (!edges[0]) { - left_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; - sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb); - vert_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge; - vert_edge[2] = vert_edge[0]; - lfase[2] = CTB(s->filter_slice_edges, x_ctb - 1, y_ctb); - classes[class] = 2; - class++; - x_shift = 8; - } - - if (!edges[1]) { - up_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]; - sao[class] = &CTB(s->sao, x_ctb, y_ctb - 1); - horiz_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge; - horiz_edge[1] = horiz_edge[0]; - lfase[1] = CTB(s->filter_slice_edges, x_ctb, y_ctb - 1); - classes[class] = 1; - class++; - y_shift = 4; + if (restore) { if (!edges[0]) { - classes[class] = 3; - sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb - 1); - class++; - - // Tile check here is done current CTB row/col, not above/left like you'd expect, - //but that is because the tile boundary always extends through the whole pic - vert_edge[1] = (!lfase[1] && CTB(s->tab_slice_address, x_ctb, y_ctb - 1) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge; - vert_edge[3] = vert_edge[1]; - horiz_edge[2] = (!lfase[2] && CTB(s->tab_slice_address, x_ctb - 1, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || up_tile_edge; - horiz_edge[3] = horiz_edge[2]; - diag_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge; - diag_edge[3] = diag_edge[0]; - - // Does left CTB comes after above CTB? - if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) > - CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) { - diag_edge[2] = !lfase[2] || left_tile_edge || up_tile_edge; - diag_edge[1] = diag_edge[2]; - } else if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) < - CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) { - diag_edge[1] = !lfase[1] || left_tile_edge || up_tile_edge; - diag_edge[2] = diag_edge[1]; - } else { - // Same slice, only consider tiles - diag_edge[2] = left_tile_edge || up_tile_edge; - diag_edge[1] = diag_edge[2]; - } + left_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; + vert_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge; + } + if (!edges[2]) { + right_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]]; + vert_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge; + } + if (!edges[1]) { + up_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]; + horiz_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge; + } + if (!edges[3]) { + bottom_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->sps->ctb_width]]; + horiz_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge; + } + if (!edges[0] && !edges[1]) { + diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge; + } + if (!edges[1] && !edges[2]) { + diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge; + } + if (!edges[2] && !edges[3]) { + diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge; + } + if (!edges[0] && !edges[3]) { + diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge; } } for (c_idx = 0; c_idx < 3; c_idx++) { - int chroma = c_idx ? 1 : 0; - int x0 = x >> chroma; - int y0 = y >> chroma; - int stride = s->frame->linesize[c_idx]; - int ctb_size = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; - int width = FFMIN(ctb_size, - (s->sps->width >> s->sps->hshift[c_idx]) - x0); - int height = FFMIN(ctb_size, - (s->sps->height >> s->sps->vshift[c_idx]) - y0); + int x0 = x >> s->sps->hshift[c_idx]; + int y0 = y >> s->sps->vshift[c_idx]; + int stride = s->frame->linesize[c_idx]; + int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; + int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx]; + int width = FFMIN(ctb_size_h, + (s->sps->width >> s->sps->hshift[c_idx]) - x0); + int height = FFMIN(ctb_size_v, + (s->sps->height >> s->sps->vshift[c_idx]) - y0); uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; - int offset = (y_shift >> chroma) * stride + ((x_shift >> chroma) << s->sps->pixel_shift); - - copy_CTB(dst - offset, src - offset, - (edges[2] ? width + (x_shift >> chroma) : width) << s->sps->pixel_shift, - (edges[3] ? height + (y_shift >> chroma) : height), stride); - - for (class_index = 0; class_index < class; class_index++) { - - switch (sao[class_index]->type_idx[c_idx]) { - case SAO_BAND: - s->hevcdsp.sao_band_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx); - break; - case SAO_EDGE: - s->hevcdsp.sao_edge_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx, - vert_edge[classes[class_index]], - horiz_edge[classes[class_index]], - diag_edge[classes[class_index]]); - break; - } + + switch (sao->type_idx[c_idx]) { + case SAO_BAND: + s->hevcdsp.sao_band_filter(dst, src, + stride, + sao, + edges, width, + height, c_idx); + break; + case SAO_EDGE: + s->hevcdsp.sao_edge_filter[restore](dst, src, + stride, + sao, + edges, width, + height, c_idx, + vert_edge, + horiz_edge, + diag_edge); + break; + default : + copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride); + break; } } } @@ -661,22 +629,41 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, #undef CB #undef CR -void ff_hevc_hls_filter(HEVCContext *s, int x, int y) +void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) { deblocking_filter_CTB(s, x, y); - if (s->sps->sao_enabled) - sao_filter_CTB(s, x, y); + if (s->sps->sao_enabled) { + int x_end = x >= s->sps->width - ctb_size; + int y_end = y >= s->sps->height - ctb_size; + if (y && x) + sao_filter_CTB(s, x - ctb_size, y - ctb_size); + if (x && y_end) + sao_filter_CTB(s, x - ctb_size, y); + if (y && x_end) { + sao_filter_CTB(s, x, y - ctb_size); + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, y - ctb_size, 0); + } + if (x_end && y_end) { + sao_filter_CTB(s, x , y); + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, y, 0); + } + } else { + if (y && x >= s->sps->width - ctb_size) + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, y, 0); + } } void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size) { + int x_end = x_ctb >= s->sps->width - ctb_size; + int y_end = y_ctb >= s->sps->height - ctb_size; if (y_ctb && x_ctb) - ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size); - if (y_ctb && x_ctb >= s->sps->width - ctb_size) { - ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size); - if (s->threads_type == FF_THREAD_FRAME ) - ff_thread_report_progress(&s->ref->tf, y_ctb - ctb_size, 0); - } - if (x_ctb && y_ctb >= s->sps->height - ctb_size) - ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb); + ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size); + if (y_ctb && x_end) + ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size); + if (x_ctb && y_end) + ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size); } |