diff options
author | Mickaël Raulet <mraulet@insa-rennes.fr> | 2014-07-15 10:23:20 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-15 13:11:33 +0200 |
commit | 1241eb88704f75fe9e7d1de3663aa24d4318cdab (patch) | |
tree | 4e44e5bc5455b86b4fe3f8a586455a5d301550b2 /libavcodec | |
parent | d59536159379a1b8c5f7631025edfc4a7d40b048 (diff) | |
download | ffmpeg-1241eb88704f75fe9e7d1de3663aa24d4318cdab.tar.gz |
hevc: simplify SAO computation, delay its computation by one row
(cherry picked from commit f2c5f647cec786df26f442a85e6d685a131a50c9)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/hevc.c | 4 | ||||
-rw-r--r-- | libavcodec/hevc.h | 2 | ||||
-rw-r--r-- | libavcodec/hevc_filter.c | 225 | ||||
-rw-r--r-- | libavcodec/hevcdsp.c | 12 | ||||
-rw-r--r-- | libavcodec/hevcdsp.h | 15 | ||||
-rw-r--r-- | libavcodec/hevcdsp_template.c | 389 |
6 files changed, 194 insertions, 453 deletions
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index cc36f97782..00db01e9f9 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -2092,7 +2092,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) if (x_ctb + ctb_size >= s->sps->width && y_ctb + ctb_size >= s->sps->height) - ff_hevc_hls_filter(s, x_ctb, y_ctb); + ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); return ctb_addr_ts; } @@ -2167,7 +2167,7 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int } if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) { - ff_hevc_hls_filter(s, x_ctb, y_ctb); + ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP); return ctb_addr_ts; } diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index fea2d4ddb2..8d28490457 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -1001,7 +1001,7 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, int log2_trafo_size); int ff_hevc_cu_qp_delta_sign_flag(HEVCContext *s); int ff_hevc_cu_qp_delta_abs(HEVCContext *s); -void ff_hevc_hls_filter(HEVCContext *s, int x, int y); +void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size); void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size); void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, int log2_trafo_size, enum ScanType scan_idx, diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index ec7a21187d..f1ba1a3997 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -153,129 +153,97 @@ static void copy_CTB(uint8_t *dst, uint8_t *src, static void sao_filter_CTB(HEVCContext *s, int x, int y) { - // TODO: This should be easily parallelizable - // TODO: skip CBs when (cu_transquant_bypass_flag || (pcm_loop_filter_disable_flag && pcm_flag)) - int c_idx = 0; - int class = 1, class_index; + int c_idx; int edges[4]; // 0 left 1 top 2 right 3 bottom - 
SAOParams *sao[4]; - int classes[4]; - int x_shift = 0, y_shift = 0; - int x_ctb = x >> s->sps->log2_ctb_size; - int y_ctb = y >> s->sps->log2_ctb_size; - int ctb_addr_rs = y_ctb * s->sps->ctb_width + x_ctb; - int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs]; - + int x_ctb = x >> s->sps->log2_ctb_size; + int y_ctb = y >> s->sps->log2_ctb_size; + int ctb_addr_rs = y_ctb * s->sps->ctb_width + x_ctb; + int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs]; + SAOParams *sao = &CTB(s->sao, x_ctb, y_ctb); // flags indicating unfilterable edges - uint8_t vert_edge[] = { 0, 0, 0, 0 }; - uint8_t horiz_edge[] = { 0, 0, 0, 0 }; - uint8_t diag_edge[] = { 0, 0, 0, 0 }; - uint8_t lfase[3]; // current, above, left - uint8_t no_tile_filter = s->pps->tiles_enabled_flag && - !s->pps->loop_filter_across_tiles_enabled_flag; - uint8_t left_tile_edge = 0; - uint8_t up_tile_edge = 0; - - sao[0] = &CTB(s->sao, x_ctb, y_ctb); + uint8_t vert_edge[] = { 0, 0 }; + uint8_t horiz_edge[] = { 0, 0 }; + uint8_t diag_edge[] = { 0, 0, 0, 0 }; + uint8_t lfase = CTB(s->filter_slice_edges, x_ctb, y_ctb); + uint8_t no_tile_filter = s->pps->tiles_enabled_flag && + !s->pps->loop_filter_across_tiles_enabled_flag; + uint8_t restore = no_tile_filter || !lfase; + uint8_t left_tile_edge = 0; + uint8_t right_tile_edge = 0; + uint8_t up_tile_edge = 0; + uint8_t bottom_tile_edge = 0; + edges[0] = x_ctb == 0; edges[1] = y_ctb == 0; edges[2] = x_ctb == s->sps->ctb_width - 1; edges[3] = y_ctb == s->sps->ctb_height - 1; - lfase[0] = CTB(s->filter_slice_edges, x_ctb, y_ctb); - classes[0] = 0; - - if (!edges[0]) { - left_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; - sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb); - vert_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge; - vert_edge[2] = vert_edge[0]; - lfase[2] = CTB(s->filter_slice_edges, x_ctb - 1, 
y_ctb); - classes[class] = 2; - class++; - x_shift = 8; - } - - if (!edges[1]) { - up_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]; - sao[class] = &CTB(s->sao, x_ctb, y_ctb - 1); - horiz_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge; - horiz_edge[1] = horiz_edge[0]; - lfase[1] = CTB(s->filter_slice_edges, x_ctb, y_ctb - 1); - classes[class] = 1; - class++; - y_shift = 4; + if (restore) { if (!edges[0]) { - classes[class] = 3; - sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb - 1); - class++; - - // Tile check here is done current CTB row/col, not above/left like you'd expect, - //but that is because the tile boundary always extends through the whole pic - vert_edge[1] = (!lfase[1] && CTB(s->tab_slice_address, x_ctb, y_ctb - 1) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge; - vert_edge[3] = vert_edge[1]; - horiz_edge[2] = (!lfase[2] && CTB(s->tab_slice_address, x_ctb - 1, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || up_tile_edge; - horiz_edge[3] = horiz_edge[2]; - diag_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge; - diag_edge[3] = diag_edge[0]; - - // Does left CTB comes after above CTB? 
- if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) > - CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) { - diag_edge[2] = !lfase[2] || left_tile_edge || up_tile_edge; - diag_edge[1] = diag_edge[2]; - } else if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) < - CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) { - diag_edge[1] = !lfase[1] || left_tile_edge || up_tile_edge; - diag_edge[2] = diag_edge[1]; - } else { - // Same slice, only consider tiles - diag_edge[2] = left_tile_edge || up_tile_edge; - diag_edge[1] = diag_edge[2]; - } + left_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; + vert_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge; + } + if (!edges[2]) { + right_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]]; + vert_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge; + } + if (!edges[1]) { + up_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]; + horiz_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge; + } + if (!edges[3]) { + bottom_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->sps->ctb_width]]; + horiz_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge; + } + if (!edges[0] && !edges[1]) { + diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge; + } + if (!edges[1] && !edges[2]) { + diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != 
CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge; + } + if (!edges[2] && !edges[3]) { + diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge; + } + if (!edges[0] && !edges[3]) { + diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge; } } for (c_idx = 0; c_idx < 3; c_idx++) { - int chroma = c_idx ? 1 : 0; - int x0 = x >> chroma; - int y0 = y >> chroma; - int stride = s->frame->linesize[c_idx]; - int ctb_size = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; - int width = FFMIN(ctb_size, - (s->sps->width >> s->sps->hshift[c_idx]) - x0); - int height = FFMIN(ctb_size, - (s->sps->height >> s->sps->vshift[c_idx]) - y0); + int x0 = x >> s->sps->hshift[c_idx]; + int y0 = y >> s->sps->vshift[c_idx]; + int stride = s->frame->linesize[c_idx]; + int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; + int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx]; + int width = FFMIN(ctb_size_h, + (s->sps->width >> s->sps->hshift[c_idx]) - x0); + int height = FFMIN(ctb_size_v, + (s->sps->height >> s->sps->vshift[c_idx]) - y0); uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; - int offset = (y_shift >> chroma) * stride + ((x_shift >> chroma) << s->sps->pixel_shift); - - copy_CTB(dst - offset, src - offset, - (edges[2] ? width + (x_shift >> chroma) : width) << s->sps->pixel_shift, - (edges[3] ? 
height + (y_shift >> chroma) : height), stride); - - for (class_index = 0; class_index < class; class_index++) { - - switch (sao[class_index]->type_idx[c_idx]) { - case SAO_BAND: - s->hevcdsp.sao_band_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx); - break; - case SAO_EDGE: - s->hevcdsp.sao_edge_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx, - vert_edge[classes[class_index]], - horiz_edge[classes[class_index]], - diag_edge[classes[class_index]]); - break; - } + + switch (sao->type_idx[c_idx]) { + case SAO_BAND: + s->hevcdsp.sao_band_filter(dst, src, + stride, + sao, + edges, width, + height, c_idx); + break; + case SAO_EDGE: + s->hevcdsp.sao_edge_filter[restore](dst, src, + stride, + sao, + edges, width, + height, c_idx, + vert_edge, + horiz_edge, + diag_edge); + break; + default : + copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride); + break; } } } @@ -661,22 +629,41 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, #undef CB #undef CR -void ff_hevc_hls_filter(HEVCContext *s, int x, int y) +void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) { deblocking_filter_CTB(s, x, y); - if (s->sps->sao_enabled) - sao_filter_CTB(s, x, y); + if (s->sps->sao_enabled) { + int x_end = x >= s->sps->width - ctb_size; + int y_end = y >= s->sps->height - ctb_size; + if (y && x) + sao_filter_CTB(s, x - ctb_size, y - ctb_size); + if (x && y_end) + sao_filter_CTB(s, x - ctb_size, y); + if (y && x_end) { + sao_filter_CTB(s, x, y - ctb_size); + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, y - ctb_size, 0); + } + if (x_end && y_end) { + sao_filter_CTB(s, x , y); + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, y, 0); + } + } else { + if (y && x >= s->sps->width - ctb_size) + if (s->threads_type & FF_THREAD_FRAME ) + ff_thread_report_progress(&s->ref->tf, 
y, 0); + } } void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size) { + int x_end = x_ctb >= s->sps->width - ctb_size; + int y_end = y_ctb >= s->sps->height - ctb_size; if (y_ctb && x_ctb) - ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size); - if (y_ctb && x_ctb >= s->sps->width - ctb_size) { - ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size); - if (s->threads_type == FF_THREAD_FRAME ) - ff_thread_report_progress(&s->ref->tf, y_ctb - ctb_size, 0); - } - if (x_ctb && y_ctb >= s->sps->height - ctb_size) - ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb); + ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size); + if (y_ctb && x_end) + ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size); + if (x_ctb && y_end) + ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size); } diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 061651c3aa..d89d6db3e7 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -207,15 +207,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) hevcdsp->transform_dc_add[2] = FUNC(transform_16x16_dc_add, depth); \ hevcdsp->transform_dc_add[3] = FUNC(transform_32x32_dc_add, depth); \ \ - hevcdsp->sao_band_filter[0] = FUNC(sao_band_filter_0, depth); \ - hevcdsp->sao_band_filter[1] = FUNC(sao_band_filter_1, depth); \ - hevcdsp->sao_band_filter[2] = FUNC(sao_band_filter_2, depth); \ - hevcdsp->sao_band_filter[3] = FUNC(sao_band_filter_3, depth); \ - \ - hevcdsp->sao_edge_filter[0] = FUNC(sao_edge_filter_0, depth); \ - hevcdsp->sao_edge_filter[1] = FUNC(sao_edge_filter_1, depth); \ - hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth); \ - hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth); \ + hevcdsp->sao_band_filter = FUNC(sao_band_filter_0, depth); \ + hevcdsp->sao_edge_filter[0] = FUNC(sao_edge_filter_0, depth); \ + hevcdsp->sao_edge_filter[1] = FUNC(sao_edge_filter_1, depth); \ \ QPEL_FUNCS(depth); \ QPEL_UNI_FUNCS(depth); \ diff --git 
a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index b16251604d..3a76fc4937 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -54,13 +54,14 @@ typedef struct HEVCDSPContext { void (*transform_dc_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); - void (*sao_band_filter[4])(uint8_t *dst, uint8_t *src, ptrdiff_t stride, - struct SAOParams *sao, int *borders, - int width, int height, int c_idx); - void (*sao_edge_filter[4])(uint8_t *dst, uint8_t *src, ptrdiff_t stride, - struct SAOParams *sao, int *borders, int width, - int height, int c_idx, uint8_t vert_edge, - uint8_t horiz_edge, uint8_t diag_edge); + void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, + struct SAOParams *sao, int *borders, + int width, int height, int c_idx); + + void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, + struct SAOParams *sao, int *borders, int _width, + int _height, int c_idx, uint8_t *vert_edge, + uint8_t *horiz_edge, uint8_t *diag_edge); void (*put_hevc_qpel[10][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width); diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index 9ca1236974..ebfb9e818e 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -294,133 +294,87 @@ TRANSFORM_DC_ADD( 8) TRANSFORM_DC_ADD(16) TRANSFORM_DC_ADD(32) - -static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src, +static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, - int c_idx, int class) + int c_idx) { pixel *dst = (pixel *)_dst; pixel *src = (pixel *)_src; int offset_table[32] = { 0 }; int k, y, x; - int chroma = !!c_idx; int shift = BIT_DEPTH - 5; int *sao_offset_val = sao->offset_val[c_idx]; int sao_left_class = sao->band_position[c_idx]; - int init_y = 0, init_x = 0; stride /= sizeof(pixel); - switch (class) { - case 0: - if 
(!borders[2]) - width -= (8 >> chroma) + 2; - if (!borders[3]) - height -= (4 >> chroma) + 2; - break; - case 1: - init_y = -(4 >> chroma) - 2; - if (!borders[2]) - width -= (8 >> chroma) + 2; - height = (4 >> chroma) + 2; - break; - case 2: - init_x = -(8 >> chroma) - 2; - width = (8 >> chroma) + 2; - if (!borders[3]) - height -= (4 >> chroma) + 2; - break; - case 3: - init_y = -(4 >> chroma) - 2; - init_x = -(8 >> chroma) - 2; - width = (8 >> chroma) + 2; - height = (4 >> chroma) + 2; - break; - } - - dst = dst + (init_y * stride + init_x); - src = src + (init_y * stride + init_x); for (k = 0; k < 4; k++) offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); + dst[x] = av_clip_pixel(src[x] + offset_table[av_clip_pixel(src[x] >> shift)]); dst += stride; src += stride; } } -static void FUNC(sao_band_filter_0)(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int width, int height, - int c_idx) -{ - FUNC(sao_band_filter)(dst, src, stride, sao, borders, - width, height, c_idx, 0); -} +#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 
0 : -1)) -static void FUNC(sao_band_filter_1)(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int width, int height, - int c_idx) -{ - FUNC(sao_band_filter)(dst, src, stride, sao, borders, - width, height, c_idx, 1); -} +static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, + ptrdiff_t stride, SAOParams *sao, + int width, int height, + int c_idx, int init_x, int init_y) { -static void FUNC(sao_band_filter_2)(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int width, int height, - int c_idx) -{ - FUNC(sao_band_filter)(dst, src, stride, sao, borders, - width, height, c_idx, 2); -} + static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; + static const int8_t pos[4][2][2] = { + { { -1, 0 }, { 1, 0 } }, // horizontal + { { 0, -1 }, { 0, 1 } }, // vertical + { { -1, -1 }, { 1, 1 } }, // 45 degree + { { 1, -1 }, { -1, 1 } }, // 135 degree + }; + int *sao_offset_val = sao->offset_val[c_idx]; + int sao_eo_class = sao->eo_class[c_idx]; + pixel *dst = (pixel *)_dst; + pixel *src = (pixel *)_src; -static void FUNC(sao_band_filter_3)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int width, int height, - int c_idx) -{ - FUNC(sao_band_filter)(_dst, _src, stride, sao, borders, - width, height, c_idx, 3); + int y_stride = init_y * stride; + int pos_0_0 = pos[sao_eo_class][0][0]; + int pos_0_1 = pos[sao_eo_class][0][1]; + int pos_1_0 = pos[sao_eo_class][1][0]; + int pos_1_1 = pos[sao_eo_class][1][1]; + int x, y; + + int y_stride_0_1 = (init_y + pos_0_1) * stride; + int y_stride_1_1 = (init_y + pos_1_1) * stride; + for (y = init_y; y < height; y++) { + for (x = init_x; x < width; x++) { + int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); + int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); + int offset_val = edge_idx[2 + diff0 + diff1]; + dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); + } + y_stride += 
stride; + y_stride_0_1 += stride; + y_stride_1_1 += stride; + } } static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, - int c_idx, uint8_t vert_edge, - uint8_t horiz_edge, uint8_t diag_edge) + int c_idx, uint8_t *vert_edge, + uint8_t *horiz_edge, uint8_t *diag_edge) { int x, y; pixel *dst = (pixel *)_dst; pixel *src = (pixel *)_src; - int chroma = !!c_idx; int *sao_offset_val = sao->offset_val[c_idx]; int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - static const int8_t pos[4][2][2] = { - { { -1, 0 }, { 1, 0 } }, // horizontal - { { 0, -1 }, { 0, 1 } }, // vertical - { { -1, -1 }, { 1, 1 } }, // 45 degree - { { 1, -1 }, { -1, 1 } }, // 135 degree - }; - static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; - -#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1)) - stride /= sizeof(pixel); - if (!borders[2]) - width -= (8 >> chroma) + 2; - if (!borders[3]) - height -= (4 >> chroma) + 2; - - dst = dst + (init_y * stride + init_x); - src = src + (init_y * stride + init_x); - init_y = init_x = 0; if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; @@ -456,78 +410,25 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, height--; } } - { - int y_stride = init_y * stride; - int pos_0_0 = pos[sao_eo_class][0][0]; - int pos_0_1 = pos[sao_eo_class][0][1]; - int pos_1_0 = pos[sao_eo_class][1][0]; - int pos_1_1 = pos[sao_eo_class][1][1]; - - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; - for (y = init_y; y < height; y++) { - for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); - 
} - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; - } - } - { - // Restore pixels that can't be modified - int save_upper_left = !diag_edge && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; - if (vert_edge && sao_eo_class != SAO_EO_VERT) - for (y = init_y+save_upper_left; y< height; y++) - dst[y*stride] = src[y*stride]; - if(horiz_edge && sao_eo_class != SAO_EO_HORIZ) - for(x = init_x+save_upper_left; x<width; x++) - dst[x] = src[x]; - if(diag_edge && sao_eo_class == SAO_EO_135D) - dst[0] = src[0]; - } - -#undef CMP + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); } static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, - int c_idx, uint8_t vert_edge, - uint8_t horiz_edge, uint8_t diag_edge) + int c_idx, uint8_t *vert_edge, + uint8_t *horiz_edge, uint8_t *diag_edge) { int x, y; pixel *dst = (pixel *)_dst; pixel *src = (pixel *)_src; - int chroma = !!c_idx; int *sao_offset_val = sao->offset_val[c_idx]; int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - static const int8_t pos[4][2][2] = { - { { -1, 0 }, { 1, 0 } }, // horizontal - { { 0, -1 }, { 0, 1 } }, // vertical - { { -1, -1 }, { 1, 1 } }, // 45 degree - { { 1, -1 }, { -1, 1 } }, // 135 degree - }; - static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; - -#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 
0 : -1)) - stride /= sizeof(pixel); - init_y = -(4 >> chroma) - 2; - if (!borders[2]) - width -= (8 >> chroma) + 2; - height = (4 >> chroma) + 2; - - dst = dst + (init_y * stride + init_x); - src = src + (init_y * stride + init_x); - init_y = init_x = 0; if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; @@ -548,78 +449,6 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, width--; } } - { - int y_stride = init_y * stride; - int pos_0_0 = pos[sao_eo_class][0][0]; - int pos_0_1 = pos[sao_eo_class][0][1]; - int pos_1_0 = pos[sao_eo_class][1][0]; - int pos_1_1 = pos[sao_eo_class][1][1]; - - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; - for (y = init_y; y < height; y++) { - for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); - } - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; - } - } - - { - // Restore pixels that can't be modified - int save_lower_left = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[0]; - if(vert_edge && sao_eo_class != SAO_EO_VERT) - for(y = init_y; y< height-save_lower_left; y++) - dst[y*stride] = src[y*stride]; - if(horiz_edge && sao_eo_class != SAO_EO_HORIZ) - for(x = init_x+save_lower_left; x<width; x++) - dst[(height-1)*stride+x] = src[(height-1)*stride+x]; - if(diag_edge && sao_eo_class == SAO_EO_45D) - dst[stride*(height-1)] = src[stride*(height-1)]; - } - -#undef CMP -} - -static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int _width, int _height, - int c_idx, uint8_t vert_edge, - uint8_t horiz_edge, uint8_t diag_edge) -{ - int x, y; - pixel *dst = (pixel *)_dst; - pixel *src = (pixel 
*)_src; - int chroma = !!c_idx; - int *sao_offset_val = sao->offset_val[c_idx]; - int sao_eo_class = sao->eo_class[c_idx]; - int init_x = 0, init_y = 0, width = _width, height = _height; - - static const int8_t pos[4][2][2] = { - { { -1, 0 }, { 1, 0 } }, // horizontal - { { 0, -1 }, { 0, 1 } }, // vertical - { { -1, -1 }, { 1, 1 } }, // 45 degree - { { 1, -1 }, { -1, 1 } }, // 135 degree - }; - static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; - -#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1)) - - stride /= sizeof(pixel); - - init_x = -(8 >> chroma) - 2; - width = (8 >> chroma) + 2; - if (!borders[3]) - height -= (4 >> chroma) + 2; - - dst = dst + (init_y * stride + init_x); - src = src + (init_y * stride + init_x); - init_y = init_x = 0; if (sao_eo_class != SAO_EO_HORIZ) { if (borders[1]) { int offset_val = sao_offset_val[0]; @@ -635,117 +464,47 @@ static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src, height--; } } - { - int y_stride = init_y * stride; - int pos_0_0 = pos[sao_eo_class][0][0]; - int pos_0_1 = pos[sao_eo_class][0][1]; - int pos_1_0 = pos[sao_eo_class][1][0]; - int pos_1_1 = pos[sao_eo_class][1][1]; - - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; - for (y = init_y; y < height; y++) { - for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); - } - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; - } - } - { - // Restore pixels that can't be modified - int save_upper_right = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[1]; - if(vert_edge && sao_eo_class != SAO_EO_VERT) - for(y = init_y+save_upper_right; y< height; y++) - dst[y*stride+width-1] = src[y*stride+width-1]; - if(horiz_edge && 
sao_eo_class != SAO_EO_HORIZ) - for(x = init_x; x<width-save_upper_right; x++) - dst[x] = src[x]; - if(diag_edge && sao_eo_class == SAO_EO_45D) - dst[width-1] = src[width-1]; - } -#undef CMP -} - -static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, - int *borders, int _width, int _height, - int c_idx, uint8_t vert_edge, - uint8_t horiz_edge, uint8_t diag_edge) -{ - int x, y; - pixel *dst = (pixel *)_dst; - pixel *src = (pixel *)_src; - int chroma = !!c_idx; - int *sao_offset_val = sao->offset_val[c_idx]; - int sao_eo_class = sao->eo_class[c_idx]; - int init_x = 0, init_y = 0, width = _width, height = _height; - - static const int8_t pos[4][2][2] = { - { { -1, 0 }, { 1, 0 } }, // horizontal - { { 0, -1 }, { 0, 1 } }, // vertical - { { -1, -1 }, { 1, 1 } }, // 45 degree - { { 1, -1 }, { -1, 1 } }, // 135 degree - }; - static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; - -#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1)) - - stride /= sizeof(pixel); - - init_y = -(4 >> chroma) - 2; - init_x = -(8 >> chroma) - 2; - width = (8 >> chroma) + 2; - height = (4 >> chroma) + 2; - - - dst = dst + (init_y * stride + init_x); - src = src + (init_y * stride + init_x); - init_y = init_x = 0; + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); { - int y_stride = init_y * stride; - int pos_0_0 = pos[sao_eo_class][0][0]; - int pos_0_1 = pos[sao_eo_class][0][1]; - int pos_1_0 = pos[sao_eo_class][1][0]; - int pos_1_1 = pos[sao_eo_class][1][1]; - - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; - - for (y = init_y; y < height; y++) { - for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + 
sao_offset_val[offset_val]); - } - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; - } - } + int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; + int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2]; + int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3]; + int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3]; - { // Restore pixels that can't be modified - int save_lower_right = !diag_edge && sao_eo_class == SAO_EO_135D; - if(vert_edge && sao_eo_class != SAO_EO_VERT) - for(y = init_y; y< height-save_lower_right; y++) + if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) { + for(y = init_y+save_upper_left; y< height-save_lower_left; y++) + dst[y*stride] = src[y*stride]; + } + if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) { + for(y = init_y+save_upper_right; y< height-save_lower_right; y++) dst[y*stride+width-1] = src[y*stride+width-1]; - if(horiz_edge && sao_eo_class != SAO_EO_HORIZ) - for(x = init_x; x<width-save_lower_right; x++) + } + + if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) { + for(x = init_x+save_upper_left; x < width-save_upper_right; x++) + dst[x] = src[x]; + } + if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) { + for(x = init_x+save_lower_left; x < width-save_lower_right; x++) dst[(height-1)*stride+x] = src[(height-1)*stride+x]; - if(diag_edge && sao_eo_class == SAO_EO_135D) + } + if(diag_edge[0] && sao_eo_class == SAO_EO_135D) + dst[0] = src[0]; + if(diag_edge[1] && sao_eo_class == SAO_EO_45D) + dst[width-1] = src[width-1]; + if(diag_edge[2] && sao_eo_class == SAO_EO_135D) dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1]; + if(diag_edge[3] && sao_eo_class == SAO_EO_45D) + dst[stride*(height-1)] = src[stride*(height-1)]; + } -#undef CMP } +#undef CMP + #undef SET #undef SCALE #undef ADD_AND_SCALE |