diff options
author | Mickaël Raulet <mraulet@insa-rennes.fr> | 2014-07-16 23:15:32 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-18 22:46:50 +0200 |
commit | d249e6828e8a84758010ec020a84dfcd156b585e (patch) | |
tree | 14a2103e8c7bd59d1082915c3c7e277e1b974108 | |
parent | 2897447ddf805edc0a7935ab633c28e29a89b7b3 (diff) | |
download | ffmpeg-d249e6828e8a84758010ec020a84dfcd156b585e.tar.gz |
hevc/sao: optimize sao implementation
- adding one extra pixel all around the frame
- do not copy when SAO is not applied
5% improvement
cherry picked from commit 10fc29fc19a12c4d8168fbe1a954b76386db12d0
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/hevc.c | 27 | ||||
-rw-r--r-- | libavcodec/hevc.h | 1 | ||||
-rw-r--r-- | libavcodec/hevc_filter.c | 66 | ||||
-rw-r--r-- | libavcodec/hevcdsp.h | 4 | ||||
-rw-r--r-- | libavcodec/hevcdsp_template.c | 91 |
5 files changed, 117 insertions, 72 deletions
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index afb2baaf65..d466aa6253 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -276,6 +276,24 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) return 0; } +static int get_buffer_sao(HEVCContext *s, AVFrame *frame) +{ + int ret, i; + + frame->width = s->avctx->width + 2; + frame->height = s->avctx->height + 2; + if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) + return ret; + for (i = 0; frame->data[i]; i++) { + int offset = frame->linesize[i] + 1; + frame->data[i] += offset; + } + frame->width = s->avctx->width; + frame->height = s->avctx->height; + + return 0; +} + static int set_sps(HEVCContext *s, const HEVCSPS *sps) { int ret; @@ -317,10 +335,8 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps) if (sps->sao_enabled) { av_frame_unref(s->tmp_frame); - ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF); - if (ret < 0) - goto fail; - s->frame = s->tmp_frame; + ret = get_buffer_sao(s, s->tmp_frame); + s->sao_frame = s->tmp_frame; } s->sps = sps; @@ -2582,8 +2598,7 @@ static int hevc_frame_start(HEVCContext *s) if (s->pps->tiles_enabled_flag) lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size; - ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? 
&s->sao_frame : &s->frame, - s->poc); + ret = ff_hevc_set_new_ref(s, &s->frame, s->poc); if (ret < 0) goto fail; diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index 2a5ce2523c..c38de1a2ea 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -260,6 +260,7 @@ enum SAOType { SAO_NOT_APPLIED = 0, SAO_BAND, SAO_EDGE, + SAO_APPLIED }; enum SAOEOClass { diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index 0d3e238d69..b0b197cc66 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -142,14 +142,14 @@ static int get_qPy(HEVCContext *s, int xC, int yC) } static void copy_CTB(uint8_t *dst, uint8_t *src, - int width, int height, int stride) + int width, int height, int stride_dst, int stride_src) { int i; for (i = 0; i < height; i++) { memcpy(dst, src, width); - dst += stride; - src += stride; + dst += stride_dst; + src += stride_src; } } @@ -174,7 +174,7 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; for (n = 0; n < (min_pu_size >> vshift); n++) { - memcpy(dst, src, len); + memcpy(src, dst, len); src += stride; dst += stride; } @@ -247,29 +247,58 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) for (c_idx = 0; c_idx < 3; c_idx++) { int x0 = x >> s->sps->hshift[c_idx]; int y0 = y >> s->sps->vshift[c_idx]; - int stride = s->frame->linesize[c_idx]; + int stride_src = s->frame->linesize[c_idx]; + int stride_dst = s->sao_frame->linesize[c_idx]; int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx]; int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx]; - int width = FFMIN(ctb_size_h, - (s->sps->width >> 
s->sps->hshift[c_idx]) - x0); - int height = FFMIN(ctb_size_v, - (s->sps->height >> s->sps->vshift[c_idx]) - y0); - - uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; - uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)]; + int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0); + int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0); + uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)]; + uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)]; switch (sao->type_idx[c_idx]) { case SAO_BAND: - s->hevcdsp.sao_band_filter(dst, src, - stride, + copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src); + s->hevcdsp.sao_band_filter(src, dst, + stride_src, stride_dst, sao, edges, width, height, c_idx); restore_tqb_pixels(s, x, y, width, height, c_idx); + sao->type_idx[c_idx] = SAO_APPLIED; break; case SAO_EDGE: - s->hevcdsp.sao_edge_filter[restore](dst, src, - stride, + { + uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED); + if (!edges[1]) { + uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED); + uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED); + if (CTB(s->sao, x_ctb , y_ctb-1).type_idx[c_idx] == 0) + memcpy( dst - stride_dst - (top_left << s->sps->pixel_shift), + src - stride_src - (top_left << s->sps->pixel_shift), + (top_left + width + top_right) << s->sps->pixel_shift); + else { + if (top_left) + memcpy( dst - stride_dst - (1 << s->sps->pixel_shift), + src - stride_src - (1 << s->sps->pixel_shift), + 1 << s->sps->pixel_shift); + if(top_right) + memcpy( dst - stride_dst + (width << s->sps->pixel_shift), + src - stride_src + (width << s->sps->pixel_shift), + 1 << s->sps->pixel_shift); + } + } + if (!edges[3]) { // bottom and bottom 
right + uint8_t bottom_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] != SAO_APPLIED); + memcpy( dst + height * stride_dst - (bottom_left << s->sps->pixel_shift), + src + height * stride_src - (bottom_left << s->sps->pixel_shift), + (width + 1 + bottom_left) << s->sps->pixel_shift); + } + copy_CTB(dst - (left_pixels << s->sps->pixel_shift), + src - (left_pixels << s->sps->pixel_shift), + (width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src); + s->hevcdsp.sao_edge_filter[restore](src, dst, + stride_src, stride_dst, sao, edges, width, height, c_idx, @@ -277,10 +306,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) horiz_edge, diag_edge); restore_tqb_pixels(s, x, y, width, height, c_idx); + sao->type_idx[c_idx] = SAO_APPLIED; break; - default : - copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride); - break; + } } } } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 3b82fabff1..dfff7801f7 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -58,11 +58,11 @@ typedef struct HEVCDSPContext { void (*transform_dc_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); - void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, + void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, struct SAOParams *sao, int *borders, int width, int height, int c_idx); - void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride, + void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, struct SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge); diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index f0bb256b4b..7840ec7472 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -329,7 +329,7 @@ IDCT_DC(32) #undef ADD_AND_SCALE static void 
FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int width, int height, int c_idx) { @@ -341,22 +341,23 @@ static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, int *sao_offset_val = sao->offset_val[c_idx]; int sao_left_class = sao->band_position[c_idx]; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); for (k = 0; k < 4; k++) offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel(src[x] + offset_table[av_clip_pixel(src[x] >> shift)]); - dst += stride; - src += stride; + dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); + dst += stride_dst; + src += stride_src; } } #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1)) static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int width, int height, int c_idx, int init_x, int init_y) { @@ -372,30 +373,32 @@ static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, pixel *dst = (pixel *)_dst; pixel *src = (pixel *)_src; - int y_stride = init_y * stride; + int y_stride_src = init_y * stride_src; + int y_stride_dst = init_y * stride_dst; int pos_0_0 = pos[sao_eo_class][0][0]; int pos_0_1 = pos[sao_eo_class][0][1]; int pos_1_0 = pos[sao_eo_class][1][0]; int pos_1_1 = pos[sao_eo_class][1][1]; int x, y; - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; + int y_stride_0_1 = (init_y + pos_0_1) * stride_src; + int y_stride_1_1 = (init_y + pos_1_1) * stride_src; for (y = init_y; y < height; y++) { for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = 
edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); + int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]); + int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]); + int offset_val = edge_idx[2 + diff0 + diff1]; + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]); } - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; + y_stride_src += stride_src; + y_stride_dst += stride_dst; + y_stride_0_1 += stride_src; + y_stride_1_1 += stride_src; } } static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge) @@ -407,24 +410,22 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; - int y_stride = 0; for (y = 0; y < height; y++) { - dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val); - y_stride += stride; + dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); } init_x = 1; } if (borders[2]) { int offset_val = sao_offset_val[0]; - int x_stride = width - 1; + int offset = width - 1; for (x = 0; x < height; x++) { - dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val); - x_stride += stride; + dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); } width--; } @@ -437,19 +438,20 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - int y_stride = stride * (height - 1); + 
int offset_val = sao_offset_val[0]; + int y_stride_dst = stride_dst * (height - 1); + int y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } - FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y); } static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge) @@ -461,24 +463,22 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; - int y_stride = 0; for (y = 0; y < height; y++) { - dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val); - y_stride += stride; + dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); } init_x = 1; } if (borders[2]) { int offset_val = sao_offset_val[0]; - int x_stride = width - 1; + int offset = width - 1; for (x = 0; x < height; x++) { - dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val); - x_stride += stride; + dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); } width--; } @@ -491,15 +491,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - int y_stride = stride * (height - 1); + int offset_val = sao_offset_val[0]; + int 
y_stride_dst = stride_dst * (height - 1); + int y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } - FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y); { int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; @@ -510,11 +511,11 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, // Restore pixels that can't be modified if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) { for(y = init_y+save_upper_left; y< height-save_lower_left; y++) - dst[y*stride] = src[y*stride]; + dst[y*stride_dst] = src[y*stride_src]; } if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) { for(y = init_y+save_upper_right; y< height-save_lower_right; y++) - dst[y*stride+width-1] = src[y*stride+width-1]; + dst[y*stride_dst+width-1] = src[y*stride_src+width-1]; } if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) { @@ -523,16 +524,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, } if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) { for(x = init_x+save_lower_left; x < width-save_lower_right; x++) - dst[(height-1)*stride+x] = src[(height-1)*stride+x]; + dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x]; } if(diag_edge[0] && sao_eo_class == SAO_EO_135D) dst[0] = src[0]; if(diag_edge[1] && sao_eo_class == SAO_EO_45D) dst[width-1] = src[width-1]; if(diag_edge[2] && sao_eo_class == SAO_EO_135D) - dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1]; + dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1]; if(diag_edge[3] && sao_eo_class == SAO_EO_45D) - dst[stride*(height-1)] = src[stride*(height-1)]; + dst[stride_dst*(height-1)] = 
src[stride_src*(height-1)]; } } |