diff options
author | Mickaël Raulet <mraulet@insa-rennes.fr> | 2014-07-16 23:15:32 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-18 22:46:50 +0200 |
commit | d249e6828e8a84758010ec020a84dfcd156b585e (patch) | |
tree | 14a2103e8c7bd59d1082915c3c7e277e1b974108 /libavcodec/hevcdsp_template.c | |
parent | 2897447ddf805edc0a7935ab633c28e29a89b7b3 (diff) | |
download | ffmpeg-d249e6828e8a84758010ec020a84dfcd156b585e.tar.gz |
hevc/sao: optimize sao implementation
- adding one extra pixel all around the frame
- do not copy when SAO is not applied
5% improvement
cherry picked from commit 10fc29fc19a12c4d8168fbe1a954b76386db12d0
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r-- | libavcodec/hevcdsp_template.c | 91 |
1 files changed, 46 insertions, 45 deletions
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index f0bb256b4b..7840ec7472 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -329,7 +329,7 @@ IDCT_DC(32) #undef ADD_AND_SCALE static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int width, int height, int c_idx) { @@ -341,22 +341,23 @@ static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, int *sao_offset_val = sao->offset_val[c_idx]; int sao_left_class = sao->band_position[c_idx]; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); for (k = 0; k < 4; k++) offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel(src[x] + offset_table[av_clip_pixel(src[x] >> shift)]); - dst += stride; - src += stride; + dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); + dst += stride_dst; + src += stride_src; } } #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 
0 : -1)) static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int width, int height, int c_idx, int init_x, int init_y) { @@ -372,30 +373,32 @@ static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, pixel *dst = (pixel *)_dst; pixel *src = (pixel *)_src; - int y_stride = init_y * stride; + int y_stride_src = init_y * stride_src; + int y_stride_dst = init_y * stride_dst; int pos_0_0 = pos[sao_eo_class][0][0]; int pos_0_1 = pos[sao_eo_class][0][1]; int pos_1_0 = pos[sao_eo_class][1][0]; int pos_1_1 = pos[sao_eo_class][1][1]; int x, y; - int y_stride_0_1 = (init_y + pos_0_1) * stride; - int y_stride_1_1 = (init_y + pos_1_1) * stride; + int y_stride_0_1 = (init_y + pos_0_1) * stride_src; + int y_stride_1_1 = (init_y + pos_1_1) * stride_src; for (y = init_y; y < height; y++) { for (x = init_x; x < width; x++) { - int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]); - int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]); - int offset_val = edge_idx[2 + diff0 + diff1]; - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]); + int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]); + int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]); + int offset_val = edge_idx[2 + diff0 + diff1]; + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]); } - y_stride += stride; - y_stride_0_1 += stride; - y_stride_1_1 += stride; + y_stride_src += stride_src; + y_stride_dst += stride_dst; + y_stride_0_1 += stride_src; + y_stride_1_1 += stride_src; } } static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge) @@ -407,24 +410,22 @@ 
static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; - int y_stride = 0; for (y = 0; y < height; y++) { - dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val); - y_stride += stride; + dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); } init_x = 1; } if (borders[2]) { int offset_val = sao_offset_val[0]; - int x_stride = width - 1; + int offset = width - 1; for (x = 0; x < height; x++) { - dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val); - x_stride += stride; + dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); } width--; } @@ -437,19 +438,20 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src, init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - int y_stride = stride * (height - 1); + int offset_val = sao_offset_val[0]; + int y_stride_dst = stride_dst * (height - 1); + int y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } - FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y); } static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, - ptrdiff_t stride, SAOParams *sao, + ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge) @@ -461,24 +463,22 @@ static void FUNC(sao_edge_filter_1)(uint8_t 
*_dst, uint8_t *_src, int sao_eo_class = sao->eo_class[c_idx]; int init_x = 0, init_y = 0, width = _width, height = _height; - stride /= sizeof(pixel); + stride_dst /= sizeof(pixel); + stride_src /= sizeof(pixel); if (sao_eo_class != SAO_EO_VERT) { if (borders[0]) { int offset_val = sao_offset_val[0]; - int y_stride = 0; for (y = 0; y < height; y++) { - dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val); - y_stride += stride; + dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); } init_x = 1; } if (borders[2]) { int offset_val = sao_offset_val[0]; - int x_stride = width - 1; + int offset = width - 1; for (x = 0; x < height; x++) { - dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val); - x_stride += stride; + dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); } width--; } @@ -491,15 +491,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - int y_stride = stride * (height - 1); + int offset_val = sao_offset_val[0]; + int y_stride_dst = stride_dst * (height - 1); + int y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } - FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y); + FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y); { int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; @@ -510,11 +511,11 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, // Restore pixels that can't be modified if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) { for(y = init_y+save_upper_left; y< height-save_lower_left; y++) - dst[y*stride] = src[y*stride]; + 
dst[y*stride_dst] = src[y*stride_src]; } if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) { for(y = init_y+save_upper_right; y< height-save_lower_right; y++) - dst[y*stride+width-1] = src[y*stride+width-1]; + dst[y*stride_dst+width-1] = src[y*stride_src+width-1]; } if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) { @@ -523,16 +524,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, } if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) { for(x = init_x+save_lower_left; x < width-save_lower_right; x++) - dst[(height-1)*stride+x] = src[(height-1)*stride+x]; + dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x]; } if(diag_edge[0] && sao_eo_class == SAO_EO_135D) dst[0] = src[0]; if(diag_edge[1] && sao_eo_class == SAO_EO_45D) dst[width-1] = src[width-1]; if(diag_edge[2] && sao_eo_class == SAO_EO_135D) - dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1]; + dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1]; if(diag_edge[3] && sao_eo_class == SAO_EO_45D) - dst[stride*(height-1)] = src[stride*(height-1)]; + dst[stride_dst*(height-1)] = src[stride_src*(height-1)]; } } |