aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/hevcdsp_template.c
diff options
context:
space:
mode:
authorMickaël Raulet <mraulet@insa-rennes.fr>2014-07-16 23:15:32 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-07-18 22:46:50 +0200
commitd249e6828e8a84758010ec020a84dfcd156b585e (patch)
tree14a2103e8c7bd59d1082915c3c7e277e1b974108 /libavcodec/hevcdsp_template.c
parent2897447ddf805edc0a7935ab633c28e29a89b7b3 (diff)
downloadffmpeg-d249e6828e8a84758010ec020a84dfcd156b585e.tar.gz
hevc/sao: optimize sao implementation
- adding one extra pixel all around the frame
- do not copy when SAO is not applied
5% improvement
cherry picked from commit 10fc29fc19a12c4d8168fbe1a954b76386db12d0
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevcdsp_template.c')
-rw-r--r--libavcodec/hevcdsp_template.c91
1 files changed, 46 insertions, 45 deletions
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index f0bb256b4b..7840ec7472 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -329,7 +329,7 @@ IDCT_DC(32)
#undef ADD_AND_SCALE
static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
- ptrdiff_t stride, SAOParams *sao,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
int *borders, int width, int height,
int c_idx)
{
@@ -341,22 +341,23 @@ static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
int *sao_offset_val = sao->offset_val[c_idx];
int sao_left_class = sao->band_position[c_idx];
- stride /= sizeof(pixel);
+ stride_dst /= sizeof(pixel);
+ stride_src /= sizeof(pixel);
for (k = 0; k < 4; k++)
offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++)
- dst[x] = av_clip_pixel(src[x] + offset_table[av_clip_pixel(src[x] >> shift)]);
- dst += stride;
- src += stride;
+ dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
+ dst += stride_dst;
+ src += stride_src;
}
}
#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
- ptrdiff_t stride, SAOParams *sao,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
int width, int height,
int c_idx, int init_x, int init_y) {
@@ -372,30 +373,32 @@ static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
pixel *dst = (pixel *)_dst;
pixel *src = (pixel *)_src;
- int y_stride = init_y * stride;
+ int y_stride_src = init_y * stride_src;
+ int y_stride_dst = init_y * stride_dst;
int pos_0_0 = pos[sao_eo_class][0][0];
int pos_0_1 = pos[sao_eo_class][0][1];
int pos_1_0 = pos[sao_eo_class][1][0];
int pos_1_1 = pos[sao_eo_class][1][1];
int x, y;
- int y_stride_0_1 = (init_y + pos_0_1) * stride;
- int y_stride_1_1 = (init_y + pos_1_1) * stride;
+ int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
+ int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
for (y = init_y; y < height; y++) {
for (x = init_x; x < width; x++) {
- int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
- int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
- int offset_val = edge_idx[2 + diff0 + diff1];
- dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
+ int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
+ int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
+ int offset_val = edge_idx[2 + diff0 + diff1];
+ dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
}
- y_stride += stride;
- y_stride_0_1 += stride;
- y_stride_1_1 += stride;
+ y_stride_src += stride_src;
+ y_stride_dst += stride_dst;
+ y_stride_0_1 += stride_src;
+ y_stride_1_1 += stride_src;
}
}
static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
- ptrdiff_t stride, SAOParams *sao,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
int *borders, int _width, int _height,
int c_idx, uint8_t *vert_edge,
uint8_t *horiz_edge, uint8_t *diag_edge)
@@ -407,24 +410,22 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
int sao_eo_class = sao->eo_class[c_idx];
int init_x = 0, init_y = 0, width = _width, height = _height;
- stride /= sizeof(pixel);
+ stride_dst /= sizeof(pixel);
+ stride_src /= sizeof(pixel);
if (sao_eo_class != SAO_EO_VERT) {
if (borders[0]) {
int offset_val = sao_offset_val[0];
- int y_stride = 0;
for (y = 0; y < height; y++) {
- dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
- y_stride += stride;
+ dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
}
init_x = 1;
}
if (borders[2]) {
int offset_val = sao_offset_val[0];
- int x_stride = width - 1;
+ int offset = width - 1;
for (x = 0; x < height; x++) {
- dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
- x_stride += stride;
+ dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
}
width--;
}
@@ -437,19 +438,20 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
init_y = 1;
}
if (borders[3]) {
- int offset_val = sao_offset_val[0];
- int y_stride = stride * (height - 1);
+ int offset_val = sao_offset_val[0];
+ int y_stride_dst = stride_dst * (height - 1);
+ int y_stride_src = stride_src * (height - 1);
for (x = init_x; x < width; x++)
- dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
+ dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
height--;
}
}
- FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y);
+ FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
}
static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
- ptrdiff_t stride, SAOParams *sao,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
int *borders, int _width, int _height,
int c_idx, uint8_t *vert_edge,
uint8_t *horiz_edge, uint8_t *diag_edge)
@@ -461,24 +463,22 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
int sao_eo_class = sao->eo_class[c_idx];
int init_x = 0, init_y = 0, width = _width, height = _height;
- stride /= sizeof(pixel);
+ stride_dst /= sizeof(pixel);
+ stride_src /= sizeof(pixel);
if (sao_eo_class != SAO_EO_VERT) {
if (borders[0]) {
int offset_val = sao_offset_val[0];
- int y_stride = 0;
for (y = 0; y < height; y++) {
- dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
- y_stride += stride;
+ dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
}
init_x = 1;
}
if (borders[2]) {
int offset_val = sao_offset_val[0];
- int x_stride = width - 1;
+ int offset = width - 1;
for (x = 0; x < height; x++) {
- dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
- x_stride += stride;
+ dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
}
width--;
}
@@ -491,15 +491,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
init_y = 1;
}
if (borders[3]) {
- int offset_val = sao_offset_val[0];
- int y_stride = stride * (height - 1);
+ int offset_val = sao_offset_val[0];
+ int y_stride_dst = stride_dst * (height - 1);
+ int y_stride_src = stride_src * (height - 1);
for (x = init_x; x < width; x++)
- dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
+ dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
height--;
}
}
- FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride, sao, width, height, c_idx, init_x, init_y);
+ FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
{
int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
@@ -510,11 +511,11 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
// Restore pixels that can't be modified
if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
- dst[y*stride] = src[y*stride];
+ dst[y*stride_dst] = src[y*stride_src];
}
if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
- dst[y*stride+width-1] = src[y*stride+width-1];
+ dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
}
if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
@@ -523,16 +524,16 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
}
if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
- dst[(height-1)*stride+x] = src[(height-1)*stride+x];
+ dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
}
if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
dst[0] = src[0];
if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
dst[width-1] = src[width-1];
if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
- dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
+ dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
- dst[stride*(height-1)] = src[stride*(height-1)];
+ dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
}
}