aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/hevc_filter.c
diff options
context:
space:
mode:
authorFabrice Bellard <fabrice@bellard.org>2015-01-12 23:09:23 +0100
committerMichael Niedermayer <michaelni@gmx.at>2015-02-04 01:54:01 +0100
commitda81cc38e8a90071484508fbdd8fb888dc9a4b7b (patch)
tree51df6e7f3b192aa3a0c189aa70746a68bb74fb92 /libavcodec/hevc_filter.c
parentb737a2c52857b214be246ff615c6293730033cfa (diff)
downloadffmpeg-da81cc38e8a90071484508fbdd8fb888dc9a4b7b.tar.gz
avcodec/hevc: reduce memory for SAO
(cherry picked from commit 5d9f79edef2c11b915bdac3a025b59a32082f409) The SAO edge filter uses pre-SAO pixel data on the left and top of the CTB, so this data must be kept available. This was done previously by having 2 copies of the frame, one before and one after SAO. This commit reduces the storage to just the CTB border rows and columns that are needed, instead of the previous whole-frame copy. Commit message taken from patch by Christophe Gisquet <christophe.gisquet@gmail.com>. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/hevc_filter.c')
-rw-r--r--libavcodec/hevc_filter.c184
1 files changed, 173 insertions, 11 deletions
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index 90bf1feb15..b8aa71e25a 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -139,7 +139,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
return s->qp_y_tab[x + y * s->sps->min_cb_width];
}
-static void copy_CTB(uint8_t *dst, uint8_t *src, int width, int height,
+static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
intptr_t stride_dst, intptr_t stride_src)
{
int i, j;
@@ -161,13 +161,65 @@ int i, j;
}
}
-static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int height, int c_idx)
+#if defined(USE_SAO_SMALL_BUFFER)
+static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift) /* Copy a single pixel from src to dst; pixel_shift selects the pixel width. */
+{
+ if (pixel_shift) /* non-zero: the pixel is read and written as one uint16_t */
+ *(uint16_t *)dst = *(uint16_t *)src; /* NOTE(review): the cast drops const from src and assumes both pointers are suitably aligned for uint16_t; confirm at the call sites */
+ else /* zero: the pixel is a single byte */
+ *dst = *src;
+
+}
+
+static void copy_vert(uint8_t *dst, const uint8_t *src, /* Strided copy of `height` pixels, one pixel per step, from src to dst. */
+ int pixel_shift, int height, /* pixel_shift: 0 copies bytes, anything else copies uint16_t values; height: number of pixels copied */
+ int stride_dst, int stride_src) /* byte distance between consecutive pixels on each side (both are added to uint8_t pointers) */
+{
+ int i;
+ if (pixel_shift == 0) { /* one byte per pixel */
+ for (i = 0; i < height; i++) {
+ *dst = *src;
+ dst += stride_dst; /* each side advances by its own stride, so a column can be packed into a contiguous run or unpacked again */
+ src += stride_src;
+ }
+ } else { /* one uint16_t per pixel */
+ for (i = 0; i < height; i++) {
+ *(uint16_t *)dst = *(uint16_t *)src; /* NOTE(review): the cast drops const from src and assumes uint16_t alignment; confirm at the call sites */
+ dst += stride_dst;
+ src += stride_src;
+ }
+ }
+}
+
+static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src, /* Save the first/last row and first/last column of a width x height block at src into the per-plane buffers sao_pixel_buffer_h / sao_pixel_buffer_v. */
+ int stride_src, int x, int y, int width, int height, /* stride_src: byte stride between rows of src; x, y: pixel offsets used to index into the edge buffers */
+ int c_idx, int x_ctb, int y_ctb) /* c_idx: plane index; x_ctb, y_ctb: select which pair of saved columns / rows is written */
+{
+ int sh = s->sps->pixel_shift; /* shift that converts a pixel count into a byte count */
+ int w = s->sps->width >> s->sps->hshift[c_idx]; /* width of plane c_idx in pixels; length of one saved row */
+ int h = s->sps->height >> s->sps->vshift[c_idx]; /* height of plane c_idx in pixels; length of one saved column */
+
+ /* copy horizontal edges: saved row 2*y_ctb receives the block's first row, saved row 2*y_ctb+1 its last row */
+ memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh), /* destination: saved row 2*y_ctb, starting at pixel x */
+ src, width << sh); /* source: first row of the block, width pixels */
+ memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh), /* destination: saved row 2*y_ctb+1, starting at pixel x */
+ src + stride_src * (height - 1), width << sh); /* source: last row of the block */
+
+ /* copy vertical edges: saved column 2*x_ctb receives the block's first column, saved column 2*x_ctb+1 its last column */
+ copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src); /* destination stride of one pixel stores the column contiguously */
+
+ copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src); /* source starts at the block's last pixel column */
+}
+#endif
+
+static void restore_tqb_pixels(HEVCContext *s,
+ uint8_t *src1, const uint8_t *dst1,
+ ptrdiff_t stride_src, ptrdiff_t stride_dst,
+ int x0, int y0, int width, int height, int c_idx)
{
if ( s->pps->transquant_bypass_enable_flag ||
(s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
int x, y;
- ptrdiff_t stride_dst = s->sao_frame->linesize[c_idx];
- ptrdiff_t stride_src = s->frame->linesize[c_idx];
int min_pu_size = 1 << s->sps->log2_min_pu_size;
int hshift = s->sps->hshift[c_idx];
int vshift = s->sps->vshift[c_idx];
@@ -175,13 +227,13 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he
int y_min = ((y0 ) >> s->sps->log2_min_pu_size);
int x_max = ((x0 + width ) >> s->sps->log2_min_pu_size);
int y_max = ((y0 + height) >> s->sps->log2_min_pu_size);
- int len = min_pu_size >> hshift;
+ int len = (min_pu_size >> hshift) << s->sps->pixel_shift;
for (y = y_min; y < y_max; y++) {
for (x = x_min; x < x_max; x++) {
if (s->is_pcm[y * s->sps->min_pu_width + x]) {
int n;
- uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride_src + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
- uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride_dst + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
+ uint8_t *src = src1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
+ const uint8_t *dst = dst1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
for (n = 0; n < (min_pu_size >> vshift); n++) {
memcpy(src, dst, len);
src += stride_src;
@@ -198,6 +250,7 @@ static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int he
static void sao_filter_CTB(HEVCContext *s, int x, int y)
{
static const uint8_t band_tab[8] = { 0, 1, 2, 2, 3, 3, 4, 4 };
+ HEVCLocalContext *lc = s->HEVClc;
int c_idx;
int edges[4]; // 0 left 1 top 2 right 3 bottom
int x_ctb = x >> s->sps->log2_ctb_size;
@@ -258,27 +311,132 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
int x0 = x >> s->sps->hshift[c_idx];
int y0 = y >> s->sps->vshift[c_idx];
int stride_src = s->frame->linesize[c_idx];
- int stride_dst = s->sao_frame->linesize[c_idx];
int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0);
int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
int tab = band_tab[(FFALIGN(width, 8) >> 3) - 1];
uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
+#if defined(USE_SAO_SMALL_BUFFER)
+ int stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift;
+ uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift);
+#else
+ int stride_dst = s->sao_frame->linesize[c_idx];
uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
+#endif
switch (sao->type_idx[c_idx]) {
case SAO_BAND:
copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
+#if defined(USE_SAO_SMALL_BUFFER)
+ copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+ x_ctb, y_ctb);
+#endif
s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
sao->offset_val[c_idx], sao->band_position[c_idx],
width, height);
- restore_tqb_pixels(s, x, y, width, height, c_idx);
+ restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+ x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
case SAO_EDGE:
{
- uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
+#if defined(USE_SAO_SMALL_BUFFER)
+ int w = s->sps->width >> s->sps->hshift[c_idx];
+ int h = s->sps->height >> s->sps->vshift[c_idx];
+ int left_edge = edges[0];
+ int top_edge = edges[1];
+ int right_edge = edges[2];
+ int bottom_edge = edges[3];
+ int sh = s->sps->pixel_shift;
+ int left_pixels, right_pixels;
+
+ if (!top_edge) {
+ int left = 1 - left_edge;
+ int right = 1 - right_edge;
+ const uint8_t *src1[2];
+ uint8_t *dst1;
+ int src_idx, pos;
+
+ dst1 = dst - stride_dst - (left << sh);
+ src1[0] = src - stride_src - (left << sh);
+ src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
+ pos = 0;
+ if (left) {
+ src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1, src1[src_idx], sh);
+ pos += (1 << sh);
+ }
+ src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+ if (right) {
+ pos += width << sh;
+ src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+ }
+ }
+ if (!bottom_edge) {
+ int left = 1 - left_edge;
+ int right = 1 - right_edge;
+ const uint8_t *src1[2];
+ uint8_t *dst1;
+ int src_idx, pos;
+
+ dst1 = dst + height * stride_dst - (left << sh);
+ src1[0] = src + height * stride_src - (left << sh);
+ src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
+ pos = 0;
+ if (left) {
+ src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1, src1[src_idx], sh);
+ pos += (1 << sh);
+ }
+ src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
+ if (right) {
+ pos += width << sh;
+ src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
+ SAO_APPLIED);
+ copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
+ }
+ }
+ left_pixels = 0;
+ if (!left_edge) {
+ if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+ copy_vert(dst - (1 << sh),
+ s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
+ sh, height, stride_dst, 1 << sh);
+ } else {
+ left_pixels = 1;
+ }
+ }
+ right_pixels = 0;
+ if (!right_edge) {
+ if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
+ copy_vert(dst + (width << sh),
+ s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
+ sh, height, stride_dst, 1 << sh);
+ } else {
+ right_pixels = 1;
+ }
+ }
+
+ copy_CTB(dst - (left_pixels << sh),
+ src - (left_pixels << sh),
+ (width + left_pixels + right_pixels) << sh,
+ height, stride_dst, stride_src);
+
+ copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
+ x_ctb, y_ctb);
+#else
+ uint8_t left_pixels;
+ /* get the CTB edge pixels from the SAO pixel buffer */
+ left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
if (!edges[1]) {
uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
@@ -306,6 +464,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
src - (left_pixels << s->sps->pixel_shift),
(width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
+#endif
+ /* XXX: could handle the restoration here to simplify the
+ DSP functions */
s->hevcdsp.sao_edge_filter[restore](src, dst,
stride_src, stride_dst,
sao,
@@ -314,7 +475,8 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
vert_edge,
horiz_edge,
diag_edge);
- restore_tqb_pixels(s, x, y, width, height, c_idx);
+ restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
+ x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
}