aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/utvideodec.c
diff options
context:
space:
mode:
author Paul B Mahol <onemda@gmail.com> 2016-12-23 15:41:51 +0100
committer Paul B Mahol <onemda@gmail.com> 2016-12-23 17:44:01 +0100
commit ea93052db3594f93f2d10be085a770184da0513d (patch)
tree 6128b1b696d84e4cd78f260b34ecbe4a32e372f8 /libavcodec/utvideodec.c
parent 373f21a91126d052cd310d675f43f548a407649e (diff)
download ffmpeg-ea93052db3594f93f2d10be085a770184da0513d.tar.gz
avcodec/utvideodec: add SIMD support for median prediction for planar formats
~10% faster overall. Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavcodec/utvideodec.c')
-rw-r--r-- libavcodec/utvideodec.c 164
1 files changed, 134 insertions, 30 deletions
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 650c0ec67d..cae0ff5f5c 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -372,8 +372,111 @@ static void restore_rgb_planes10(AVFrame *frame, int width, int height)
}
}
-static void restore_median(uint8_t *src, int step, int stride,
- int width, int height, int slices, int rmode)
+#undef A
+#undef B
+#undef C
+
+/**
+ * Undo median prediction in place on one 8-bit plane, slice by slice.
+ *
+ * Within each slice:
+ *  - row 0 is rebuilt with left prediction, after 0x80 is added to its
+ *    first sample;
+ *  - row 1 uses top prediction for its first sample and the median of
+ *    (left, top, left + top - topleft) for the remaining samples;
+ *  - every later row is handed to the DSP median routine, which continues
+ *    from the left / top-left state left behind by the previous row.
+ *
+ * @param c       decoder context; only c->hdspdec is used here
+ * @param src     first sample of the plane, modified in place
+ * @param stride  distance in bytes between the starts of consecutive rows
+ * @param width   number of samples per row
+ * @param height  number of rows in the plane
+ * @param slices  number of slices the plane is divided into
+ * @param rmode   bits cleared from each slice boundary row index
+ *                (the callers in this file pass 0 or 1)
+ */
+static void restore_median_planar(UtvideoContext *c, uint8_t *src, int stride,
+                                  int width, int height, int slices, int rmode)
+{
+    int i, j, slice;
+    int A, B, C;            // left, top and top-left neighbours
+    uint8_t *bsrc;
+    int slice_start, slice_height;
+    const int cmask = ~rmode;
+
+    for (slice = 0; slice < slices; slice++) {
+        // slice boundaries, rounded down according to cmask
+        slice_start  = ((slice * height) / slices) & cmask;
+        slice_height = ((((slice + 1) * height) / slices) & cmask) -
+                       slice_start;
+
+        if (!slice_height)
+            continue;
+        bsrc = src + slice_start * stride;
+
+        // first line - left neighbour prediction
+        bsrc[0] += 0x80;
+        c->hdspdec.add_hfyu_left_pred(bsrc, bsrc, width, 0);
+        bsrc += stride;
+        if (slice_height <= 1)
+            continue;
+        // second line - first element has top prediction, the rest uses median
+        C        = bsrc[-stride];
+        bsrc[0] += C;
+        A        = bsrc[0];
+        // keep B defined when width == 1 and the loop below never runs;
+        // in that case the top-left of the next row's first sample is C
+        B        = C;
+        for (i = 1; i < width; i++) {
+            B        = bsrc[i - stride];
+            bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
+            C        = B;
+            A        = bsrc[i];
+        }
+        bsrc += stride;
+        // the rest of lines use continuous median prediction
+        // The two trailing arguments are the in/out left and top-left
+        // predictors (huffyuvdsp.h prototype: ..., int *left, int *left_top).
+        // At this point the left value lives in A and the top-left in B.
+        for (j = 2; j < slice_height; j++) {
+            c->hdspdec.add_hfyu_median_pred(bsrc, bsrc - stride,
+                                            bsrc, width, &A, &B);
+            bsrc += stride;
+        }
+    }
+}
+
+/* UtVideo interlaced mode treats every two lines as a single one,
+ * so restoring function should take care of possible padding between
+ * two parts of the same "line".
+ *
+ * Interlaced counterpart of restore_median_planar(): each logical line is
+ * made of the row at bsrc followed by the row at bsrc + stride, and the
+ * predictor state is carried from the end of the first half into the
+ * second half.
+ *
+ * c       decoder context; only c->hdspdec is used here
+ * src     first sample of the plane, modified in place
+ * stride  distance in bytes between the starts of consecutive rows
+ * width   number of samples per row
+ * height  number of rows in the plane
+ * slices  number of slices the plane is divided into
+ * rmode   non-zero: slice boundaries are rounded down to a multiple of 4
+ *         rows; zero: to a multiple of 2 rows
+ */
+static void restore_median_planar_il(UtvideoContext *c, uint8_t *src, int stride,
+                                     int width, int height, int slices, int rmode)
+{
+    int i, j, slice;
+    int A, B, C;            // left, top and top-left neighbours
+    uint8_t *bsrc;
+    int slice_start, slice_height;
+    const int cmask   = ~(rmode ? 3 : 1);
+    const int stride2 = stride << 1;
+
+    for (slice = 0; slice < slices; slice++) {
+        slice_start    = ((slice * height) / slices) & cmask;
+        slice_height   = ((((slice + 1) * height) / slices) & cmask) -
+                         slice_start;
+        // from here on slice_height counts row pairs, not rows
+        slice_height >>= 1;
+        if (!slice_height)
+            continue;
+
+        bsrc = src + slice_start * stride;
+
+        // first line - left neighbour prediction
+        bsrc[0] += 0x80;
+        A = c->hdspdec.add_hfyu_left_pred(bsrc, bsrc, width, 0);
+        // second half of the pair continues from the value returned above
+        c->hdspdec.add_hfyu_left_pred(bsrc + stride, bsrc + stride, width, A);
+        bsrc += stride2;
+        if (slice_height <= 1)
+            continue;
+        // second line - first element has top prediction, the rest uses median
+        C        = bsrc[-stride2];
+        bsrc[0] += C;
+        A        = bsrc[0];
+        // keep B defined when width == 1 and the loop below never runs;
+        // in that case the top-left for the second half is C
+        B        = C;
+        for (i = 1; i < width; i++) {
+            B        = bsrc[i - stride2];
+            bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
+            C        = B;
+            A        = bsrc[i];
+        }
+        // The two trailing arguments are the in/out left and top-left
+        // predictors (huffyuvdsp.h prototype: ..., int *left, int *left_top).
+        // After the loop above the left value is in A and the top-left in B.
+        c->hdspdec.add_hfyu_median_pred(bsrc + stride, bsrc - stride,
+                                        bsrc + stride, width, &A, &B);
+        bsrc += stride2;
+        // the rest of lines use continuous median prediction
+        for (j = 2; j < slice_height; j++) {
+            c->hdspdec.add_hfyu_median_pred(bsrc, bsrc - stride2,
+                                            bsrc, width, &A, &B);
+            c->hdspdec.add_hfyu_median_pred(bsrc + stride, bsrc - stride,
+                                            bsrc + stride, width, &A, &B);
+            bsrc += stride2;
+        }
+    }
+}
+
+static void restore_median_packed(uint8_t *src, int step, int stride,
+ int width, int height, int slices, int rmode)
{
int i, j, slice;
int A, B, C;
@@ -428,8 +531,8 @@ static void restore_median(uint8_t *src, int step, int stride,
* so restoring function should take care of possible padding between
* two parts of the same "line".
*/
-static void restore_median_il(uint8_t *src, int step, int stride,
- int width, int height, int slices, int rmode)
+static void restore_median_packed_il(uint8_t *src, int step, int stride,
+ int width, int height, int slices, int rmode)
{
int i, j, slice;
int A, B, C;
@@ -608,14 +711,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret;
if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) {
- restore_median(frame.f->data[0] + ff_ut_rgb_order[i],
- c->planes, frame.f->linesize[0], avctx->width,
- avctx->height, c->slices, 0);
+ restore_median_packed(frame.f->data[0] + ff_ut_rgb_order[i],
+ c->planes, frame.f->linesize[0], avctx->width,
+ avctx->height, c->slices, 0);
} else {
- restore_median_il(frame.f->data[0] + ff_ut_rgb_order[i],
- c->planes, frame.f->linesize[0],
- avctx->width, avctx->height, c->slices,
- 0);
+ restore_median_packed_il(frame.f->data[0] + ff_ut_rgb_order[i],
+ c->planes, frame.f->linesize[0],
+ avctx->width, avctx->height, c->slices,
+ 0);
}
}
}
@@ -644,14 +747,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret;
if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) {
- restore_median(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width >> !!i, avctx->height >> !!i,
- c->slices, !i);
+ restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width >> !!i, avctx->height >> !!i,
+ c->slices, !i);
} else {
- restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width >> !!i,
- avctx->height >> !!i,
- c->slices, !i);
+ restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width >> !!i,
+ avctx->height >> !!i,
+ c->slices, !i);
}
}
}
@@ -665,13 +768,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret;
if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) {
- restore_median(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width >> !!i, avctx->height,
- c->slices, 0);
+ restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width >> !!i, avctx->height,
+ c->slices, 0);
} else {
- restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width >> !!i, avctx->height,
- c->slices, 0);
+ restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width >> !!i, avctx->height,
+ c->slices, 0);
}
}
}
@@ -685,13 +788,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
return ret;
if (c->frame_pred == PRED_MEDIAN) {
if (!c->interlaced) {
- restore_median(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width, avctx->height,
- c->slices, 0);
+ restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width, avctx->height,
+ c->slices, 0);
} else {
- restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
- avctx->width, avctx->height,
- c->slices, 0);
+ restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+ avctx->width, avctx->height,
+ c->slices, 0);
}
}
}
@@ -724,6 +827,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c->avctx = avctx;
ff_bswapdsp_init(&c->bdsp);
+ ff_huffyuvdsp_init(&c->hdspdec);
if (avctx->extradata_size >= 16) {
av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",