about summary refs log tree commit diff stats
path: root/libavfilter/vf_hflip.c
diff options
context:
space:
mode:
author Paul B Mahol <onemda@gmail.com> 2017-12-01 20:56:45 +0100
committer Paul B Mahol <onemda@gmail.com> 2017-12-04 09:58:25 +0100
commit 86fda8be3f3892c48474a319e0ef7509dc137e3e (patch)
tree 0cb45a7a8dead1053f2d91d7c5f1f00cce669d86 /libavfilter/vf_hflip.c
parent d1d6f965d81bab2076d67cdc1dabd5a1f5bb2d30 (diff)
download ffmpeg-86fda8be3f3892c48474a319e0ef7509dc137e3e.tar.gz
avfilter: add hflip x86 SIMD
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavfilter/vf_hflip.c')
-rw-r--r-- libavfilter/vf_hflip.c 133
1 files changed, 86 insertions, 47 deletions
diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
index cf20c193f7..957ddd9900 100644
--- a/libavfilter/vf_hflip.c
+++ b/libavfilter/vf_hflip.c
@@ -29,6 +29,7 @@
#include "libavutil/opt.h"
#include "avfilter.h"
#include "formats.h"
+#include "hflip.h"
#include "internal.h"
#include "video.h"
#include "libavutil/pixdesc.h"
@@ -36,13 +37,6 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/imgutils.h"
-typedef struct FlipContext {
- const AVClass *class;
- int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes
- int planewidth[4]; ///< width of each plane
- int planeheight[4]; ///< height of each plane
-} FlipContext;
-
static const AVOption hflip_options[] = {
{ NULL }
};
@@ -67,12 +61,77 @@ static int query_formats(AVFilterContext *ctx)
return ff_set_common_formats(ctx, pix_fmts);
}
+static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w) /* mirror a row of w one-byte elements from src into dst */
+{
+ int j;
+
+ for (j = 0; j < w; j++)
+ dst[j] = src[-j]; /* dst is filled forwards while src is read backwards, so src must address the LAST element to copy */
+}
+
+static void hflip_short_c(const uint8_t *ssrc, uint8_t *ddst, int w) /* mirror a row of w 16-bit elements from ssrc into ddst */
+{
+ const uint16_t *src = (const uint16_t *)ssrc; /* byte pointers are reinterpreted so that indexing moves one 16-bit element at a time */
+ uint16_t *dst = (uint16_t *)ddst; /* NOTE(review): assumes both pointers are suitably aligned for uint16_t -- confirm against callers */
+ int j;
+
+ for (j = 0; j < w; j++)
+ dst[j] = src[-j]; /* w counts elements, not bytes; ssrc must address the LAST element to copy */
+}
+
+static void hflip_dword_c(const uint8_t *ssrc, uint8_t *ddst, int w) /* mirror a row of w 32-bit elements from ssrc into ddst */
+{
+ const uint32_t *src = (const uint32_t *)ssrc; /* byte pointers are reinterpreted so that indexing moves one 32-bit element at a time */
+ uint32_t *dst = (uint32_t *)ddst; /* NOTE(review): assumes both pointers are suitably aligned for uint32_t -- confirm against callers */
+ int j;
+
+ for (j = 0; j < w; j++)
+ dst[j] = src[-j]; /* w counts elements, not bytes; ssrc must address the LAST element to copy */
+}
+
+static void hflip_b24_c(const uint8_t *src, uint8_t *dst, int w) /* mirror a row of w three-byte elements from src into dst */
+{
+ const uint8_t *in = src; /* src must address the first byte of the LAST element to copy, since in only moves backwards */
+ uint8_t *out = dst;
+ int j;
+
+ for (j = 0; j < w; j++, out += 3, in -= 3) { /* output walks forwards and input backwards, 3 bytes per element */
+ int32_t v = AV_RB24(in); /* presumably a 24-bit big-endian load (libavutil/intreadwrite.h) -- confirm */
+
+ AV_WB24(out, v); /* stored with the matching WB24 macro, so the bytes inside one element are expected to keep their order */
+ }
+}
+
+static void hflip_b48_c(const uint8_t *src, uint8_t *dst, int w) /* mirror a row of w six-byte elements from src into dst */
+{
+ const uint8_t *in = src; /* src must address the first byte of the LAST element to copy, since in only moves backwards */
+ uint8_t *out = dst;
+ int j;
+
+ for (j = 0; j < w; j++, out += 6, in -= 6) { /* output walks forwards and input backwards, 6 bytes per element */
+ int64_t v = AV_RB48(in); /* presumably a 48-bit big-endian load (libavutil/intreadwrite.h) -- confirm */
+
+ AV_WB48(out, v); /* stored with the matching WB48 macro, so the bytes inside one element are expected to keep their order */
+ }
+}
+
+static void hflip_qword_c(const uint8_t *ssrc, uint8_t *ddst, int w) /* mirror a row of w 64-bit elements from ssrc into ddst */
+{
+ const uint64_t *src = (const uint64_t *)ssrc; /* byte pointers are reinterpreted so that indexing moves one 64-bit element at a time */
+ uint64_t *dst = (uint64_t *)ddst; /* NOTE(review): assumes both pointers are suitably aligned for uint64_t -- confirm against callers */
+ int j;
+
+ for (j = 0; j < w; j++)
+ dst[j] = src[-j]; /* w counts elements, not bytes; ssrc must address the LAST element to copy */
+}
+
static int config_props(AVFilterLink *inlink)
{
FlipContext *s = inlink->dst->priv;
const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
const int hsub = pix_desc->log2_chroma_w;
const int vsub = pix_desc->log2_chroma_h;
+ int nb_planes, i;
av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
s->planewidth[0] = s->planewidth[3] = inlink->w;
@@ -80,6 +139,24 @@ static int config_props(AVFilterLink *inlink)
s->planeheight[0] = s->planeheight[3] = inlink->h;
s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
+ nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+ for (i = 0; i < nb_planes; i++) {
+ switch (s->max_step[i]) {
+ case 1: s->flip_line[i] = hflip_byte_c; break;
+ case 2: s->flip_line[i] = hflip_short_c; break;
+ case 3: s->flip_line[i] = hflip_b24_c; break;
+ case 4: s->flip_line[i] = hflip_dword_c; break;
+ case 6: s->flip_line[i] = hflip_b48_c; break;
+ case 8: s->flip_line[i] = hflip_qword_c; break;
+ default:
+ return AVERROR_BUG;
+ }
+ }
+
+ if (ARCH_X86)
+ ff_hflip_init_x86(s, s->max_step, nb_planes);
+
return 0;
}
@@ -94,7 +171,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
AVFrame *in = td->in;
AVFrame *out = td->out;
uint8_t *inrow, *outrow;
- int i, j, plane, step;
+ int i, plane, step;
for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
const int width = s->planewidth[plane];
@@ -107,45 +184,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
outrow = out->data[plane] + start * out->linesize[plane];
inrow = in ->data[plane] + start * in->linesize[plane] + (width - 1) * step;
for (i = start; i < end; i++) {
- switch (step) {
- case 1:
- for (j = 0; j < width; j++)
- outrow[j] = inrow[-j];
- break;
-
- case 2:
- {
- uint16_t *outrow16 = (uint16_t *)outrow;
- uint16_t * inrow16 = (uint16_t *) inrow;
- for (j = 0; j < width; j++)
- outrow16[j] = inrow16[-j];
- }
- break;
-
- case 3:
- {
- uint8_t *in = inrow;
- uint8_t *out = outrow;
- for (j = 0; j < width; j++, out += 3, in -= 3) {
- int32_t v = AV_RB24(in);
- AV_WB24(out, v);
- }
- }
- break;
-
- case 4:
- {
- uint32_t *outrow32 = (uint32_t *)outrow;
- uint32_t * inrow32 = (uint32_t *) inrow;
- for (j = 0; j < width; j++)
- outrow32[j] = inrow32[-j];
- }
- break;
-
- default:
- for (j = 0; j < width; j++)
- memcpy(outrow + j*step, inrow - j*step, step);
- }
+ s->flip_line[plane](inrow, outrow, width);
inrow += in ->linesize[plane];
outrow += out->linesize[plane];