diff options
author | Ashish Singh <ashk43712@gmail.com> | 2017-09-16 02:17:58 +0530 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2017-09-30 11:47:59 -0400 |
commit | 148c8e88c43cfbabd6aee9f01ef30942cee9d359 (patch) | |
tree | e319e2df00e71ae4670fe3ebb081843c3af4d1fa /libavfilter | |
parent | 450cee522ec16a4375c02d3836de4893dfff26aa (diff) | |
download | ffmpeg-148c8e88c43cfbabd6aee9f01ef30942cee9d359.tar.gz |
avfilter: add vmafmotion filter
Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/Makefile | 1 | ||||
-rw-r--r-- | libavfilter/allfilters.c | 1 | ||||
-rw-r--r-- | libavfilter/vf_vmafmotion.c | 427 | ||||
-rw-r--r-- | libavfilter/vmaf_motion.h | 73 |
4 files changed, 502 insertions, 0 deletions
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 9eeb7d218f..d2f0495f37 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -331,6 +331,7 @@ OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VMAFMOTION_FILTER)             += vf_vmafmotion.o framesync.o
 OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index baa84a3e72..9b672a7a7e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -342,6 +342,7 @@ static void register_all(void)
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VMAFMOTION,     vmafmotion,     vf);
     REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
     REGISTER_FILTER(WAVEFORM,       waveform,       vf);
diff --git a/libavfilter/vf_vmafmotion.c b/libavfilter/vf_vmafmotion.c
new file mode 100644
index 0000000000..6b6150a931
--- /dev/null
+++ b/libavfilter/vf_vmafmotion.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Calculate VMAF Motion score.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "vmaf_motion.h"
+
+/* Fixed-point precision (fractional bits) of the integer blur kernel. */
+#define BIT_SHIFT 15
+
+/* 5-tap blur kernel; quantized to BIT_SHIFT bits in ff_vmafmotion_init(). */
+static const float FILTER_5[5] = {
+    0.054488685,
+    0.244201342,
+    0.402619947,
+    0.244201342,
+    0.054488685
+};
+
+typedef struct VMAFMotionContext {
+    const AVClass *class;
+    VMAFMotionData data;
+    FILE *stats_file;
+    char *stats_file_str;
+} VMAFMotionContext;
+
+#define OFFSET(x) offsetof(VMAFMotionContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption vmafmotion_options[] = {
+    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vmafmotion);
+
+/**
+ * Reflect a tap position that lies outside [0, size) back into the plane.
+ * The overshoot must be smaller than size for the result to be in range.
+ */
+static inline int mirror_tap(int pos, int size)
+{
+    pos = FFABS(pos);
+    if (pos >= size)
+        pos = size - (pos - size + 1);
+    return pos;
+}
+
+/**
+ * Sum of absolute differences between two 16-bit planes of w x h samples.
+ * Strides are in bytes.
+ */
+static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
+                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
+{
+    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
+    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
+    uint64_t sum = 0;
+    int i, j;
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            sum += abs(img1[j] - img2[j]);
+        }
+        img1 += img1_stride;
+        img2 += img2_stride;
+    }
+
+    return sum;
+}
+
+/**
+ * Horizontal pass of the separable blur on 16-bit intermediate data.
+ * Taps outside a row are mirrored back into it. Strides are in bytes.
+ * w must be at least filt_w - filt_w / 2 or the right border goes negative.
+ */
+static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
+                          ptrdiff_t _dst_stride)
+{
+    ptrdiff_t src_stride = _src_stride / sizeof(*src);
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
+    int radius = filt_w / 2;
+    int borders_left = radius;
+    int borders_right = w - (filt_w - radius);
+    int i, j, k;
+    int sum;
+
+    for (i = 0; i < h; i++) {
+        /* left border: mirror taps that fall before column 0 */
+        for (j = 0; j < borders_left; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = mirror_tap(j - radius + k, w);
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        /* interior: every tap is inside the row */
+        for (j = borders_left; j < borders_right; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                sum += filter[k] * src[i * src_stride + j - radius + k];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+
+        /* right border: mirror taps that fall past column w - 1 */
+        for (j = borders_right; j < w; j++) {
+            sum = 0;
+            for (k = 0; k < filt_w; k++) {
+                int j_tap = mirror_tap(j - radius + k, w);
+                sum += filter[k] * src[i * src_stride + j_tap];
+            }
+            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
+        }
+    }
+}
+
+/*
+ * Vertical pass of the separable blur. Reads the source plane as 'type'
+ * samples and writes 16-bit intermediate data, shifted right by 'bits'.
+ * Rows outside the plane are mirrored back into it. Strides are in bytes.
+ * h must be at least filt_w - filt_w / 2 or the bottom border goes
+ * negative.
+ */
+#define conv_y_fn(type, bits) \
+static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
+                                      const uint8_t *_src, uint16_t *dst, \
+                                      int w, int h, ptrdiff_t _src_stride, \
+                                      ptrdiff_t _dst_stride) \
+{ \
+    const type *src = (const type *) _src; \
+    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
+    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
+    int radius = filt_w / 2; \
+    int borders_top = radius; \
+    int borders_bottom = h - (filt_w - radius); \
+    int i, j, k; \
+    int sum; \
+    \
+    for (i = 0; i < borders_top; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = mirror_tap(i - radius + k, h); \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_top; i < borders_bottom; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+    for (i = borders_bottom; i < h; i++) { \
+        for (j = 0; j < w; j++) { \
+            sum = 0; \
+            for (k = 0; k < filt_w; k++) { \
+                int i_tap = mirror_tap(i - radius + k, h); \
+                sum += filter[k] * src[i_tap * src_stride + j]; \
+            } \
+            dst[i * dst_stride + j] = sum >> bits; \
+        } \
+    } \
+}
+
+conv_y_fn(uint8_t, 8);
+conv_y_fn(uint16_t, 10);
+
+/* Install the C kernels; the vertical pass depends on the input bit depth. */
+static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
+    dsp->convolution_x = convolution_x;
+    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
+    dsp->sad = image_sad;
+}
+
+/**
+ * Blur plane 0 of ref and score it against the previous blurred frame.
+ *
+ * @return the motion score of this frame (0.0 for the first frame); it is
+ *         also added to s->motion_sum
+ */
+double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
+{
+    double score;
+
+    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
+                             s->width, s->height, ref->linesize[0], s->stride);
+    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
+                             s->width, s->height, s->stride, s->stride);
+
+    if (!s->nb_frames) {
+        score = 0.0;
+    } else {
+        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
+                                      s->width, s->height, s->stride, s->stride);
+        // the output score is always normalized to 8 bits; the divisor is
+        // computed in double so large frames cannot overflow int
+        score = (double) sad /
+                ((double) s->width * s->height * (1 << (BIT_SHIFT - 8)));
+    }
+
+    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
+    s->nb_frames++;
+    s->motion_sum += score;
+
+    return score;
+}
+
+/* Store d under key in the frame metadata, formatted with two decimals. */
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    av_dict_set(metadata, key, value, 0);
+}
+
+/* Score one frame, export the score as metadata and log it if requested. */
+static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double score;
+
+    score = ff_vmafmotion_process(&s->data, ref);
+    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
+    if (s->stats_file) {
+        fprintf(s->stats_file,
+                "n:%"PRIu64" motion:%0.2lf\n", s->data.nb_frames, score);
+    }
+}
+
+/**
+ * Allocate the working buffers and set up the kernel and DSP functions.
+ *
+ * @return 0 on success, AVERROR(EINVAL) for an unknown pixel format or
+ *         dimensions below 3x3, AVERROR(ENOMEM) if allocation fails
+ */
+int ff_vmafmotion_init(VMAFMotionData *s,
+                       int w, int h, enum AVPixelFormat fmt)
+{
+    size_t data_sz;
+    int i;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    // the border handling of the 5-tap convolutions would index outside
+    // the plane for anything narrower or shorter than 3 samples
+    if (w < 3 || h < 3)
+        return AVERROR(EINVAL);
+
+    s->width = w;
+    s->height = h;
+    s->stride = FFALIGN(w * sizeof(uint16_t), 32);
+
+    data_sz = (size_t) s->stride * h;
+    s->blur_data[0] = av_malloc(data_sz);
+    s->blur_data[1] = av_malloc(data_sz);
+    s->temp_data    = av_malloc(data_sz);
+    if (!s->blur_data[0] || !s->blur_data[1] || !s->temp_data) {
+        // release whatever was allocated so the context holds no stale pointers
+        av_freep(&s->blur_data[0]);
+        av_freep(&s->blur_data[1]);
+        av_freep(&s->temp_data);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < 5; i++) {
+        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
+    }
+
+    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
+
+    return 0;
+}
+
+/* Advertise the supported 8- and 10-bit planar YUV formats. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+/* Set up the motion state from the input link's dimensions and format. */
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VMAFMotionContext *s = ctx->priv;
+
+    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
+                              ctx->inputs[0]->h, ctx->inputs[0]->format);
+}
+
+/**
+ * Free the working buffers.
+ *
+ * @return the average motion score over all processed frames, or 0.0 if
+ *         no frame was processed
+ */
+double ff_vmafmotion_uninit(VMAFMotionData *s)
+{
+    // av_freep() also clears the pointers, so a repeated call is harmless
+    av_freep(&s->blur_data[0]);
+    av_freep(&s->blur_data[1]);
+    av_freep(&s->temp_data);
+
+    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
+}
+
+/* Score the incoming frame and forward it with the score attached. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    do_vmafmotion(ctx, ref);
+    return ff_filter_frame(ctx->outputs[0], ref);
+}
+
+/* Open the stats file if one was requested; "-" selects stdout. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Log the average score, free the buffers and close the stats file. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VMAFMotionContext *s = ctx->priv;
+    double avg_motion = ff_vmafmotion_uninit(&s->data);
+
+    if (s->data.nb_frames > 0) {
+        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+}
+
+static const AVFilterPad vmafmotion_inputs[] = {
+    {
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vmafmotion_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vmafmotion = {
+    .name          = "vmafmotion",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(VMAFMotionContext),
+    .priv_class    = &vmafmotion_class,
+    .inputs        = vmafmotion_inputs,
+    .outputs       = vmafmotion_outputs,
+};
diff --git a/libavfilter/vmaf_motion.h b/libavfilter/vmaf_motion.h
new file mode 100644
index 0000000000..0c71182f95
--- /dev/null
+++ b/libavfilter/vmaf_motion.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VMAFMOTION_H
+#define AVFILTER_VMAFMOTION_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "video.h"
+
+/* Function table for the motion kernels; all strides are in bytes. */
+typedef struct VMAFMotionDSPContext {
+    uint64_t (*sad)(const uint16_t *img1, const uint16_t *img2, int w, int h,
+                    ptrdiff_t img1_stride, ptrdiff_t img2_stride);
+    void (*convolution_x)(const uint16_t *filter, int filt_w, const uint16_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+    void (*convolution_y)(const uint16_t *filter, int filt_w, const uint8_t *src,
+                          uint16_t *dst, int w, int h, ptrdiff_t src_stride,
+                          ptrdiff_t dst_stride);
+} VMAFMotionDSPContext;
+
+void ff_vmafmotion_init_x86(VMAFMotionDSPContext *dsp);
+
+typedef struct VMAFMotionData {
+    uint16_t filter[5];                     ///< fixed-point blur kernel
+    int width;
+    int height;
+    ptrdiff_t stride;                       ///< stride of the buffers below, in bytes
+    uint16_t *blur_data[2 /* cur, prev */];
+    uint16_t *temp_data;                    ///< output of the vertical pass
+    double motion_sum;                      ///< sum of all per-frame scores
+    uint64_t nb_frames;                     ///< number of frames processed
+    VMAFMotionDSPContext vmafdsp;
+} VMAFMotionData;
+
+/**
+ * Prepare data for frames of w x h samples in pixel format fmt.
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vmafmotion_init(VMAFMotionData *data, int w, int h, enum AVPixelFormat fmt);
+
+/**
+ * Process one frame.
+ * @return the motion score of frame relative to the previous one
+ */
+double ff_vmafmotion_process(VMAFMotionData *data, AVFrame *frame);
+
+/**
+ * Free the buffers owned by data.
+ * @return the average motion score over all processed frames
+ */
+double ff_vmafmotion_uninit(VMAFMotionData *data);
+
+#endif /* AVFILTER_VMAFMOTION_H */