libavfilter/vf_removelogo.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569

/*
 * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
 * Copyright (c) 2012 Stefano Sabatini
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Advanced blur-based logo removing filter
 *
 * This filter loads an image mask file showing where a logo is and
 * uses a blur transform to remove the logo.
 *
 * Based on the libmpcodecs remove-logo filter by Robert Edele.
 */

/**
 * This code implements a filter to remove annoying TV logos and other annoying
 * images placed onto a video stream. It works by filling in the pixels that
 * comprise the logo with neighboring pixels. The transform is very loosely
 * based on a gaussian blur, but it is different enough to merit its own
 * paragraph later on. It is a major improvement on the old delogo filter as it
 * both uses a better blurring algorithm and uses a bitmap to use an arbitrary
 * and generally much tighter fitting shape than a rectangle.
 *
 * The logo removal algorithm has two key points. The first is that it
 * distinguishes between pixels in the logo and those not in the logo by using
 * the passed-in bitmap. Pixels not in the logo are copied over directly without
 * being modified and they also serve as source pixels for the logo
 * fill-in. Pixels inside the logo have the mask applied.
 *
 * At init-time the bitmap is reprocessed internally, and the distance to the
 * nearest edge of the logo (Manhattan distance), along with a little extra to
 * remove rough edges, is stored in each pixel. This is done using an in-place
 * erosion algorithm, and incrementing each pixel that survives any given
 * erosion.  Once every pixel is eroded, the maximum value is recorded, and a
 * set of masks from size 0 to this size are generaged. The masks are circular
 * binary masks, where each pixel within a radius N (where N is the size of the
 * mask) is a 1, and all other pixels are a 0. Although a gaussian mask would be
 * more mathematically accurate, a binary mask works better in practice because
 * we generally do not use the central pixels in the mask (because they are in
 * the logo region), and thus a gaussian mask will cause too little blur and
 * thus a very unstable image.
 *
 * The mask is applied in a special way. Namely, only pixels in the mask that
 * line up to pixels outside the logo are used. The dynamic mask size means that
 * the mask is just big enough so that the edges touch pixels outside the logo,
 * so the blurring is kept to a minimum and at least the first boundary
 * condition is met (that the image function itself is continuous), even if the
 * second boundary condition (that the derivative of the image function is
 * continuous) is not met. A masking algorithm that does preserve the second
 * boundary coundition (perhaps something based on a highly-modified bi-cubic
 * algorithm) should offer even better results on paper, but the noise in a
 * typical TV signal should make anything based on derivatives hopelessly noisy.
 */

#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "avfilter.h"
#include "filters.h"
#include "video.h"
#include "bbox.h"
#include "lavfutils.h"
#include "lswsutils.h"

typedef struct RemovelogoContext {
    const AVClass *class;
    char *filename;
    /* Stores our collection of masks. The first is for an array of
       the second for the y axis, and the third for the x axis. */
    int ***mask;
    int max_mask_size;
    int mask_w, mask_h;

    uint8_t      *full_mask_data;
    FFBoundingBox full_mask_bbox;
    uint8_t      *half_mask_data;
    FFBoundingBox half_mask_bbox;
} RemovelogoContext;

#define OFFSET(x) offsetof(RemovelogoContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
static const AVOption removelogo_options[] = {
    { "filename", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
    { "f",        "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(removelogo);

/**
 * Choose a slightly larger mask size to improve performance.
 *
 * This function maps the absolute minimum mask size needed to the
 * mask size we'll actually use. f(x) = x (the smallest that will
 * work) will produce the sharpest results, but will be quite
 * jittery. f(x) = 1.25x (what I'm using) is a good tradeoff in my
 * opinion. This will calculate only at init-time, so you can put a
 * long expression here without effecting performance.
 */
#define apply_mask_fudge_factor(x) (((x) >> 2) + (x))

/**
 * Pre-process an image to give distance information.
 *
 * This function takes a bitmap image and converts it in place into a
 * distance image. A distance image is zero for pixels outside of the
 * logo and is the Manhattan distance (|dx| + |dy|) from the logo edge
 * for pixels inside of the logo. This will overestimate the distance,
 * but that is safe, and is far easier to implement than a proper
 * pythagorean distance since I'm using a modified erosion algorithm
 * to compute the distances.
 *
 * @param mask image which will be converted from a greyscale image
 * into a distance image.
 */
static void convert_mask_to_strength_mask(uint8_t *data, int linesize,
                                          int w, int h, int min_val,
                                          int *max_mask_size)
{
    int x, y;

    /* How many times we've gone through the loop. Used in the
       in-place erosion algorithm and to get us max_mask_size later on. */
    int current_pass = 0;

    /* set all non-zero values to 1 */
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            data[y*linesize + x] = data[y*linesize + x] > min_val;

    /* For each pass, if a pixel is itself the same value as the
       current pass, and its four neighbors are too, then it is
       incremented. If no pixels are incremented by the end of the
       pass, then we go again. Edge pixels are counted as always
       excluded (this should be true anyway for any sane mask, but if
       it isn't this will ensure that we eventually exit). */
    while (1) {
        /* If this doesn't get set by the end of this pass, then we're done. */
        int has_anything_changed = 0;
        uint8_t *current_pixel0 = data + 1 + linesize, *current_pixel;
        current_pass++;

        for (y = 1; y < h-1; y++) {
            current_pixel = current_pixel0;
            for (x = 1; x < w-1; x++) {
                /* Apply the in-place erosion transform. It is based
                   on the following two premises:
                   1 - Any pixel that fails 1 erosion will fail all
                       future erosions.

                   2 - Only pixels having survived all erosions up to
                       the present will be >= to current_pass.
                   It doesn't matter if it survived the current pass,
                   failed it, or hasn't been tested yet.  By using >=
                   instead of ==, we allow the algorithm to work in
                   place. */
                if ( *current_pixel      >= current_pass &&
                    *(current_pixel + 1) >= current_pass &&
                    *(current_pixel - 1) >= current_pass &&
                    *(current_pixel + linesize) >= current_pass &&
                    *(current_pixel - linesize) >= current_pass) {
                    /* Increment the value since it still has not been
                     * eroded, as evidenced by the if statement that
                     * just evaluated to true. */
                    (*current_pixel)++;
                    has_anything_changed = 1;
                }
                current_pixel++;
            }
            current_pixel0 += linesize;
        }
        if (!has_anything_changed)
            break;
    }

    /* Apply the fudge factor, which will increase the size of the
     * mask a little to reduce jitter at the cost of more blur. */
    for (y = 1; y < h - 1; y++)
        for (x = 1; x < w - 1; x++)
            data[(y * linesize) + x] = apply_mask_fudge_factor(data[(y * linesize) + x]);

    /* As a side-effect, we now know the maximum mask size, which
     * we'll use to generate our masks. */
    /* Apply the fudge factor to this number too, since we must ensure
     * that enough masks are generated. */
    *max_mask_size = apply_mask_fudge_factor(current_pass + 1);
}

static int load_mask(uint8_t **mask, int *w, int *h,
                     const char *filename, void *log_ctx)
{
    int ret;
    enum AVPixelFormat pix_fmt;
    uint8_t *src_data[4], *gray_data[4];
    int src_linesize[4], gray_linesize[4];

    /* load image from file */
    if ((ret = ff_load_image(src_data, src_linesize, w, h, &pix_fmt, filename, log_ctx)) < 0)
        return ret;

    /* convert the image to GRAY8 */
    if ((ret = ff_scale_image(gray_data, gray_linesize, *w, *h, AV_PIX_FMT_GRAY8,
                              src_data, src_linesize, *w, *h, pix_fmt,
                              log_ctx)) < 0)
        goto end;

    /* copy mask to a newly allocated array */
    *mask = av_malloc(*w * *h);
    if (!*mask)
        ret = AVERROR(ENOMEM);
    av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h);

end:
    av_freep(&src_data[0]);
    av_freep(&gray_data[0]);
    return ret;
}

/**
 * Generate a scaled down image with half width, height, and intensity.
 *
 * This function not only scales down an image, but halves the value
 * in each pixel too. The purpose of this is to produce a chroma
 * filter image out of a luma filter image. The pixel values store the
 * distance to the edge of the logo and halving the dimensions halves
 * the distance. This function rounds up, because a downwards rounding
 * error could cause the filter to fail, but an upwards rounding error
 * will only cause a minor amount of excess blur in the chroma planes.
 */
static void generate_half_size_image(const uint8_t *src_data, int src_linesize,
                                     uint8_t *dst_data, int dst_linesize,
                                     int src_w, int src_h,
                                     int *max_mask_size)
{
    int x, y;

    /* Copy over the image data, using the average of 4 pixels for to
     * calculate each downsampled pixel. */
    for (y = 0; y < src_h/2; y++) {
        for (x = 0; x < src_w/2; x++) {
            /* Set the pixel if there exists a non-zero value in the
             * source pixels, else clear it. */
            dst_data[(y * dst_linesize) + x] =
                src_data[((y << 1) * src_linesize) + (x << 1)] ||
                src_data[((y << 1) * src_linesize) + (x << 1) + 1] ||
                src_data[(((y << 1) + 1) * src_linesize) + (x << 1)] ||
                src_data[(((y << 1) + 1) * src_linesize) + (x << 1) + 1];
            dst_data[(y * dst_linesize) + x] = FFMIN(1, dst_data[(y * dst_linesize) + x]);
        }
    }

    convert_mask_to_strength_mask(dst_data, dst_linesize,
                                  src_w/2, src_h/2, 0, max_mask_size);
}

static av_cold int init(AVFilterContext *ctx)
{
    RemovelogoContext *s = ctx->priv;
    int ***mask;
    int ret = 0;
    int a, b, c, w, h;
    int full_max_mask_size, half_max_mask_size;

    if (!s->filename) {
        av_log(ctx, AV_LOG_ERROR, "The bitmap file name is mandatory\n");
        return AVERROR(EINVAL);
    }

    /* Load our mask image. */
    if ((ret = load_mask(&s->full_mask_data, &w, &h, s->filename, ctx)) < 0)
        return ret;
    s->mask_w = w;
    s->mask_h = h;

    convert_mask_to_strength_mask(s->full_mask_data, w, w, h,
                                  16, &full_max_mask_size);

    /* Create the scaled down mask image for the chroma planes. */
    if (!(s->half_mask_data = av_mallocz(w/2 * h/2)))
        return AVERROR(ENOMEM);
    generate_half_size_image(s->full_mask_data, w,
                             s->half_mask_data, w/2,
                             w, h, &half_max_mask_size);

    s->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size);

    /* Create a circular mask for each size up to max_mask_size. When
       the filter is applied, the mask size is determined on a pixel
       by pixel basis, with pixels nearer the edge of the logo getting
       smaller mask sizes. */
    mask = (int ***)av_malloc_array(s->max_mask_size + 1, sizeof(int **));
    if (!mask)
        return AVERROR(ENOMEM);

    for (a = 0; a <= s->max_mask_size; a++) {
        mask[a] = (int **)av_malloc_array((a * 2) + 1, sizeof(int *));
        if (!mask[a]) {
            av_free(mask);
            return AVERROR(ENOMEM);
        }
        for (b = -a; b <= a; b++) {
            mask[a][b + a] = (int *)av_malloc_array((a * 2) + 1, sizeof(int));
            if (!mask[a][b + a]) {
                av_free(mask);
                return AVERROR(ENOMEM);
            }
            for (c = -a; c <= a; c++) {
                if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */
                    mask[a][b + a][c + a] = 1;
                else
                    mask[a][b + a][c + a] = 0;
            }
        }
    }
    s->mask = mask;

    /* Calculate our bounding rectangles, which determine in what
     * region the logo resides for faster processing. */
    ff_calculate_bounding_box(&s->full_mask_bbox, s->full_mask_data, w, w, h, 0, 8);
    ff_calculate_bounding_box(&s->half_mask_bbox, s->half_mask_data, w/2, w/2, h/2, 0, 8);

#define SHOW_LOGO_INFO(mask_type)                                       \
    av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \
           s->mask_type##_mask_bbox.x1, s->mask_type##_mask_bbox.x2, \
           s->mask_type##_mask_bbox.y1, s->mask_type##_mask_bbox.y2, \
           mask_type##_max_mask_size);
    SHOW_LOGO_INFO(full);
    SHOW_LOGO_INFO(half);

    return 0;
}

static int config_props_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    RemovelogoContext *s = ctx->priv;

    if (inlink->w != s->mask_w || inlink->h != s->mask_h) {
        av_log(ctx, AV_LOG_INFO,
               "Mask image size %dx%d does not match with the input video size %dx%d\n",
               s->mask_w, s->mask_h, inlink->w, inlink->h);
        return AVERROR(EINVAL);
    }

    return 0;
}

/**
 * Blur image.
 *
 * It takes a pixel that is inside the mask and blurs it. It does so
 * by finding the average of all the pixels within the mask and
 * outside of the mask.
 *
 * @param mask_data  the mask plane to use for averaging
 * @param image_data the image plane to blur
 * @param w width of the image
 * @param h height of the image
 * @param x x-coordinate of the pixel to blur
 * @param y y-coordinate of the pixel to blur
 */
static unsigned int blur_pixel(int ***mask,
                               const uint8_t *mask_data, int mask_linesize,
                               uint8_t       *image_data, int image_linesize,
                               int w, int h, int x, int y)
{
    /* Mask size tells how large a circle to use. The radius is about
     * (slightly larger than) mask size. */
    int mask_size;
    int start_posx, start_posy, end_posx, end_posy;
    int i, j;
    unsigned int accumulator = 0, divisor = 0;
    /* What pixel we are reading out of the circular blur mask. */
    const uint8_t *image_read_position;
    /* What pixel we are reading out of the filter image. */
    const uint8_t *mask_read_position;

    /* Prepare our bounding rectangle and clip it if need be. */
    mask_size  = mask_data[y * mask_linesize + x];
    start_posx = FFMAX(0, x - mask_size);
    start_posy = FFMAX(0, y - mask_size);
    end_posx   = FFMIN(w - 1, x + mask_size);
    end_posy   = FFMIN(h - 1, y + mask_size);

    image_read_position = image_data + image_linesize * start_posy + start_posx;
    mask_read_position  = mask_data  + mask_linesize  * start_posy + start_posx;

    for (j = start_posy; j <= end_posy; j++) {
        for (i = start_posx; i <= end_posx; i++) {
            /* Check if this pixel is in the mask or not. Only use the
             * pixel if it is not. */
            if (!(*mask_read_position) && mask[mask_size][i - start_posx][j - start_posy]) {
                accumulator += *image_read_position;
                divisor++;
            }

            image_read_position++;
            mask_read_position++;
        }

        image_read_position += (image_linesize - ((end_posx + 1) - start_posx));
        mask_read_position  += (mask_linesize - ((end_posx + 1) - start_posx));
    }

    /* If divisor is 0, it means that not a single pixel is outside of
       the logo, so we have no data.  Else we need to normalise the
       data using the divisor. */
    return divisor == 0 ? 255:
        (accumulator + (divisor / 2)) / divisor;  /* divide, taking into account average rounding error */
}

/**
 * Blur image plane using a mask.
 *
 * @param source The image to have it's logo removed.
 * @param destination Where the output image will be stored.
 * @param source_stride How far apart (in memory) two consecutive lines are.
 * @param destination Same as source_stride, but for the destination image.
 * @param width Width of the image. This is the same for source and destination.
 * @param height Height of the image. This is the same for source and destination.
 * @param is_image_direct If the image is direct, then source and destination are
 *        the same and we can save a lot of time by not copying pixels that
 *        haven't changed.
 * @param filter The image that stores the distance to the edge of the logo for
 *        each pixel.
 * @param logo_start_x smallest x-coordinate that contains at least 1 logo pixel.
 * @param logo_start_y smallest y-coordinate that contains at least 1 logo pixel.
 * @param logo_end_x   largest x-coordinate that contains at least 1 logo pixel.
 * @param logo_end_y   largest y-coordinate that contains at least 1 logo pixel.
 *
 * This function processes an entire plane. Pixels outside of the logo are copied
 * to the output without change, and pixels inside the logo have the de-blurring
 * function applied.
 */
static void blur_image(int ***mask,
                       const uint8_t *src_data,  int src_linesize,
                             uint8_t *dst_data,  int dst_linesize,
                       const uint8_t *mask_data, int mask_linesize,
                       int w, int h, int direct,
                       FFBoundingBox *bbox)
{
    int x, y;
    uint8_t *dst_line;
    const uint8_t *src_line;

    if (!direct)
        av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h);

    for (y = bbox->y1; y <= bbox->y2; y++) {
        src_line = src_data + src_linesize * y;
        dst_line = dst_data + dst_linesize * y;

        for (x = bbox->x1; x <= bbox->x2; x++) {
             if (mask_data[y * mask_linesize + x]) {
                /* Only process if we are in the mask. */
                 dst_line[x] = blur_pixel(mask,
                                          mask_data, mask_linesize,
                                          dst_data, dst_linesize,
                                          w, h, x, y);
            } else {
                /* Else just copy the data. */
                if (!direct)
                    dst_line[x] = src_line[x];
            }
        }
    }
}

static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
{
    RemovelogoContext *s = inlink->dst->priv;
    AVFilterLink *outlink = inlink->dst->outputs[0];
    AVFrame *outpicref;
    int direct = 0;

    if (av_frame_is_writable(inpicref)) {
        direct = 1;
        outpicref = inpicref;
    } else {
        outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
        if (!outpicref) {
            av_frame_free(&inpicref);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(outpicref, inpicref);
    }

    blur_image(s->mask,
               inpicref ->data[0], inpicref ->linesize[0],
               outpicref->data[0], outpicref->linesize[0],
               s->full_mask_data, inlink->w,
               inlink->w, inlink->h, direct, &s->full_mask_bbox);
    blur_image(s->mask,
               inpicref ->data[1], inpicref ->linesize[1],
               outpicref->data[1], outpicref->linesize[1],
               s->half_mask_data, inlink->w/2,
               inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
    blur_image(s->mask,
               inpicref ->data[2], inpicref ->linesize[2],
               outpicref->data[2], outpicref->linesize[2],
               s->half_mask_data, inlink->w/2,
               inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);

    if (!direct)
        av_frame_free(&inpicref);

    return ff_filter_frame(outlink, outpicref);
}

static av_cold void uninit(AVFilterContext *ctx)
{
    RemovelogoContext *s = ctx->priv;
    int a, b;

    av_freep(&s->full_mask_data);
    av_freep(&s->half_mask_data);

    if (s->mask) {
        /* Loop through each mask. */
        for (a = 0; a <= s->max_mask_size; a++) {
            /* Loop through each scanline in a mask. */
            for (b = -a; b <= a; b++) {
                av_freep(&s->mask[a][b + a]); /* Free a scanline. */
            }
            av_freep(&s->mask[a]);
        }
        /* Free the array of pointers pointing to the masks. */
        av_freep(&s->mask);
    }
}

static const AVFilterPad removelogo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_props_input,
        .filter_frame = filter_frame,
    },
};

const AVFilter ff_vf_removelogo = {
    .name          = "removelogo",
    .description   = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."),
    .priv_size     = sizeof(RemovelogoContext),
    .init          = init,
    .uninit        = uninit,
    FILTER_INPUTS(removelogo_inputs),
    FILTER_OUTPUTS(ff_video_default_filterpad),
    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_YUV420P),
    .priv_class    = &removelogo_class,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
};