diff options
author | Paul B Mahol <onemda@gmail.com> | 2017-12-25 15:46:04 +0100 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2017-12-25 15:46:04 +0100 |
commit | e712d301736106002264d631cff925a92dfe891d (patch) | |
tree | d719df00c7f654bc744754fb6cf7e779e0af281f | |
parent | c59e49f9b2f30184e6d74255f3c1b2a1a89c6520 (diff) | |
download | ffmpeg-e712d301736106002264d631cff925a92dfe891d.tar.gz |
avfilter/vf_convolve: add threading for complex multiplication
Signed-off-by: Paul B Mahol <onemda@gmail.com>
-rw-r--r-- | libavfilter/vf_convolve.c | 46 |
1 files changed, 32 insertions, 14 deletions
```diff
diff --git a/libavfilter/vf_convolve.c b/libavfilter/vf_convolve.c
index de58cdff8f..88ae884a19 100644
--- a/libavfilter/vf_convolve.c
+++ b/libavfilter/vf_convolve.c
@@ -377,6 +377,35 @@ static void get_output(ConvolveContext *s, AVFrame *out,
     }
 }
 
+static int complex_multiply(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ThreadData *td = arg;
+    FFTComplex *input = td->hdata;
+    FFTComplex *filter = td->vdata;
+    const int n = td->n;
+    int start = (n * jobnr ) / nb_jobs;
+    int end = (n * (jobnr+1)) / nb_jobs;
+    int y, x;
+
+    for (y = start; y < end; y++) {
+        int yn = y * n;
+
+        for (x = 0; x < n; x++) {
+            FFTSample re, im, ire, iim;
+
+            re = input[yn + x].re;
+            im = input[yn + x].im;
+            ire = filter[yn + x].re;
+            iim = filter[yn + x].im;
+
+            input[yn + x].re = ire * re - iim * im;
+            input[yn + x].im = iim * re + ire * im;
+        }
+    }
+
+    return 0;
+}
+
 static int do_convolve(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
@@ -442,21 +471,10 @@ static int do_convolve(FFFrameSync *fs)
             s->got_impulse[plane] = 1;
         }
 
-        for (y = 0; y < n; y++) {
-            int yn = y * n;
+        td.hdata = input;
+        td.vdata = filter;
 
-            for (x = 0; x < n; x++) {
-                FFTSample re, im, ire, iim;
-
-                re = input[yn + x].re;
-                im = input[yn + x].im;
-                ire = filter[yn + x].re;
-                iim = filter[yn + x].im;
-
-                input[yn + x].re = ire * re - iim * im;
-                input[yn + x].im = iim * re + ire * im;
-            }
-        }
+        ctx->internal->execute(ctx, complex_multiply, &td, NULL, FFMIN3(MAX_THREADS, n, ff_filter_get_nb_threads(ctx)));
 
         td.hdata = s->fft_hdata[plane];
         td.vdata = s->fft_vdata[plane];
```