diff options
author | Mark Reid <mindmark@gmail.com> | 2021-10-05 20:58:30 -0700 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2021-10-10 22:23:48 +0200 |
commit | 716b39674059d5b416faef92afd41654a6d9469b (patch) | |
tree | 25652c77af1ac70c439ba3e1a7f879d4b08cfb5b /libavfilter/lut3d.h | |
parent | 5133f4c2c1149feef3248ba2cb29537e8d8fbe38 (diff) | |
download | ffmpeg-716b39674059d5b416faef92afd41654a6d9469b.tar.gz |
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I hadn't noticed before, which makes the implementation faster.
The bit-shifting table I was using before is no longer needed, and I was able to remove quite a few lines.
I also added use of FMA in the AVX2 version.
f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp, 1 runs, 0 skips
1434035335 UNITS in lut3d->interp, 2 runs, 0 skips
1423615347 UNITS in lut3d->interp, 4 runs, 0 skips
1426268863 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
905484420 UNITS in lut3d->interp, 1 runs, 0 skips
905659010 UNITS in lut3d->interp, 2 runs, 0 skips
915167140 UNITS in lut3d->interp, 4 runs, 0 skips
915834222 UNITS in lut3d->interp, 8 runs, 0 skips
avx
574794860 UNITS in lut3d->interp, 1 runs, 0 skips
581035090 UNITS in lut3d->interp, 2 runs, 0 skips
584116720 UNITS in lut3d->interp, 4 runs, 0 skips
581460290 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
301698880 UNITS in lut3d->interp, 1 runs, 0 skips
301982880 UNITS in lut3d->interp, 2 runs, 0 skips
306962430 UNITS in lut3d->interp, 4 runs, 0 skips
305472025 UNITS in lut3d->interp, 8 runs, 0 skips
gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp, 1 runs, 0 skips
1502922990 UNITS in lut3d->interp, 2 runs, 0 skips
1496114307 UNITS in lut3d->interp, 4 runs, 0 skips
1492554551 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
980777180 UNITS in lut3d->interp, 1 runs, 0 skips
986121520 UNITS in lut3d->interp, 2 runs, 0 skips
986489840 UNITS in lut3d->interp, 4 runs, 0 skips
998832248 UNITS in lut3d->interp, 8 runs, 0 skips
avx
622212360 UNITS in lut3d->interp, 1 runs, 0 skips
622981160 UNITS in lut3d->interp, 2 runs, 0 skips
645396315 UNITS in lut3d->interp, 4 runs, 0 skips
641057075 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
321336400 UNITS in lut3d->interp, 1 runs, 0 skips
321268920 UNITS in lut3d->interp, 2 runs, 0 skips
323459895 UNITS in lut3d->interp, 4 runs, 0 skips
324949967 UNITS in lut3d->interp, 8 runs, 0 skips
Diffstat (limited to 'libavfilter/lut3d.h')
-rw-r--r-- | libavfilter/lut3d.h | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h new file mode 100644 index 0000000000..bc32eac91c --- /dev/null +++ b/libavfilter/lut3d.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013 Clément Bœsch + * Copyright (c) 2018 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVFILTER_LUT3D_H +#define AVFILTER_LUT3D_H + /* Shared declarations for the lut3d filter code: interpolation mode ids, the LUT and pre-LUT containers, the filter context, and the x86 init prototype. */ +#include "libavutil/pixdesc.h" +#include "framesync.h" +#include "avfilter.h" + /* Identifiers for the available interpolation modes. NB_INTERP_MODE is the last enumerator, so its value equals the number of modes listed before it; it is not a mode itself. */ +enum interp_mode { + INTERPOLATE_NEAREST, + INTERPOLATE_TRILINEAR, + INTERPOLATE_TETRAHEDRAL, + INTERPOLATE_PYRAMID, + INTERPOLATE_PRISM, + NB_INTERP_MODE +}; + /* A triplet of floats named r, g and b. Used below both as the LUT entry type and as the scale member of LUT3DContext. */ +struct rgbvec { + float r, g, b; +}; + +/* 3D LUTs don't often go up to level 32, but it is common to have a Hald CLUT + * of 512x512 (64x64x64). + * NOTE(review): both examples are well below MAX_LEVEL (256); confirm which limit the loaders actually rely on. */ +#define MAX_LEVEL 256 +#define PRELUT_SIZE 65536 + /* Pre-LUT data: a size plus min, max, scale and lut arrays of three entries each. Presumably one entry per R/G/B channel, and lut[i] presumably holds size floats; confirm against the code that fills this struct. */ +typedef struct Lut3DPreLut { + int size; + float min[3]; + float max[3]; + float scale[3]; + float* lut[3]; +} Lut3DPreLut; + /* Filter context. Holds the LUT pointer and its size fields, a scale vector, the selected interpolation mode, the file option string, the rgba_map and step layout fields, the interp callback and the embedded pre-LUT. NOTE(review): lutsize2 is presumably lutsize squared; confirm where it is assigned. The clut_* members and fs are only present when CONFIG_HALDCLUT_FILTER is nonzero, so the struct layout depends on the build configuration. */ +typedef struct LUT3DContext { + const AVClass *class; + struct rgbvec *lut; + int lutsize; + int lutsize2; + struct rgbvec scale; + int interpolation; ///<interp_mode + char *file; + uint8_t rgba_map[4]; + int step; + avfilter_action_func *interp; + Lut3DPreLut prelut; +#if CONFIG_HALDCLUT_FILTER + uint8_t clut_rgba_map[4]; + int clut_step; + int 
clut_bits; + int clut_planar; + int clut_float; + int clut_width; + FFFrameSync fs; +#endif +} LUT3DContext; + /* Pair of frame pointers, in and out. Presumably handed to the interp callback as its per-job argument; confirm at the call site. */ +typedef struct ThreadData { + AVFrame *in, *out; +} ThreadData; + /* x86-specific init entry point, declared here and defined elsewhere. Takes the filter context and a pixel format descriptor. Presumably replaces s->interp with an optimized routine when the format and CPU allow; confirm in the x86 sources. */ +void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc); + +#endif /* AVFILTER_LUT3D_H */ |