diff options
author | Mark Reid <mindmark@gmail.com> | 2021-10-05 20:58:30 -0700 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2021-10-10 22:23:48 +0200 |
commit | 716b39674059d5b416faef92afd41654a6d9469b (patch) | |
tree | 25652c77af1ac70c439ba3e1a7f879d4b08cfb5b /libavfilter/vf_lut3d.c | |
parent | 5133f4c2c1149feef3248ba2cb29537e8d8fbe38 (diff) | |
download | ffmpeg-716b39674059d5b416faef92afd41654a6d9469b.tar.gz |
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I hadn't seen before, which makes the implementation faster.
The bit-shifting table I was using before is no longer needed, and I was able to remove quite a few lines.
I also added use of FMA in the AVX2 version.
f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp, 1 runs, 0 skips
1434035335 UNITS in lut3d->interp, 2 runs, 0 skips
1423615347 UNITS in lut3d->interp, 4 runs, 0 skips
1426268863 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
905484420 UNITS in lut3d->interp, 1 runs, 0 skips
905659010 UNITS in lut3d->interp, 2 runs, 0 skips
915167140 UNITS in lut3d->interp, 4 runs, 0 skips
915834222 UNITS in lut3d->interp, 8 runs, 0 skips
avx
574794860 UNITS in lut3d->interp, 1 runs, 0 skips
581035090 UNITS in lut3d->interp, 2 runs, 0 skips
584116720 UNITS in lut3d->interp, 4 runs, 0 skips
581460290 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
301698880 UNITS in lut3d->interp, 1 runs, 0 skips
301982880 UNITS in lut3d->interp, 2 runs, 0 skips
306962430 UNITS in lut3d->interp, 4 runs, 0 skips
305472025 UNITS in lut3d->interp, 8 runs, 0 skips
gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp, 1 runs, 0 skips
1502922990 UNITS in lut3d->interp, 2 runs, 0 skips
1496114307 UNITS in lut3d->interp, 4 runs, 0 skips
1492554551 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
980777180 UNITS in lut3d->interp, 1 runs, 0 skips
986121520 UNITS in lut3d->interp, 2 runs, 0 skips
986489840 UNITS in lut3d->interp, 4 runs, 0 skips
998832248 UNITS in lut3d->interp, 8 runs, 0 skips
avx
622212360 UNITS in lut3d->interp, 1 runs, 0 skips
622981160 UNITS in lut3d->interp, 2 runs, 0 skips
645396315 UNITS in lut3d->interp, 4 runs, 0 skips
641057075 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
321336400 UNITS in lut3d->interp, 1 runs, 0 skips
321268920 UNITS in lut3d->interp, 2 runs, 0 skips
323459895 UNITS in lut3d->interp, 4 runs, 0 skips
324949967 UNITS in lut3d->interp, 8 runs, 0 skips
Diffstat (limited to 'libavfilter/vf_lut3d.c')
-rw-r--r-- | libavfilter/vf_lut3d.c | 61 |
1 files changed, 5 insertions, 56 deletions
```diff
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 8ec07f8ab0..7ef96906fc 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -31,73 +31,18 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/avassert.h"
-#include "libavutil/pixdesc.h"
 #include "libavutil/avstring.h"
-#include "avfilter.h"
 #include "drawutils.h"
 #include "formats.h"
-#include "framesync.h"
 #include "internal.h"
 #include "video.h"
+#include "lut3d.h"
 
 #define R 0
 #define G 1
 #define B 2
 #define A 3
 
-enum interp_mode {
-    INTERPOLATE_NEAREST,
-    INTERPOLATE_TRILINEAR,
-    INTERPOLATE_TETRAHEDRAL,
-    INTERPOLATE_PYRAMID,
-    INTERPOLATE_PRISM,
-    NB_INTERP_MODE
-};
-
-struct rgbvec {
-    float r, g, b;
-};
-
-/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
- * of 512x512 (64x64x64) */
-#define MAX_LEVEL 256
-#define PRELUT_SIZE 65536
-
-typedef struct Lut3DPreLut {
-    int size;
-    float min[3];
-    float max[3];
-    float scale[3];
-    float* lut[3];
-} Lut3DPreLut;
-
-typedef struct LUT3DContext {
-    const AVClass *class;
-    int interpolation; ///<interp_mode
-    char *file;
-    uint8_t rgba_map[4];
-    int step;
-    avfilter_action_func *interp;
-    struct rgbvec scale;
-    struct rgbvec *lut;
-    int lutsize;
-    int lutsize2;
-    Lut3DPreLut prelut;
-#if CONFIG_HALDCLUT_FILTER
-    uint8_t clut_rgba_map[4];
-    int clut_step;
-    int clut_bits;
-    int clut_planar;
-    int clut_float;
-    int clut_width;
-    FFFrameSync fs;
-#endif
-} LUT3DContext;
-
-typedef struct ThreadData {
-    AVFrame *in, *out;
-} ThreadData;
-
 #define OFFSET(x) offsetof(LUT3DContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 #define TFLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
@@ -1203,6 +1148,10 @@ static int config_input(AVFilterLink *inlink)
         av_assert0(0);
     }
 
+    if (ARCH_X86) {
+        ff_lut3d_init_x86(lut3d, desc);
+    }
+
     return 0;
 }
 
```