aboutsummaryrefslogtreecommitdiffstats
path: root/libavfilter/vf_lut3d.c
diff options
context:
space:
mode:
authorMark Reid <mindmark@gmail.com>2021-10-05 20:58:30 -0700
committerPaul B Mahol <onemda@gmail.com>2021-10-10 22:23:48 +0200
commit716b39674059d5b416faef92afd41654a6d9469b (patch)
tree25652c77af1ac70c439ba3e1a7f879d4b08cfb5b /libavfilter/vf_lut3d.c
parent5133f4c2c1149feef3248ba2cb29537e8d8fbe38 (diff)
downloadffmpeg-716b39674059d5b416faef92afd41654a6d9469b.tar.gz
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I didn't see before that leads to the implementation being faster.
The bit shifting table I was using before is no longer needed, and I was able to remove quite a few lines.
I also added use of FMA to the AVX2 version.

f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp,       1 runs,      0 skips
1434035335 UNITS in lut3d->interp,       2 runs,      0 skips
1423615347 UNITS in lut3d->interp,       4 runs,      0 skips
1426268863 UNITS in lut3d->interp,       8 runs,      0 skips
sse2
905484420 UNITS in lut3d->interp,       1 runs,      0 skips
905659010 UNITS in lut3d->interp,       2 runs,      0 skips
915167140 UNITS in lut3d->interp,       4 runs,      0 skips
915834222 UNITS in lut3d->interp,       8 runs,      0 skips
avx
574794860 UNITS in lut3d->interp,       1 runs,      0 skips
581035090 UNITS in lut3d->interp,       2 runs,      0 skips
584116720 UNITS in lut3d->interp,       4 runs,      0 skips
581460290 UNITS in lut3d->interp,       8 runs,      0 skips
avx2
301698880 UNITS in lut3d->interp,       1 runs,      0 skips
301982880 UNITS in lut3d->interp,       2 runs,      0 skips
306962430 UNITS in lut3d->interp,       4 runs,      0 skips
305472025 UNITS in lut3d->interp,       8 runs,      0 skips

gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp,       1 runs,      0 skips
1502922990 UNITS in lut3d->interp,       2 runs,      0 skips
1496114307 UNITS in lut3d->interp,       4 runs,      0 skips
1492554551 UNITS in lut3d->interp,       8 runs,      0 skips
sse2
980777180 UNITS in lut3d->interp,       1 runs,      0 skips
986121520 UNITS in lut3d->interp,       2 runs,      0 skips
986489840 UNITS in lut3d->interp,       4 runs,      0 skips
998832248 UNITS in lut3d->interp,       8 runs,      0 skips
avx
622212360 UNITS in lut3d->interp,       1 runs,      0 skips
622981160 UNITS in lut3d->interp,       2 runs,      0 skips
645396315 UNITS in lut3d->interp,       4 runs,      0 skips
641057075 UNITS in lut3d->interp,       8 runs,      0 skips
avx2
321336400 UNITS in lut3d->interp,       1 runs,      0 skips
321268920 UNITS in lut3d->interp,       2 runs,      0 skips
323459895 UNITS in lut3d->interp,       4 runs,      0 skips
324949967 UNITS in lut3d->interp,       8 runs,      0 skips
Diffstat (limited to 'libavfilter/vf_lut3d.c')
-rw-r--r--libavfilter/vf_lut3d.c61
1 files changed, 5 insertions, 56 deletions
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 8ec07f8ab0..7ef96906fc 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -31,73 +31,18 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/intfloat.h"
#include "libavutil/avassert.h"
-#include "libavutil/pixdesc.h"
#include "libavutil/avstring.h"
-#include "avfilter.h"
#include "drawutils.h"
#include "formats.h"
-#include "framesync.h"
#include "internal.h"
#include "video.h"
+#include "lut3d.h"
#define R 0
#define G 1
#define B 2
#define A 3
-enum interp_mode {
- INTERPOLATE_NEAREST,
- INTERPOLATE_TRILINEAR,
- INTERPOLATE_TETRAHEDRAL,
- INTERPOLATE_PYRAMID,
- INTERPOLATE_PRISM,
- NB_INTERP_MODE
-};
-
-struct rgbvec {
- float r, g, b;
-};
-
-/* 3D LUT don't often go up to level 32, but it is common to have a Hald CLUT
- * of 512x512 (64x64x64) */
-#define MAX_LEVEL 256
-#define PRELUT_SIZE 65536
-
-typedef struct Lut3DPreLut {
- int size;
- float min[3];
- float max[3];
- float scale[3];
- float* lut[3];
-} Lut3DPreLut;
-
-typedef struct LUT3DContext {
- const AVClass *class;
- int interpolation; ///<interp_mode
- char *file;
- uint8_t rgba_map[4];
- int step;
- avfilter_action_func *interp;
- struct rgbvec scale;
- struct rgbvec *lut;
- int lutsize;
- int lutsize2;
- Lut3DPreLut prelut;
-#if CONFIG_HALDCLUT_FILTER
- uint8_t clut_rgba_map[4];
- int clut_step;
- int clut_bits;
- int clut_planar;
- int clut_float;
- int clut_width;
- FFFrameSync fs;
-#endif
-} LUT3DContext;
-
-typedef struct ThreadData {
- AVFrame *in, *out;
-} ThreadData;
-
#define OFFSET(x) offsetof(LUT3DContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
#define TFLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
@@ -1203,6 +1148,10 @@ static int config_input(AVFilterLink *inlink)
av_assert0(0);
}
+ if (ARCH_X86) {
+ ff_lut3d_init_x86(lut3d, desc);
+ }
+
return 0;
}