aboutsummaryrefslogtreecommitdiffstats
path: root/compat/float
diff options
context:
space:
mode:
authorMark Reid <mindmark@gmail.com>2021-10-05 20:58:30 -0700
committerPaul B Mahol <onemda@gmail.com>2021-10-10 22:23:48 +0200
commit716b39674059d5b416faef92afd41654a6d9469b (patch)
tree25652c77af1ac70c439ba3e1a7f879d4b08cfb5b /compat/float
parent5133f4c2c1149feef3248ba2cb29537e8d8fbe38 (diff)
downloadffmpeg-716b39674059d5b416faef92afd41654a6d9469b.tar.gz
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I hadn't seen before, which makes the implementation faster. The bit-shifting table I was using before is no longer needed, and I was able to remove quite a few lines. I also added use of FMA in the AVX2 version.

f32 1920x1080 1 thread with prelut

c impl
1434012700 UNITS in lut3d->interp,       1 runs,      0 skips
1434035335 UNITS in lut3d->interp,       2 runs,      0 skips
1423615347 UNITS in lut3d->interp,       4 runs,      0 skips
1426268863 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
905484420 UNITS in lut3d->interp,       1 runs,      0 skips
905659010 UNITS in lut3d->interp,       2 runs,      0 skips
915167140 UNITS in lut3d->interp,       4 runs,      0 skips
915834222 UNITS in lut3d->interp,       8 runs,      0 skips

avx
574794860 UNITS in lut3d->interp,       1 runs,      0 skips
581035090 UNITS in lut3d->interp,       2 runs,      0 skips
584116720 UNITS in lut3d->interp,       4 runs,      0 skips
581460290 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
301698880 UNITS in lut3d->interp,       1 runs,      0 skips
301982880 UNITS in lut3d->interp,       2 runs,      0 skips
306962430 UNITS in lut3d->interp,       4 runs,      0 skips
305472025 UNITS in lut3d->interp,       8 runs,      0 skips

gbrap16 1920x1080 1 thread with prelut

c impl
1480894840 UNITS in lut3d->interp,       1 runs,      0 skips
1502922990 UNITS in lut3d->interp,       2 runs,      0 skips
1496114307 UNITS in lut3d->interp,       4 runs,      0 skips
1492554551 UNITS in lut3d->interp,       8 runs,      0 skips

sse2
980777180 UNITS in lut3d->interp,       1 runs,      0 skips
986121520 UNITS in lut3d->interp,       2 runs,      0 skips
986489840 UNITS in lut3d->interp,       4 runs,      0 skips
998832248 UNITS in lut3d->interp,       8 runs,      0 skips

avx
622212360 UNITS in lut3d->interp,       1 runs,      0 skips
622981160 UNITS in lut3d->interp,       2 runs,      0 skips
645396315 UNITS in lut3d->interp,       4 runs,      0 skips
641057075 UNITS in lut3d->interp,       8 runs,      0 skips

avx2
321336400 UNITS in lut3d->interp,       1 runs,      0 skips
321268920 UNITS in lut3d->interp,       2 runs,      0 skips
323459895 UNITS in lut3d->interp,       4 runs,      0 skips
324949967 UNITS in lut3d->interp,       8 runs,      0 skips
Diffstat (limited to 'compat/float')
0 files changed, 0 insertions, 0 deletions