arm/aarch64: vp9lpf: Calculate !hev directly

Previously we first calculated hev, and then negated it.

Since we were able to schedule the negation in the middle of another
calculation, we don't see a gain in every case.

Before:                       Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:       147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:       242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:      500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:     971.2   825.5   731.5   579.0        453.0

After:
vp9_loop_filter_v_4_8_neon:       143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:       241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:      497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:     965.2   818.7   731.4   579.0        452.0

This is cherrypicked from libav commit
e1f9de86f454861b69b199ad801adc2ec6c3b220.

Signed-off-by: Martin Storsjö <martin@martin.st>
author: Martin Storsjö <martin@martin.st> 2017-01-12 16:52:33 +0200
committer: Martin Storsjö <martin@martin.st> 2017-03-11 13:14:48 +0200
commit: f0ecbb13cf1cf706a1350dad657219dc7b3c131e (patch)
tree: cb74533c73c2f2426a33a7476ebc367a46a196ae /libavcodec/aarch64
parent: 148cc0bb890839bc2a9cda514c5e71acc39eb374 (diff)
download: ffmpeg-f0ecbb13cf1cf706a1350dad657219dc7b3c131e.tar.gz
1 files changed, 2 insertions, 3 deletions
diff --git a/libavcodec/aarch64/vp9lpf_neon.S b/libavcodec/aarch64/vp9lpf_neon.S
index 55e1964c47..7fe2c88f9e 100644
--- a/libavcodec/aarch64/vp9lpf_neon.S
+++ b/libavcodec/aarch64/vp9lpf_neon.S
@@ -292,7 +292,7 @@
 .if \mix != 0
         sxtl            v1.8h,  v1.8b
 .endif
-        cmhi            v5\sz,  v5\sz,  v3\sz  // hev
+        cmhs            v5\sz,  v3\sz,  v5\sz  // !hev
 .if \wd == 8
         // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
 .if \mix != 0
@@ -306,11 +306,10 @@
 .elseif \wd == 8
         bic             v4\sz,  v4\sz,  v6\sz  // fm && !flat8in
 .endif
-        mvn             v5\sz,  v5\sz          // !hev
+        and             v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
 .if \wd == 16
         and             v7\sz,  v7\sz,  v6\sz  // flat8out && flat8in && fm
 .endif
-        and             v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
 
         mul_sz          \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp5\().8h,  \tmp5\().8h, \sz // 3 * (q0 - p0)
         bic             \tmp1\sz,  \tmp1\sz,  v5\sz    // if (!hev) av_clip_int8 = 0
author	Martin Storsjö <martin@martin.st>	2017-01-12 16:52:33 +0200
committer	Martin Storsjö <martin@martin.st>	2017-03-11 13:14:48 +0200
commit	f0ecbb13cf1cf706a1350dad657219dc7b3c131e (patch)
tree	cb74533c73c2f2426a33a7476ebc367a46a196ae /libavcodec/aarch64
parent	148cc0bb890839bc2a9cda514c5e71acc39eb374 (diff)
download	ffmpeg-f0ecbb13cf1cf706a1350dad657219dc7b3c131e.tar.gz