aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/arm/dca.h
diff options
context:
space:
mode:
authorMans Rullgard <mans@mansr.com>2011-10-23 17:39:49 +0100
committerMans Rullgard <mans@mansr.com>2011-11-25 13:19:53 +0000
commit00a856e3f95214c54a878b7cbd6e8ae8c5ce3ca9 (patch)
tree5fa20f30bafc61e71df1b89370f03b1dbf69505d /libavcodec/arm/dca.h
parent035af998ad03020a3dda4e662dfb97c68bbabaaa (diff)
downloadffmpeg-00a856e3f95214c54a878b7cbd6e8ae8c5ce3ca9.tar.gz
dca: ARMv6 optimised decode_blockcode()
This is a hand-tuned version of the code with impossible parts of the FASTDIV function omitted. 2-5% faster overall on Cortex-A8. Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/arm/dca.h')
-rw-r--r--libavcodec/arm/dca.h55
1 files changed, 55 insertions, 0 deletions
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
index 38c8d1f9cc..9ff7f7c75e 100644
--- a/libavcodec/arm/dca.h
+++ b/libavcodec/arm/dca.h
@@ -23,6 +23,61 @@
#include <stdint.h>
#include "config.h"
+#include "libavutil/intmath.h"
+
+#if HAVE_ARMV6 && HAVE_INLINE_ASM
+
+#define decode_blockcodes decode_blockcodes /* self-define; presumably lets generic code detect this override with #ifndef -- TODO confirm */
+static inline int decode_blockcodes(int code1, int code2, int levels,
+ int *values)
+{
+ int v0, v1, v2, v3, v4, v5; /* scratch registers for the asm (operands %8-%13) */
+ /*
+ * ARMv6 inline-asm routine that splits code1 and code2 into four values
+ * each: results derived from code1 are stored to values[0..3], results
+ * derived from code2 to values[4..7]. The two chains are independent
+ * and their instructions are interleaved.
+ *
+ * Operand map:
+ *   %0-%7    values[0]..values[7] (memory outputs)
+ *   %8-%10   v0, v1, v2 (scratch for the code1 chain)
+ *   %11-%13  v3, v4, v5 (scratch for the code2 chain)
+ *   %14, %15 code1, code2 (read-write)
+ *   %16      levels - 1; only used as (levels - 1) >> 1, "off" below
+ *   %17      -levels
+ *   %18      ff_inverse[levels], "inv" below
+ *
+ * One step of a chain: smmul takes the high 32 bits of the 64-bit
+ * product x * inv (written hi32() below, giving the next quotient q);
+ * smlabb computes x + q * (-levels) from the bottom 16 bits of q and
+ * of -levels; sub then subtracts off; str stores the result.
+ *
+ * NOTE(review): ff_inverse is declared outside this view; presumably it
+ * holds fixed-point reciprocals so that hi32(x * inv) == x / levels --
+ * confirm. smlabb reads only the low halfwords of its multiplicands, so
+ * this presumably relies on q and levels fitting in 16 signed bits --
+ * verify against the callers.
+ *
+ * Returns the OR of the two final quotients left in code1 and code2;
+ * presumably non-zero flags an out-of-range code -- confirm at call site.
+ */
+ __asm__ ("smmul %8, %14, %18 \n" /* v0 = hi32(code1 * inv) */
+ "smmul %11, %15, %18 \n" /* v3 = hi32(code2 * inv) */
+ "smlabb %14, %8, %17, %14 \n" /* code1 += v0 * -levels */
+ "smlabb %15, %11, %17, %15 \n" /* code2 += v3 * -levels */
+ "smmul %9, %8, %18 \n" /* v1 = hi32(v0 * inv) */
+ "smmul %12, %11, %18 \n" /* v4 = hi32(v3 * inv) */
+ "sub %14, %14, %16, lsr #1 \n" /* code1 -= off */
+ "sub %15, %15, %16, lsr #1 \n" /* code2 -= off */
+ "smlabb %8, %9, %17, %8 \n" /* v0 += v1 * -levels */
+ "smlabb %11, %12, %17, %11 \n" /* v3 += v4 * -levels */
+ "smmul %10, %9, %18 \n" /* v2 = hi32(v1 * inv) */
+ "smmul %13, %12, %18 \n" /* v5 = hi32(v4 * inv) */
+ "str %14, %0 \n" /* values[0] = code1 */
+ "str %15, %4 \n" /* values[4] = code2 */
+ "sub %8, %8, %16, lsr #1 \n" /* v0 -= off */
+ "sub %11, %11, %16, lsr #1 \n" /* v3 -= off */
+ "smlabb %9, %10, %17, %9 \n" /* v1 += v2 * -levels */
+ "smlabb %12, %13, %17, %12 \n" /* v4 += v5 * -levels */
+ "smmul %14, %10, %18 \n" /* code1 = hi32(v2 * inv), final quotient */
+ "smmul %15, %13, %18 \n" /* code2 = hi32(v5 * inv), final quotient */
+ "str %8, %1 \n" /* values[1] = v0 */
+ "str %11, %5 \n" /* values[5] = v3 */
+ "sub %9, %9, %16, lsr #1 \n" /* v1 -= off */
+ "sub %12, %12, %16, lsr #1 \n" /* v4 -= off */
+ "smlabb %10, %14, %17, %10 \n" /* v2 += code1 * -levels */
+ "smlabb %13, %15, %17, %13 \n" /* v5 += code2 * -levels */
+ "str %9, %2 \n" /* values[2] = v1 */
+ "str %12, %6 \n" /* values[6] = v4 */
+ "sub %10, %10, %16, lsr #1 \n" /* v2 -= off */
+ "sub %13, %13, %16, lsr #1 \n" /* v5 -= off */
+ "str %10, %3 \n" /* values[3] = v2 */
+ "str %13, %7 \n" /* values[7] = v5 */
+ : "=m"(values[0]), "=m"(values[1]), /* %0, %1: memory outputs */
+ "=m"(values[2]), "=m"(values[3]), /* %2, %3 */
+ "=m"(values[4]), "=m"(values[5]), /* %4, %5 */
+ "=m"(values[6]), "=m"(values[7]), /* %6, %7 */
+ "=&r"(v0), "=&r"(v1), "=&r"(v2), /* %8-%10: early-clobber, written before all inputs are consumed */
+ "=&r"(v3), "=&r"(v4), "=&r"(v5), /* %11-%13: early-clobber scratch */
+ "+&r"(code1), "+&r"(code2) /* %14, %15: read-write, reused for the final quotients */
+ : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels])); /* %16, %17, %18: read-only inputs */
+
+ return code1 | code2; /* OR of the final quotients from the last smmul pair */
+}
+
+#endif
#if HAVE_NEON && HAVE_INLINE_ASM && HAVE_ASM_MOD_Y