aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/arm/mlpdsp_init_arm.c
diff options
context:
space:
mode:
authorBen Avison <bavison@riscosopen.org>2014-03-20 18:58:40 +0000
committerMartin Storsjö <martin@martin.st>2014-03-26 19:54:32 +0200
commit3b5946bccef6cd219f01d22e542ca5c6de68a7be (patch)
tree20f5eda099a221bcfba80044c60318704b103c92 /libavcodec/arm/mlpdsp_init_arm.c
parentb9eb03416d93a5c4ece27ffef5e6e11c81bec6fa (diff)
downloadffmpeg-3b5946bccef6cd219f01d22e542ca5c6de68a7be.tar.gz
truehd: add hand-scheduled ARM asm version of ff_mlp_pack_output.
Profiling results for overall decode and the output_data function in particular are as follows: Before After Mean StdDev Mean StdDev Confidence Change 6:2 total 339.6 15.1 329.3 16.0 95.8% +3.1% (insignificant) 6:2 function 24.6 6.0 9.9 3.1 100.0% +148.5% 8:2 total 324.5 15.5 323.6 14.3 15.2% +0.3% (insignificant) 8:2 function 20.4 3.9 9.9 3.4 100.0% +104.7% 6:6 total 572.8 20.6 539.9 24.2 100.0% +6.1% 6:6 function 54.5 5.6 16.0 3.8 100.0% +240.9% 8:8 total 741.5 21.2 702.5 18.5 100.0% +5.6% 8:8 function 63.9 7.6 18.4 4.8 100.0% +247.3% The assembly version has also been tested with a fuzz tester to ensure that any combinations of inputs not exercised by my available test streams still generate mathematically identical results to the C version. Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/arm/mlpdsp_init_arm.c')
-rw-r--r--libavcodec/arm/mlpdsp_init_arm.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index e3eeb21e06..4cdd10caf5 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -41,6 +41,98 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples,
int access_unit_size_pow2,
int32_t mask);
+#define DECLARE_PACK(order,channels,shift) \
+ int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
+#define ENUMERATE_PACK(order,channels,shift) \
+ ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6,
+#define PACK_CHANNELS(macro,order,channels) \
+ macro(order,channels,0) \
+ macro(order,channels,1) \
+ macro(order,channels,2) \
+ macro(order,channels,3) \
+ macro(order,channels,4) \
+ macro(order,channels,5) \
+ macro(order,channels,mixed)
+#define PACK_ORDER(macro,order) \
+ PACK_CHANNELS(macro,order,2) \
+ PACK_CHANNELS(macro,order,6) \
+ PACK_CHANNELS(macro,order,8)
+#define PACK_ALL(macro) \
+ PACK_ORDER(macro,outof) \
+ PACK_ORDER(macro,in)
+PACK_ALL(DECLARE_PACK)
+
+#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0
+#if CONFIG_THUMB
+#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0
+#endif
+
+static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
+{
+ int ch_index;
+ int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0];
+ int inorder = 1;
+ static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = {
+ PACK_ALL(ENUMERATE_PACK)
+ };
+ int i;
+
+ if (!is32) // don't support 16-bit output (it's not used by TrueHD)
+ return ff_mlp_pack_output;
+
+ switch (max_matrix_channel) {
+ case 1:
+ ch_index = 0;
+ break;
+ case 5:
+ ch_index = 1;
+ break;
+ case 7:
+ ch_index = 2;
+ break;
+ default:
+ return ff_mlp_pack_output;
+ }
+
+ for (i = 0; i <= max_matrix_channel; i++) {
+ if (shift != 6 && output_shift[i] != shift)
+ shift = 6; // indicate mixed shifts
+ if (ch_assign[i] != i)
+ inorder = 0;
+ }
+#if CONFIG_THUMB
+ if (!inorder)
+ return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode
+#else
+ if (shift == 6 && !inorder)
+ return ff_mlp_pack_output; // can't currently handle both an order array and a shift array
+#endif
+
+ return routine[(inorder*3+ch_index)*7+shift];
+}
+
av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -49,4 +141,6 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
c->mlp_filter_channel = ff_mlp_filter_channel_arm;
c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
}
+ if (have_armv6(cpu_flags))
+ c->mlp_select_pack_output = mlp_select_pack_output_armv6;
}