arm: Avoid using .dn register aliases

clang now (in the upcoming 5.0 version) is capable of building our arm assembly without relying on gas-preprocessor, although clang/LLVM doesn't support .dn register aliases.

The VC1 MC assembly was only built and used if the chosen assembler supported the .dn directives though. This was supported as long as gas-preprocessor was used.

This means that VC1 decoding got a speed regression on clang 5.0, unless the user manually chose to use gas-preprocessor again.

By avoiding using the .dn register aliases, we can build the VC1 MC assembly with the latest clang version.

Support for the .dn/.qn directives in clang/LLVM isn't actively planned, see https://bugs.llvm.org/show_bug.cgi?id=18199.

This partially reverts 896a5bff64264f4d01ed98eacc97a67260c1e17e.

Signed-off-by: Martin Storsjö <martin@martin.st>
author: Martin Storsjö <martin@martin.st> 2017-05-09 10:25:44 +0300
committer: Martin Storsjö <martin@martin.st> 2017-05-15 09:52:18 +0300
commit: d7320ca3ed10f0d35b3740fa03341161e74275ea (patch)
tree: 399ceab1c0b0f6d92d493ae72dfd1cce22d63ad0 /libavcodec
parent: 6ccf76aec73b2cd598bb1e65d126d8a12540c411 (diff)
download: ffmpeg-d7320ca3ed10f0d35b3740fa03341161e74275ea.tar.gz
2 files changed, 5 insertions, 20 deletions
diff --git a/libavcodec/arm/vc1dsp_init_neon.c b/libavcodec/arm/vc1dsp_init_neon.c
index 08c07c4443..1c065970a7 100644
--- a/libavcodec/arm/vc1dsp_init_neon.c
+++ b/libavcodec/arm/vc1dsp_init_neon.c
@@ -22,8 +22,6 @@
 #include "libavcodec/vc1dsp.h"
 #include "vc1dsp.h"
 
-#include "config.h"
-
 void ff_vc1_inv_trans_8x8_neon(int16_t *block);
 void ff_vc1_inv_trans_4x8_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 void ff_vc1_inv_trans_8x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
@@ -93,7 +91,6 @@ av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
     dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon;
 
     dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_pixels8x8_neon;
-    if (HAVE_AS_DN_DIRECTIVE) {
     dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_neon;
     dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_neon;
     dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_neon;
@@ -109,7 +106,6 @@ av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
     dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_neon;
     dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_neon;
     dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_neon;
-    }
 
     dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
     dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S
index 1653a4c5ad..ff88fe23c7 100644
--- a/libavcodec/arm/vc1dsp_neon.S
+++ b/libavcodec/arm/vc1dsp_neon.S
@@ -663,7 +663,6 @@ function ff_vc1_inv_trans_4x4_neon, export=1
         bx              lr
 endfunc
 
-#if HAVE_AS_DN_DIRECTIVE
 @ The absolute value of multiplication constants from vc1_mspel_filter and vc1_mspel_{ver,hor}_filter_16bits.
 @ The sign is embedded in the code below that carries out the multiplication (mspel_filter{,.16}).
 #define MSPEL_MODE_1_MUL_CONSTANTS  4 53 18 3
@@ -689,22 +688,18 @@ endfunc
 
 @ Setup constants in registers for a subsequent use of mspel_filter{,.16}.
 .macro mspel_constants typesize reg_a reg_b reg_c reg_d filter_a filter_b filter_c filter_d reg_add filter_add_register
-  @ Define double-word register aliases. Typesize should be i8 or i16.
-  ra .dn \reg_a\().\typesize
-  rb .dn \reg_b\().\typesize
-  rc .dn \reg_c\().\typesize
-  rd .dn \reg_d\().\typesize
+  @ Typesize should be i8 or i16.
 
   @ Only set the register if the value is not 1 and unique
   .if \filter_a != 1
-        vmov            ra,  #\filter_a              @ ra = filter_a
+        vmov.\typesize  \reg_a,  #\filter_a          @ reg_a = filter_a
   .endif
-        vmov            rb,  #\filter_b              @ rb = filter_b
+        vmov.\typesize  \reg_b,  #\filter_b          @ reg_b = filter_b
   .if \filter_b != \filter_c
-        vmov            rc,  #\filter_c              @ rc = filter_c
+        vmov.\typesize  \reg_c,  #\filter_c          @ reg_c = filter_c
   .endif
   .if \filter_d != 1
-        vmov            rd,  #\filter_d              @ rd = filter_d
+        vmov.\typesize  \reg_d,  #\filter_d          @ reg_d = filter_d
   .endif
   @ vdup to double the size of typesize
   .ifc \typesize,i8
@@ -712,11 +707,6 @@ endfunc
   .else
         vdup.32         \reg_add,  \filter_add_register     @ reg_add = filter_add_register
   .endif
-
-  .unreq ra
-  .unreq rb
-  .unreq rc
-  .unreq rd
 .endm
 
 @ After mspel_constants has been used, do the filtering.
@@ -987,7 +977,6 @@ PUT_VC1_MSPEL_MC_V_ONLY(2)
 PUT_VC1_MSPEL_MC_V_ONLY(3)
 
 #undef PUT_VC1_MSPEL_MC_V_ONLY
-#endif
 
 function ff_put_pixels8x8_neon, export=1
         vld1.64         {d0}, [r1], r2
author	Martin Storsjö <martin@martin.st>	2017-05-09 10:25:44 +0300
committer	Martin Storsjö <martin@martin.st>	2017-05-15 09:52:18 +0300
commit	d7320ca3ed10f0d35b3740fa03341161e74275ea (patch)
tree	399ceab1c0b0f6d92d493ae72dfd1cce22d63ad0 /libavcodec
parent	6ccf76aec73b2cd598bb1e65d126d8a12540c411 (diff)
download	ffmpeg-d7320ca3ed10f0d35b3740fa03341161e74275ea.tar.gz