arm: vp9itxfm: Move the load_add_store macro out from the itxfm16 pass2 function

This allows reusing the macro for a separate implementation of the pass2 function.

Signed-off-by: Martin Storsjö <martin@martin.st>
author: Martin Storsjö <martin@martin.st> 2017-02-05 22:55:20 +0200
committer: Martin Storsjö <martin@martin.st> 2017-02-09 12:31:53 +0200
commit: 47b3c2c18d1897f3c753ba0cec4b2d7aa24526af (patch)
tree: a13cb9c5b58916e5ec3fc06122fd0ec826ce78f9 /libavcodec/arm
parent: 115476018d2c97df7e9b4445fe8f6cc7420ab91f (diff)
download: ffmpeg-47b3c2c18d1897f3c753ba0cec4b2d7aa24526af.tar.gz
1 files changed, 36 insertions, 36 deletions
diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index fd53a20a73..b3188bc711 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -657,6 +657,42 @@ function iadst16
         bx              lr
 endfunc
 
+.macro load_add_store coef0, coef1, coef2, coef3 @ dst += (coef + 32) >> 6 for four q regs; rows read/written via r0 and r3, stride r1; clobbers d4-d7
+        vrshr.s16       \coef0, \coef0, #6        @ rounding shift right by 6 on each signed 16 bit lane
+        vrshr.s16       \coef1, \coef1, #6
+
+        vld1.32         {d4[]},   [r0,:32], r1    @ 4 bytes at r0 replicated to both lanes of d4; r0 += r1
+        vld1.32         {d4[1]},  [r3,:32], r1    @ overwrite the high lane with 4 bytes at r3; r3 += r1
+        vrshr.s16       \coef2, \coef2, #6        @ remaining rounding is interleaved with the loads
+        vrshr.s16       \coef3, \coef3, #6
+        vld1.32         {d5[]},   [r0,:32], r1    @ second r0/r3 row pair into d5
+        vld1.32         {d5[1]},  [r3,:32], r1
+        vaddw.u8        \coef0, \coef0, d4        @ widening add: coef0 += the 8 unsigned bytes of d4
+        vld1.32         {d6[]},   [r0,:32], r1    @ third r0/r3 row pair into d6
+        vld1.32         {d6[1]},  [r3,:32], r1
+        vaddw.u8        \coef1, \coef1, d5
+        vld1.32         {d7[]},   [r0,:32], r1    @ fourth r0/r3 row pair into d7
+        vld1.32         {d7[1]},  [r3,:32], r1
+
+        vqmovun.s16     d4,  \coef0               @ narrow the sums to 8 bit with unsigned saturation
+        vqmovun.s16     d5,  \coef1
+        sub             r0,  r0,  r1, lsl #2      @ rewind r0 by 4*r1, back to the first row that was loaded
+        sub             r3,  r3,  r1, lsl #2      @ likewise for r3
+        vaddw.u8        \coef2, \coef2, d6
+        vaddw.u8        \coef3, \coef3, d7
+        vst1.32         {d4[0]},  [r0,:32], r1    @ low lane goes back to the r0 row; r0 += r1
+        vst1.32         {d4[1]},  [r3,:32], r1    @ high lane goes back to the r3 row; r3 += r1
+        vqmovun.s16     d6,  \coef2
+        vst1.32         {d5[0]},  [r0,:32], r1
+        vst1.32         {d5[1]},  [r3,:32], r1
+        vqmovun.s16     d7,  \coef3
+
+        vst1.32         {d6[0]},  [r0,:32], r1
+        vst1.32         {d6[1]},  [r3,:32], r1
+        vst1.32         {d7[0]},  [r0,:32], r1
+        vst1.32         {d7[1]},  [r3,:32], r1    @ on exit r0 and r3 are each 4*r1 past their entry values
+.endm
+
 .macro itxfm16_1d_funcs txfm
 @ Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
 @ transpose into a horizontal 16x4 slice and store.
@@ -739,44 +775,8 @@ function \txfm\()16_1d_4x16_pass2_neon
         lsl             r1,  r1,  #1
         bl              \txfm\()16
 
-.macro load_add_store coef0, coef1, coef2, coef3
-        vrshr.s16       \coef0, \coef0, #6
-        vrshr.s16       \coef1, \coef1, #6
-
-        vld1.32         {d4[]},   [r0,:32], r1
-        vld1.32         {d4[1]},  [r3,:32], r1
-        vrshr.s16       \coef2, \coef2, #6
-        vrshr.s16       \coef3, \coef3, #6
-        vld1.32         {d5[]},   [r0,:32], r1
-        vld1.32         {d5[1]},  [r3,:32], r1
-        vaddw.u8        \coef0, \coef0, d4
-        vld1.32         {d6[]},   [r0,:32], r1
-        vld1.32         {d6[1]},  [r3,:32], r1
-        vaddw.u8        \coef1, \coef1, d5
-        vld1.32         {d7[]},   [r0,:32], r1
-        vld1.32         {d7[1]},  [r3,:32], r1
-
-        vqmovun.s16     d4,  \coef0
-        vqmovun.s16     d5,  \coef1
-        sub             r0,  r0,  r1, lsl #2
-        sub             r3,  r3,  r1, lsl #2
-        vaddw.u8        \coef2, \coef2, d6
-        vaddw.u8        \coef3, \coef3, d7
-        vst1.32         {d4[0]},  [r0,:32], r1
-        vst1.32         {d4[1]},  [r3,:32], r1
-        vqmovun.s16     d6,  \coef2
-        vst1.32         {d5[0]},  [r0,:32], r1
-        vst1.32         {d5[1]},  [r3,:32], r1
-        vqmovun.s16     d7,  \coef3
-
-        vst1.32         {d6[0]},  [r0,:32], r1
-        vst1.32         {d6[1]},  [r3,:32], r1
-        vst1.32         {d7[0]},  [r0,:32], r1
-        vst1.32         {d7[1]},  [r3,:32], r1
-.endm
         load_add_store  q8,  q9,  q10, q11
         load_add_store  q12, q13, q14, q15
-.purgem load_add_store
 
         pop             {pc}
 endfunc
author	Martin Storsjö <martin@martin.st>	2017-02-05 22:55:20 +0200
committer	Martin Storsjö <martin@martin.st>	2017-02-09 12:31:53 +0200
commit	47b3c2c18d1897f3c753ba0cec4b2d7aa24526af (patch)
tree	a13cb9c5b58916e5ec3fc06122fd0ec826ce78f9 /libavcodec/arm
parent	115476018d2c97df7e9b4445fe8f6cc7420ab91f (diff)
download	ffmpeg-47b3c2c18d1897f3c753ba0cec4b2d7aa24526af.tar.gz