diff options
author | Janne Grunau <janne-libav@jannau.net> | 2011-11-29 13:38:10 +0000 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2011-12-02 19:59:18 +0000 |
commit | a760f530bba6d21484c611de67d072fdab56e08e (patch) | |
tree | 8a660dfc11e4ecd29c9829e635c76ce1ee108724 /libavcodec | |
parent | 150ddbc1482c65b9aac803f011d7fcd734f776ec (diff) | |
download | ffmpeg-a760f530bba6d21484c611de67d072fdab56e08e.tar.gz |
ARM: make some NEON macros reusable
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/h264dsp_neon.S | 41 | ||||
-rw-r--r-- | libavcodec/arm/neon.S | 59 | ||||
-rw-r--r-- | libavcodec/arm/vp8dsp_neon.S | 26 |
3 files changed, 65 insertions, 61 deletions
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5156538ed2..1e97908db2 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -19,46 +19,7 @@
  */
 
 #include "asm.S"
-
-        .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
-        vtrn.32         \r0, \r4
-        vtrn.32         \r1, \r5
-        vtrn.32         \r2, \r6
-        vtrn.32         \r3, \r7
-        vtrn.16         \r0, \r2
-        vtrn.16         \r1, \r3
-        vtrn.16         \r4, \r6
-        vtrn.16         \r5, \r7
-        vtrn.8          \r0, \r1
-        vtrn.8          \r2, \r3
-        vtrn.8          \r4, \r5
-        vtrn.8          \r6, \r7
-        .endm
-
-        .macro transpose_4x4 r0 r1 r2 r3
-        vtrn.16         \r0, \r2
-        vtrn.16         \r1, \r3
-        vtrn.8          \r0, \r1
-        vtrn.8          \r2, \r3
-        .endm
-
-        .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
-        vswp            \r0, \r4
-        vswp            \r1, \r5
-        vswp            \r2, \r6
-        vswp            \r3, \r7
-        .endm
-
-        .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
-        vtrn.32         \r0, \r2
-        vtrn.32         \r1, \r3
-        vtrn.32         \r4, \r6
-        vtrn.32         \r5, \r7
-        vtrn.16         \r0, \r1
-        vtrn.16         \r2, \r3
-        vtrn.16         \r4, \r5
-        vtrn.16         \r6, \r7
-        .endm
+#include "neon.S"
 
 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
         .macro h264_chroma_mc8 type
diff --git a/libavcodec/arm/neon.S b/libavcodec/arm/neon.S
new file mode 100644
index 0000000000..716a607af7
--- /dev/null
+++ b/libavcodec/arm/neon.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+.macro transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7
+        vtrn.32         \r0, \r4
+        vtrn.32         \r1, \r5
+        vtrn.32         \r2, \r6
+        vtrn.32         \r3, \r7
+        vtrn.16         \r0, \r2
+        vtrn.16         \r1, \r3
+        vtrn.16         \r4, \r6
+        vtrn.16         \r5, \r7
+        vtrn.8          \r0, \r1
+        vtrn.8          \r2, \r3
+        vtrn.8          \r4, \r5
+        vtrn.8          \r6, \r7
+.endm
+
+.macro transpose_4x4 r0, r1, r2, r3
+        vtrn.16         \r0, \r2
+        vtrn.16         \r1, \r3
+        vtrn.8          \r0, \r1
+        vtrn.8          \r2, \r3
+.endm
+
+.macro swap4 r0, r1, r2, r3, r4, r5, r6, r7
+        vswp            \r0, \r4
+        vswp            \r1, \r5
+        vswp            \r2, \r6
+        vswp            \r3, \r7
+.endm
+
+.macro transpose16_4x4 r0, r1, r2, r3, r4, r5, r6, r7
+        vtrn.32         \r0, \r2
+        vtrn.32         \r1, \r3
+        vtrn.32         \r4, \r6
+        vtrn.32         \r5, \r7
+        vtrn.16         \r0, \r1
+        vtrn.16         \r2, \r3
+        vtrn.16         \r4, \r5
+        vtrn.16         \r6, \r7
+.endm
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 1b9f24eef2..4ff53ad70f 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -22,6 +22,7 @@
  */
 
 #include "asm.S"
+#include "neon.S"
 
 function ff_vp8_luma_dc_wht_neon, export=1
         vld1.16         {q0-q1}, [r1,:128]
@@ -454,23 +455,6 @@ endfunc
     .endif
 .endm
 
-.macro transpose8x16matrix
-        vtrn.32         q0, q4
-        vtrn.32         q1, q5
-        vtrn.32         q2, q6
-        vtrn.32         q3, q7
-
-        vtrn.16         q0, q2
-        vtrn.16         q1, q3
-        vtrn.16         q4, q6
-        vtrn.16         q5, q7
-
-        vtrn.8          q0, q1
-        vtrn.8          q2, q3
-        vtrn.8          q4, q5
-        vtrn.8          q6, q7
-.endm
-
 .macro vp8_v_loop_filter16 name, inner=0, simple=0
 function ff_vp8_v_loop_filter16\name\()_neon, export=1
         vpush           {q4-q7}
@@ -605,7 +589,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
         vld1.8          {d13}, [r0], r1
         vld1.8          {d15}, [r0], r1
 
-        transpose8x16matrix
+        transpose_8x8   q0, q1, q2, q3, q4, q5, q6, q7
 
         vdup.8          q14, r2                 @ flim_E
     .if !\simple
@@ -616,7 +600,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
 
         sub             r0, r0, r1, lsl #4      @ backup 16 rows
 
-        transpose8x16matrix
+        transpose_8x8   q0, q1, q2, q3, q4, q5, q6, q7
 
         @ Store pixels:
         vst1.8          {d0}, [r0], r1
@@ -670,7 +654,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
         vld1.8          {d14}, [r0], r2
         vld1.8          {d15}, [r1], r2
 
-        transpose8x16matrix
+        transpose_8x8   q0, q1, q2, q3, q4, q5, q6, q7
 
         vdup.8          q14, r3                 @ flim_E
         vdup.8          q15, r12                @ flim_I
@@ -681,7 +665,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
         sub             r0, r0, r2, lsl #3      @ backup u 8 rows
         sub             r1, r1, r2, lsl #3      @ backup v 8 rows
 
-        transpose8x16matrix
+        transpose_8x8   q0, q1, q2, q3, q4, q5, q6, q7
 
         @ Store pixels:
         vst1.8          {d0}, [r0], r2
|