author    Mans Rullgard <mans@mansr.com>  2012-05-10 00:55:18 +0100
committer Mans Rullgard <mans@mansr.com>  2012-05-10 14:39:34 +0100
commit    cbc7d60afa0c56f8e50131830278fd32a89aed9d (patch)
tree      84874a4fbf59de529baf54be79485c78c94a1a53
parent    1de53d006b754c8ecab2f31a223acfaea15924f4 (diff)
download  ffmpeg-cbc7d60afa0c56f8e50131830278fd32a89aed9d.tar.gz
arm: dsputil: fix overreads in put/avg_pixels functions
The vertically interpolating variants of these functions read ahead one
line to optimise the loop.  On the last line processed, this read-ahead
might be outside the buffer.  Fix these invalid reads by processing the
last line outside the loop.

Signed-off-by: Mans Rullgard <mans@mansr.com>
-rw-r--r--  libavcodec/arm/dsputil_neon.S  92
1 file changed, 92 insertions(+), 0 deletions(-)
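For context, the pattern being fixed can be modelled in scalar C.  The
sketch below is purely illustrative: the function name, the AVG macro
and the memcpy "loads" are hypothetical stand-ins for the NEON code,
shown only to make the read-ahead pattern concrete.  Output line i
averages source lines i and i+1, so h output lines need exactly h+1
source lines; a pipelined loop that keeps two loads in flight for all
h lines fetches h+2, one past the end.

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    /* Rounding byte average: scalar equivalent of the NEON "avg"
     * macro in its rnd=1 form (vrhadd.u8). */
    #define AVG(a, b) ((uint8_t)(((a) + (b) + 1) >> 1))

    /* Hypothetical C model of the fixed pixels16_y2 control flow. */
    static void put_pixels16_y2_model(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int h)
    {
        uint8_t l0[16], l1[16];             /* q0, q1: lines in flight */
        int i, x;

        memcpy(l0, src, 16); src += stride; /* vld1.64 {q0}, [r1], r2 */
        memcpy(l1, src, 16); src += stride; /* vld1.64 {q1}, [r1], r2 */

        for (i = 0; i < h - 2; i += 2) {    /* sub r3, r3, #2 */
            for (x = 0; x < 16; x++)
                dst[x] = AVG(l0[x], l1[x]);
            memcpy(l0, src, 16); src += stride;
            dst += stride;
            for (x = 0; x < 16; x++)
                dst[x] = AVG(l1[x], l0[x]);
            memcpy(l1, src, 16); src += stride;
            dst += stride;
        }

        /* Peeled last iteration: only one more source line (line h,
         * the last one the filter needs) is loaded, not two. */
        for (x = 0; x < 16; x++)
            dst[x] = AVG(l0[x], l1[x]);
        memcpy(l0, src, 16);
        dst += stride;
        for (x = 0; x < 16; x++)
            dst[x] = AVG(l1[x], l0[x]);
    }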
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index d49aedd6c4..4bdcd95061 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -95,6 +95,7 @@ endfunc
.endm
.macro pixels16_y2 rnd=1, avg=0
+ sub r3, r3, #2
vld1.64 {q0}, [r1], r2
vld1.64 {q1}, [r1], r2
1: subs r3, r3, #2
@@ -114,10 +115,25 @@ endfunc
vst1.64 {q2}, [r0,:128], r2
vst1.64 {q3}, [r0,:128], r2
bne 1b
+
+ avg q2, q0, q1
+ vld1.64 {q0}, [r1], r2
+ avg q3, q0, q1
+ .if \avg
+ vld1.8 {q8}, [r0,:128], r2
+ vld1.8 {q9}, [r0,:128]
+ vrhadd.u8 q2, q2, q8
+ vrhadd.u8 q3, q3, q9
+ sub r0, r0, r2
+ .endif
+ vst1.64 {q2}, [r0,:128], r2
+ vst1.64 {q3}, [r0,:128], r2
+
bx lr
.endm
.macro pixels16_xy2 rnd=1, avg=0
+ sub r3, r3, #2
vld1.64 {d0-d2}, [r1], r2
vld1.64 {d4-d6}, [r1], r2
.ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
vaddl.u8 q11, d3, d5
vst1.64 {q15}, [r0,:128], r2
bgt 1b
+
+ vld1.64 {d0-d2}, [r1], r2
+ vadd.u16 q12, q8, q9
+ .ifeq \rnd
+ vadd.u16 q12, q12, q13
+ .endif
+ vext.8 q15, q0, q1, #1
+ vadd.u16 q1, q10, q11
+ shrn d28, q12, #2
+ .ifeq \rnd
+ vadd.u16 q1, q1, q13
+ .endif
+ shrn d29, q1, #2
+ .if \avg
+ vld1.8 {q8}, [r0,:128]
+ vrhadd.u8 q14, q14, q8
+ .endif
+ vaddl.u8 q8, d0, d30
+ vaddl.u8 q10, d1, d31
+ vst1.64 {q14}, [r0,:128], r2
+ vadd.u16 q12, q8, q9
+ .ifeq \rnd
+ vadd.u16 q12, q12, q13
+ .endif
+ vadd.u16 q0, q10, q11
+ shrn d30, q12, #2
+ .ifeq \rnd
+ vadd.u16 q0, q0, q13
+ .endif
+ shrn d31, q0, #2
+ .if \avg
+ vld1.8 {q9}, [r0,:128]
+ vrhadd.u8 q15, q15, q9
+ .endif
+ vst1.64 {q15}, [r0,:128], r2
+
bx lr
.endm
@@ -228,6 +280,7 @@ endfunc
.endm
.macro pixels8_y2 rnd=1, avg=0
+ sub r3, r3, #2
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
1: subs r3, r3, #2
@@ -246,10 +299,24 @@ endfunc
vst1.64 {d4}, [r0,:64], r2
vst1.64 {d5}, [r0,:64], r2
bne 1b
+
+ avg d4, d0, d1
+ vld1.64 {d0}, [r1], r2
+ avg d5, d0, d1
+ .if \avg
+ vld1.8 {d2}, [r0,:64], r2
+ vld1.8 {d3}, [r0,:64]
+ vrhadd.u8 q2, q2, q1
+ sub r0, r0, r2
+ .endif
+ vst1.64 {d4}, [r0,:64], r2
+ vst1.64 {d5}, [r0,:64], r2
+
bx lr
.endm
.macro pixels8_xy2 rnd=1, avg=0
+ sub r3, r3, #2
vld1.64 {q0}, [r1], r2
vld1.64 {q1}, [r1], r2
.ifeq \rnd
@@ -291,6 +358,31 @@ endfunc
vaddl.u8 q9, d2, d6
vst1.64 {d7}, [r0,:64], r2
bgt 1b
+
+ vld1.64 {q0}, [r1], r2
+ vadd.u16 q10, q8, q9
+ vext.8 d4, d0, d1, #1
+ .ifeq \rnd
+ vadd.u16 q10, q10, q11
+ .endif
+ vaddl.u8 q8, d0, d4
+ shrn d5, q10, #2
+ vadd.u16 q10, q8, q9
+ .if \avg
+ vld1.8 {d7}, [r0,:64]
+ vrhadd.u8 d5, d5, d7
+ .endif
+ .ifeq \rnd
+ vadd.u16 q10, q10, q11
+ .endif
+ vst1.64 {d5}, [r0,:64], r2
+ shrn d7, q10, #2
+ .if \avg
+ vld1.8 {d5}, [r0,:64]
+ vrhadd.u8 d7, d7, d5
+ .endif
+ vst1.64 {d7}, [r0,:64], r2
+
bx lr
.endm
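
The fix has the same shape in all four macros (pixels16_y2,
pixels16_xy2, pixels8_y2, pixels8_xy2): "sub r3, r3, #2" takes one
two-line iteration off the pipelined loop, and the tail appended after
the branch produces the last two output lines while issuing only a
single further load from r1, so the source reads stop at the last line
the interpolation actually needs.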