1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
#include "config.h"
#include "libavutil/arm/asm.S"
.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds eight rows of eight signed 16-bit coefficients to eight rows of eight
@ unsigned bytes and stores each sum saturated to the range 0..255.
@
@ In:    r0 = block   (16 bytes consumed per row)
@        r1 = dest    (read and written as two 32-bit words per row)
@        r2 = stride  (added to dest after every row)
@ Regs:  r4      = dest word currently being processed
@        r5, r7  = coefficients, later overwritten with their bitwise NOT
@        r6, r8  = per-pixel sums, then clamped pixel values
@        r9      = packed output word
@        r10     = remaining row count (starts at 8)
@ Saves: r4-r10 are pushed on entry and popped before returning.
@
@ NOTE(review): dest is accessed with ldr/str, so it is presumably expected
@ to be 4-byte aligned; the byte lanes are picked with masks on the loaded
@ word, which presumably assumes a little-endian target. Confirm with callers.
@
@ Clamping scheme: the pixel is 0..255, so sum = pixel + coeff can only leave
@ 0..255 by going negative (coeff < 0) or above 255 (coeff > 0). Both cases
@ are "unsigned higher than 255", hence cmp #255 / hi. The replacement value
@ is (~coeff) >> 24 (logical): coeff is sign-extended from 16 bits, so the
@ top byte of ~coeff is 0x00 for a negative coeff and 0xFF otherwise.
@ The unsigned compare (instead of testing bit 8 of the sum alone) keeps the
@ result correct for every int16_t coefficient and guarantees that no sum
@ can spill into a neighbouring byte lane of r9.
function ff_add_pixels_clamped_arm, export=1
        push            {r4-r10}
        mov             r10, #8                 @ row counter
1:
        ldr             r4,  [r1]               /* load dest[0..3] */
        /* block[0] and block[1] */
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5            @ sum0 = pixel0 + block[0]
        add             r8,  r7,  r8,  lsr #8   @ sum1 = pixel1 + block[1]
        mvn             r5,  r5                 @ ~coeff, source of clamp value
        mvn             r7,  r7
        cmp             r6,  #255               @ outside 0..255 ?
        it              hi
        movhi           r6,  r5,  lsr #24       @ 0 or 255 depending on sign
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8,  lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]
        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #255
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8,  lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]
        /* [E] */
        /* [F] */
        add             r1,  r1,  r2            @ next row; flags from subs kept
        bne             1b
        pop             {r4-r10}
        bx              lr
endfunc
|