// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build mips || mipsle
#include "textflag.h"
#ifdef GOARCH_mips
#define MOVWHI MOVWL
#define MOVWLO MOVWR
#else
#define MOVWHI MOVWR
#define MOVWLO MOVWL
#endif
// See memmove Go doc for important implementation constraints.
// func memmove(to, from unsafe.Pointer, n uintptr)
// Register roles (32-bit MIPS):
//   R1 = to (dest cursor)    R2 = from (src cursor)    R3 = n (byte count)
//   R4 = from+n (src end)    R5 = to+n (dest end)
//   R6, R7 = scratch / chunk-boundary pointers          R8-R15 = copy data
// MOVWHI/MOVWLO are the endianness-adjusted LWL/LWR (see #defines above);
// a HI/LO pair on the same register reads or writes one unaligned word,
// and a lone HI or LO access touches only the partial word at its address.
// Copies forward when to <= from, backward otherwise, so overlap is safe.
TEXT runtime·memmove(SB),NOSPLIT,$-0-12 // no frame; 12 bytes of args (to, from, n)
MOVW n+8(FP), R3 // R3 = n
MOVW from+4(FP), R2 // R2 = from
MOVW to+0(FP), R1 // R1 = to
ADDU R3, R2, R4 // end pointer for source
ADDU R3, R1, R5 // end pointer for destination
// if destination is ahead of source, start at the end of the buffer and go backward.
SGTU R1, R2, R6 // R6 = (to > from): forward copy would clobber the overlap
BNE R6, backward
// if less than 4 bytes, use byte by byte copying
SGTU $4, R3, R6 // R6 = (n < 4)
BNE R6, f_small_copy
// align destination to 4 bytes
AND $3, R1, R6 // R6 = to mod 4
BEQ R6, f_dest_aligned
SUBU R1, R0, R6 // R6 = -to
AND $3, R6 // R6 = bytes (1..3) until to is word-aligned
MOVWHI 0(R2), R7 // HI/LO pair: load one unaligned word from src
SUBU R6, R3 // n -= alignment bytes
MOVWLO 3(R2), R7 // (interleaved with pointer math for scheduling)
ADDU R6, R2 // from += alignment bytes
MOVWHI R7, 0(R1) // partial store: writes only the leading unaligned bytes
ADDU R6, R1 // to += alignment bytes; to is now word-aligned
f_dest_aligned:
AND $31, R3, R7 // R7 = n mod 32 (bytes left over after 32-byte chunks)
AND $3, R3, R6 // R6 = n mod 4 (bytes left over after whole words)
SUBU R7, R5, R7 // end pointer for 32-byte chunks
SUBU R6, R5, R6 // end pointer for 4-byte chunks
// if source is not aligned, use unaligned reads
AND $3, R2, R8
BNE R8, f_large_ua
f_large:
// Aligned bulk loop: copy 32 bytes (8 words) per iteration, forward.
BEQ R1, R7, f_words // reached 32-byte-chunk end pointer?
ADDU $32, R1
MOVW 0(R2), R8
MOVW 4(R2), R9
MOVW 8(R2), R10
MOVW 12(R2), R11
MOVW 16(R2), R12
MOVW 20(R2), R13
MOVW 24(R2), R14
MOVW 28(R2), R15
ADDU $32, R2
MOVW R8, -32(R1)
MOVW R9, -28(R1)
MOVW R10, -24(R1)
MOVW R11, -20(R1)
MOVW R12, -16(R1)
MOVW R13, -12(R1)
MOVW R14, -8(R1)
MOVW R15, -4(R1)
JMP f_large
f_words:
// Aligned word loop: copy the remaining whole words, forward.
BEQ R1, R6, f_tail // reached 4-byte-chunk end pointer?
ADDU $4, R1
MOVW 0(R2), R8
ADDU $4, R2
MOVW R8, -4(R1)
JMP f_words
f_tail:
// Copy the final 1..3 bytes (if any) as a partial word at the buffer ends.
BEQ R1, R5, ret // no remainder
MOVWLO -1(R4), R8 // partial load of the word ending at the src end
MOVWLO R8, -1(R5) // partial store at the dest end
ret:
RET
f_large_ua:
// Unaligned-source bulk loop: each MOVWHI/MOVWLO pair assembles one
// unaligned src word into a register; stores are to the aligned dest.
BEQ R1, R7, f_words_ua
ADDU $32, R1
MOVWHI 0(R2), R8
MOVWHI 4(R2), R9
MOVWHI 8(R2), R10
MOVWHI 12(R2), R11
MOVWHI 16(R2), R12
MOVWHI 20(R2), R13
MOVWHI 24(R2), R14
MOVWHI 28(R2), R15
MOVWLO 3(R2), R8 // each LO completes the word begun by its matching HI
MOVWLO 7(R2), R9
MOVWLO 11(R2), R10
MOVWLO 15(R2), R11
MOVWLO 19(R2), R12
MOVWLO 23(R2), R13
MOVWLO 27(R2), R14
MOVWLO 31(R2), R15
ADDU $32, R2
MOVW R8, -32(R1)
MOVW R9, -28(R1)
MOVW R10, -24(R1)
MOVW R11, -20(R1)
MOVW R12, -16(R1)
MOVW R13, -12(R1)
MOVW R14, -8(R1)
MOVW R15, -4(R1)
JMP f_large_ua
f_words_ua:
// Unaligned-source word loop, forward.
BEQ R1, R6, f_tail_ua
MOVWHI 0(R2), R8 // HI/LO pair: one unaligned word from src
ADDU $4, R1
MOVWLO 3(R2), R8
ADDU $4, R2
MOVW R8, -4(R1)
JMP f_words_ua
f_tail_ua:
// Final 1..3 bytes with unaligned source: assemble the word ending at
// the src end, then partial-store only its trailing bytes at the dest end.
BEQ R1, R5, ret
MOVWHI -4(R4), R8
MOVWLO -1(R4), R8
MOVWLO R8, -1(R5)
JMP ret
f_small_copy:
// n < 4: plain byte-by-byte forward copy.
BEQ R1, R5, ret
ADDU $1, R1
MOVB 0(R2), R6
ADDU $1, R2
MOVB R6, -1(R1)
JMP f_small_copy
backward:
// to > from and buffers may overlap: copy from high addresses down,
// using the end pointers R4/R5 as the working cursors.
SGTU $4, R3, R6 // R6 = (n < 4)
BNE R6, b_small_copy
AND $3, R5, R6 // R6 = dest-end mod 4 (bytes to peel off the tail)
BEQ R6, b_dest_aligned
MOVWHI -4(R4), R7 // HI/LO pair: load unaligned word ending at src end
SUBU R6, R3 // n -= alignment bytes
MOVWLO -1(R4), R7
SUBU R6, R4 // src end -= alignment bytes
MOVWLO R7, -1(R5) // partial store of the trailing unaligned bytes
SUBU R6, R5 // dest end -= alignment bytes; now word-aligned
b_dest_aligned:
AND $31, R3, R7 // R7 = n mod 32
AND $3, R3, R6 // R6 = n mod 4
ADDU R7, R1, R7 // low-end stop pointer for 32-byte chunks
ADDU R6, R1, R6 // low-end stop pointer for 4-byte chunks
AND $3, R4, R8 // source end unaligned -> use unaligned reads
BNE R8, b_large_ua
b_large:
// Aligned bulk loop: copy 32 bytes (8 words) per iteration, backward.
BEQ R5, R7, b_words // reached 32-byte-chunk stop pointer?
ADDU $-32, R5
MOVW -4(R4), R8
MOVW -8(R4), R9
MOVW -12(R4), R10
MOVW -16(R4), R11
MOVW -20(R4), R12
MOVW -24(R4), R13
MOVW -28(R4), R14
MOVW -32(R4), R15
ADDU $-32, R4
MOVW R8, 28(R5)
MOVW R9, 24(R5)
MOVW R10, 20(R5)
MOVW R11, 16(R5)
MOVW R12, 12(R5)
MOVW R13, 8(R5)
MOVW R14, 4(R5)
MOVW R15, 0(R5)
JMP b_large
b_words:
// Aligned word loop, backward.
BEQ R5, R6, b_tail // reached 4-byte-chunk stop pointer?
ADDU $-4, R5
MOVW -4(R4), R8
ADDU $-4, R4
MOVW R8, 0(R5)
JMP b_words
b_tail:
// Copy the leading 1..3 bytes (if any) as a partial word at the buffer starts.
BEQ R5, R1, ret // no remainder
MOVWHI 0(R2), R8 // R2 and R1 have the same alignment so we don't need to load a whole word
MOVWHI R8, 0(R1) // partial store of the leading bytes
JMP ret
b_large_ua:
// Unaligned-source bulk loop, backward: HI/LO pairs assemble each
// unaligned src word; stores go to the aligned dest.
BEQ R5, R7, b_words_ua
ADDU $-32, R5
MOVWHI -4(R4), R8
MOVWHI -8(R4), R9
MOVWHI -12(R4), R10
MOVWHI -16(R4), R11
MOVWHI -20(R4), R12
MOVWHI -24(R4), R13
MOVWHI -28(R4), R14
MOVWHI -32(R4), R15
MOVWLO -1(R4), R8 // each LO completes the word begun by its matching HI
MOVWLO -5(R4), R9
MOVWLO -9(R4), R10
MOVWLO -13(R4), R11
MOVWLO -17(R4), R12
MOVWLO -21(R4), R13
MOVWLO -25(R4), R14
MOVWLO -29(R4), R15
ADDU $-32, R4
MOVW R8, 28(R5)
MOVW R9, 24(R5)
MOVW R10, 20(R5)
MOVW R11, 16(R5)
MOVW R12, 12(R5)
MOVW R13, 8(R5)
MOVW R14, 4(R5)
MOVW R15, 0(R5)
JMP b_large_ua
b_words_ua:
// Unaligned-source word loop, backward.
BEQ R5, R6, b_tail_ua
MOVWHI -4(R4), R8 // HI/LO pair: one unaligned word ending at src cursor
ADDU $-4, R5
MOVWLO -1(R4), R8
ADDU $-4, R4
MOVW R8, 0(R5)
JMP b_words_ua
b_tail_ua:
// Leading 1..3 bytes with unaligned source: assemble the first src word,
// then partial-store only its leading bytes at the dest start.
BEQ R5, R1, ret
MOVWHI (R2), R8
MOVWLO 3(R2), R8
MOVWHI R8, 0(R1)
JMP ret
b_small_copy:
// n < 4: plain byte-by-byte backward copy.
BEQ R5, R1, ret
ADDU $-1, R5
MOVB -1(R4), R6
ADDU $-1, R4
MOVB R6, 0(R5)
JMP b_small_copy