/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <[email protected]>
 */
#include <asm/ppc_asm.h>

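/*
 * LVS/VPERM wrap the unaligned-load helpers so the same code works on
 * either endianness: lvsl (big endian) or lvsr (little endian) builds the
 * permute control vector, and the vperm operand order is swapped to match.
 */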
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

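/*
 * The err1..err4 macros tag the user access that follows them with a local
 * label and emit an exception table entry for it, so a fault in that access
 * branches to the matching .Ldo_errN fixup below.
 */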
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm

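/*
 * Fault fixup paths.  Each .Ldo_errN undoes the state its copy loop had
 * built up (restores the non-volatile registers it saved, leaves VMX via
 * exit_vmx_usercopy where needed, pops the stack frame), then reloads the
 * original dest/src/len arguments and redoes the copy with
 * __copy_tofrom_user_base, which handles the faulting case and produces
 * the bytes-not-copied return value.
 */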
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base

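/*
 * __copy_tofrom_user_power7(to, from, size):
 *   r3 = destination, r4 = source, r5 = byte count.
 * Returns the number of bytes that could not be copied (0 on success),
 * matching the usual __copy_tofrom_user contract.
 */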
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
	bge	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
#endif

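/*
 * Scalar (non-VMX) copy: align the source to 8 bytes, copy full 128B
 * cachelines with unrolled ld/std using the saved non-volatile registers,
 * then mop up the remaining 0-127 bytes in progressively smaller chunks.
 */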
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

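/*
 * VMX copy, used for sizes of 3328 bytes and up: enter_vmx_usercopy
 * enables Altivec for the copy, streaming prefetch is set up for both the
 * source and the destination, and the copy then proceeds a cacheline of
 * vector registers at a time.  If VMX cannot be used, the stack frame is
 * unwound and the scalar loop above handles the copy instead.
 */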
#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	/* setup read stream 0 */
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

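	/*
	 * Relatively aligned case: bring the destination up to 16B and then
	 * 128B alignment with scalar and single-vector copies, run the main
	 * loop a cacheline (eight vectors) per iteration, then handle the tail.
	 */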
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */

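/*
 * Relatively unaligned case: the source is read 16 bytes ahead and each
 * destination vector is built with VPERM from two neighbouring source
 * vectors, using the permute control vector set up by LVS.
 */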
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

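	/*
	 * v0 now holds the vector covering the first source bytes; each step
	 * below loads the next source vector and VPERMs the previous/current
	 * pair into an aligned result, carrying the newest vector forward in
	 * v0.  The 16 bytes of read-ahead are unwound at label 11.
	 */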
	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */