]> Git Repo - linux.git/blame - arch/powerpc/lib/copyuser_64.S
Merge tag 'drm-next-2018-06-15' of git://anongit.freedesktop.org/drm/drm
[linux.git] / arch / powerpc / lib / copyuser_64.S
CommitLineData
14cf11af 1/*
14cf11af
PM
2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
9445aa1a 11#include <asm/export.h>
14cf11af 12
20151169
PM
/*
 * Endian-neutral shift mnemonics.  sLd/sHd expand to sld/srd on big
 * endian and to srd/sld on little endian, so code that splices one
 * doubleword out of two adjacent aligned loads (see .Lsrc_unaligned)
 * can be written once for both byte orders.
 */
13#ifdef __BIG_ENDIAN__
14#define sLd sld /* Shift towards low-numbered address. */
15#define sHd srd /* Shift towards high-numbered address. */
16#else
17#define sLd srd /* Shift towards low-numbered address. */
18#define sHd sld /* Shift towards high-numbered address. */
19#endif
20
/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * Register use as seen in this file:
 *   r3 = destination pointer, r4 = source pointer, r5 = byte count.
 * On the normal paths r3 is set to 0 before returning; the exception
 * handlers further down return the number of bytes not copied instead.
 *
 * r3, r4 and r5 are saved at -24(r1), -16(r1) and -8(r1) on entry; the
 * exception handlers reload them to work out how far the copy got.
 *
 * NOTE(review): the first feature section presumably gets patched so
 * that CPUs with CPU_FTR_VMX_COPY branch to __copy_tofrom_user_power7
 * and all others fall through the nop -- confirm against the
 * feature-fixup macros, which are defined outside this file.
 */
14cf11af 21 .align 7
169c7cee 22_GLOBAL_TOC(__copy_tofrom_user)
15a3204d 23#ifdef CONFIG_PPC_BOOK3S_64
a66086b8
AB
24BEGIN_FTR_SECTION
25 nop
26FTR_SECTION_ELSE
27 b __copy_tofrom_user_power7
28ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
15a3204d 29#endif
a66086b8 30_GLOBAL(__copy_tofrom_user_base)
14cf11af
PM
31 /* first check for a whole page copy on a page boundary */
/* cr1 = length vs 16 (unsigned), cr6 = length vs 4096 */
32 cmpldi cr1,r5,16
33 cmpdi cr6,r5,4096
34 or r0,r3,r4
35 neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
/* cr0.eq is set when both pointers have their low 12 bits clear */
36 andi. r0,r0,4095
37 std r3,-24(r1)
/* cr0.eq = (both pointers 4096-aligned) AND (length == 4096) */
38 crand cr0*4+2,cr0*4+2,cr6*4+2
39 std r4,-16(r1)
40 std r5,-8(r1)
41 dcbt 0,r4
3c726f8d 42 beq .Lcopy_page_4K
/* r6 = bytes needed to bring the destination to an 8-byte boundary */
14cf11af 43 andi. r6,r6,7
/* low 4 bits of the length go into cr7; the tail code tests them one by one */
694caf02 44 PPC_MTOCRF(0x01,r5)
/* fewer than 16 bytes in total: use the simple word/half/byte copy */
14cf11af 45 blt cr1,.Lshort_copy
a4e22f02
MN
46/* Below we want to nop out the bne if we're on a CPU that has the
47 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
48 * cleared.
49 * At the time of writing the only CPU that has this combination of bits
50 * set is Power6.
51 */
52BEGIN_FTR_SECTION
53 nop
54FTR_SECTION_ELSE
14cf11af 55 bne .Ldst_unaligned
a4e22f02
MN
56ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
57 CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main copy.  Also entered from .Ldst_unaligned (after the destination
 * has been aligned) and from the page-copy exception handler.
 * r3 is biased by -16 here; the stores below use offsets 0..24 from the
 * biased pointer and .Ldo_tail undoes the bias.
 */
14cf11af 58.Ldst_aligned:
14cf11af 59 addi r3,r3,-16
a4e22f02
MN
60BEGIN_FTR_SECTION
/* r0 = source misalignment within a doubleword; consumed by .Lsrc_unaligned */
61 andi. r0,r4,7
14cf11af 62 bne .Lsrc_unaligned
a4e22f02 63END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
789c299c
AB
64 blt cr1,.Ldo_tail /* if < 16 bytes to copy */
/* r0 = number of 32-byte chunks; cr1.eq is set when there are none */
65 srdi r0,r5,5
66 cmpdi cr1,r0,0
6720: ld r7,0(r4)
68220: ld r6,8(r4)
69 addi r4,r4,16
70 mtctr r0
/* is there an odd 16-byte chunk?  if not, join the loop at 22 */
71 andi. r0,r5,0x10
72 beq 22f
/* odd 16-byte chunk: hand the pair just loaded to r9/r8 and re-align the pointers */
73 addi r3,r3,16
74 addi r4,r4,-16
75 mr r9,r7
76 mr r8,r6
/* no 32-byte chunks at all: just store the pair at 72 */
77 beq cr1,72f
/* 32 bytes per iteration; each pair of loads is stored half an iteration later */
7821: ld r7,16(r4)
79221: ld r6,24(r4)
80 addi r4,r4,32
8170: std r9,0(r3)
82270: std r8,8(r3)
8322: ld r9,0(r4)
84222: ld r8,8(r4)
8571: std r7,16(r3)
86271: std r6,24(r3)
87 addi r3,r3,32
14cf11af 88 bdnz 21b
789c299c
AB
/* store the last pair that was loaded but not yet written */
8972: std r9,0(r3)
90272: std r8,8(r3)
/* anything left below 16 bytes?  if not, return 0 via label 3 */
91 andi. r5,r5,0xf
14cf11af 92 beq+ 3f
789c299c 93 addi r4,r4,16
/*
 * Copy the remaining 0..15 bytes: 8, 4, 2 and 1 bytes are copied when
 * cr7 bits 0, 1, 2 and 3 respectively are set.
 */
14cf11af 94.Ldo_tail:
789c299c
AB
/* undo the -16 bias applied to r3 at .Ldst_aligned */
95 addi r3,r3,16
96 bf cr7*4+0,246f
97244: ld r9,0(r4)
98 addi r4,r4,8
99245: std r9,0(r3)
100 addi r3,r3,8
101246: bf cr7*4+1,1f
10223: lwz r9,0(r4)
f72b728b 103 addi r4,r4,4
14cf11af
PM
10473: stw r9,0(r3)
105 addi r3,r3,4
1061: bf cr7*4+2,2f
789c299c 10744: lhz r9,0(r4)
f72b728b 108 addi r4,r4,2
14cf11af
PM
10974: sth r9,0(r3)
110 addi r3,r3,2
1112: bf cr7*4+3,3f
789c299c 11245: lbz r9,0(r4)
14cf11af
PM
11375: stb r9,0(r3)
/* success: nothing left uncopied */
1143: li r3,0
115 blr
116
/*
 * Reached from .Ldst_aligned when the source is not 8-byte aligned.
 * On entry r0 = r4 & 7 (set by the andi. just before the branch here)
 * and r3 carries the -16 bias applied at .Ldst_aligned.
 *
 * The source pointer is rounded down by r0 so that every ld is aligned,
 * and each doubleword stored is built from two neighbouring loads:
 *   r10 = 8 * r0      shift count, in bits, for sLd
 *   r11 = 64 - r10    complementary shift count for sHd
 *   ctr = (length - 16) / 16, the trip count for the loop at 1:
 *   r5  = ((length - 16) & 7) + r0, used by the tail code at 6:
 * The andi. that computes r5 also leaves cr0 for the "bne 6f" test
 * after the loop; no record-form instruction appears in between.
 */
117.Lsrc_unaligned:
118 srdi r6,r5,3
119 addi r5,r5,-16
120 subf r4,r0,r4
121 srdi r7,r5,4
122 sldi r10,r0,3
123 cmpldi cr6,r6,3
124 andi. r5,r5,7
125 mtctr r7
126 subfic r11,r10,64
127 add r5,r5,r0
/* cr7 bit 0 selects between the two prologues below (see their load/store counts) */
128 bt cr7*4+0,28f

13024: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
13125: ld r0,8(r4)
20151169 132 sLd r6,r9,r10
14cf11af 13326: ldu r9,16(r4)
20151169
PM
134 sHd r7,r0,r11
135 sLd r8,r0,r10
14cf11af
PM
136 or r7,r7,r6
/* cr6 compares (length / 8) with 3: too short for the loop, go straight to the final stores */
137 blt cr6,79f
13827: ld r0,8(r4)
139 b 2f

14128: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
14229: ldu r9,8(r4)
20151169 143 sLd r8,r0,r10
14cf11af
PM
144 addi r3,r3,-8
145 blt cr6,5f
14630: ld r0,8(r4)
20151169
PM
147 sHd r12,r9,r11
148 sLd r6,r9,r10
14cf11af
PM
14931: ldu r9,16(r4)
150 or r12,r8,r12
20151169
PM
151 sHd r7,r0,r11
152 sLd r8,r0,r10
14cf11af
PM
153 addi r3,r3,16
154 beq cr6,78f

/* steady state: two aligned loads, two merged doubleword stores per iteration */
1561: or r7,r7,r6
15732: ld r0,8(r4)
15876: std r12,8(r3)
20151169
PM
1592: sHd r12,r9,r11
160 sLd r6,r9,r10
14cf11af
PM
16133: ldu r9,16(r4)
162 or r12,r8,r12
16377: stdu r7,16(r3)
20151169
PM
164 sHd r7,r0,r11
165 sLd r8,r0,r10
14cf11af
PM
166 bdnz 1b

/* drain: store the doublewords that have been merged but not yet written */
16878: std r12,8(r3)
169 or r7,r7,r6
17079: std r7,16(r3)
20151169 1715: sHd r12,r9,r11
14cf11af
PM
172 or r12,r8,r12
17380: std r12,24(r3)
/* cr0 is still from "andi. r5,r5,7": no sub-doubleword remainder means we are done */
174 bne 6f
175 li r3,0
176 blr
/*
 * Fewer than 8 bytes remain.  r9 holds the last doubleword loaded; if
 * r5 (leftover bytes + source misalignment) exceeds 8, one more aligned
 * doubleword is loaded and merged in.
 */
1776: cmpwi cr1,r5,8
178 addi r3,r3,32
20151169 179 sLd r9,r9,r10
f72b728b 180 ble cr1,7f
14cf11af 18134: ld r0,8(r4)
20151169 182 sHd r7,r0,r11
14cf11af 183 or r9,r7,r9
f72b728b
MN
/*
 * Store 4, 2 and 1 bytes out of r9 as selected by cr7 bits 1, 2 and 3.
 * r9 is rotated so the next bytes to store sit in its low-order end:
 * before each store on big endian, after each store on little endian.
 */
1847:
185 bf cr7*4+1,1f
20151169 186#ifdef __BIG_ENDIAN__
f72b728b 187 rotldi r9,r9,32
20151169 188#endif
f72b728b 18994: stw r9,0(r3)
20151169
PM
190#ifdef __LITTLE_ENDIAN__
191 rotrdi r9,r9,32
192#endif
f72b728b
MN
193 addi r3,r3,4
1941: bf cr7*4+2,2f
20151169 195#ifdef __BIG_ENDIAN__
f72b728b 196 rotldi r9,r9,16
20151169 197#endif
f72b728b 19895: sth r9,0(r3)
20151169
PM
199#ifdef __LITTLE_ENDIAN__
200 rotrdi r9,r9,16
201#endif
f72b728b
MN
202 addi r3,r3,2
2032: bf cr7*4+3,3f
20151169 204#ifdef __BIG_ENDIAN__
f72b728b 205 rotldi r9,r9,8
20151169 206#endif
f72b728b 20796: stb r9,0(r3)
20151169
PM
208#ifdef __LITTLE_ENDIAN__
209 rotrdi r9,r9,8
210#endif
f72b728b
MN
2113: li r3,0
212 blr
14cf11af
PM
213
/*
 * Destination is not 8-byte aligned.  r6 = number of bytes (1..7)
 * needed to reach the next 8-byte boundary.  Copy 1, 2 and 4 bytes as
 * selected by cr7 bits 3, 2 and 1 of r6, using r7 as the running offset
 * so that r3 and r4 stay unchanged until the end (the fault handlers
 * for these accesses add r7 to r3).  Then advance both pointers by r6
 * and continue at .Ldst_aligned with the reduced length in r5.
 */
214.Ldst_unaligned:
694caf02 215 PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
14cf11af
PM
216 subf r5,r6,r5
217 li r7,0
/* re-evaluate "fewer than 16 bytes left" for the blt cr1 at .Ldst_aligned */
a4e22f02 218 cmpldi cr1,r5,16
14cf11af
PM
219 bf cr7*4+3,1f
22035: lbz r0,0(r4)
22181: stb r0,0(r3)
222 addi r7,r7,1
2231: bf cr7*4+2,2f
22436: lhzx r0,r7,r4
22582: sthx r0,r7,r3
226 addi r7,r7,2
2272: bf cr7*4+1,3f
22837: lwzx r0,r7,r4
22983: stwx r0,r7,r3
/* cr7 now takes the low 4 bits of the remaining length, for the tail code */
694caf02 2303: PPC_MTOCRF(0x01,r5)
14cf11af
PM
231 add r4,r6,r4
232 add r3,r6,r3
233 b .Ldst_aligned
234
/*
 * Copy of fewer than 16 bytes (taken via "blt cr1" at entry).
 * cr7 holds the low 4 bits of the length: bit 0 copies 8 bytes as two
 * words, bit 1 copies a word, bit 2 a halfword and bit 3 a byte.
 * Sets r3 to 0 before returning.
 */
235.Lshort_copy:
236 bf cr7*4+0,1f
23738: lwz r0,0(r4)
23839: lwz r9,4(r4)
239 addi r4,r4,8
24084: stw r0,0(r3)
24185: stw r9,4(r3)
242 addi r3,r3,8
2431: bf cr7*4+1,2f
24440: lwz r0,0(r4)
245 addi r4,r4,4
24686: stw r0,0(r3)
247 addi r3,r3,4
2482: bf cr7*4+2,3f
24941: lhz r0,0(r4)
250 addi r4,r4,2
25187: sth r0,0(r3)
252 addi r3,r3,2
2533: bf cr7*4+3,4f
25442: lbz r0,0(r4)
25588: stb r0,0(r3)
2564: li r3,0
257 blr
258
259/*
260 * exception handlers follow
261 * we have to return the number of bytes not copied
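262 * for an exception on a load, we retry the rest of the copy one byte
 * at a time and return the count still uncopied if that faults again
 * (the destination is not zeroed here)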
263 */
264
/*
 * Load-fault entry points.  Each label below is the handler paired by
 * the EX_TABLE list with one load instruction.  The labels are stacked
 * so that execution falls through every "addi r3,r3,N" beneath the
 * entry point; which label is entered therefore determines how much is
 * added to r3 before reaching "1:".  Labels 136/137 instead add the
 * running offset r7 used by .Ldst_unaligned.
 */
265136:
266137:
267 add r3,r3,r7
268 b 1f
269130:
270131:
271 addi r3,r3,8
272120:
789c299c 273320:
14cf11af 274122:
789c299c 275322:
14cf11af
PM
276124:
277125:
278126:
279127:
280128:
281129:
282133:
283 addi r3,r3,8
14cf11af
PM
284132:
285 addi r3,r3,8
789c299c
AB
286121:
287321:
288344:
14cf11af
PM
289134:
290135:
291138:
292139:
293140:
294141:
295142:
f72b728b
MN
296123:
297144:
298145:
14cf11af
PM

300/*
301 * here we have had a fault on a load and r3 points to the first
302 * unmodified byte of the destination
303 */
/* r6 = bytes already copied (r3 - saved r3); advance saved r4 and shrink saved r5 by it */
3041: ld r6,-24(r1)
305 ld r4,-16(r1)
306 ld r5,-8(r1)
307 subf r6,r6,r3
308 add r4,r4,r6
309 subf r5,r6,r5 /* #bytes left to go */

311/*
312 * first see if we can copy any more bytes before hitting another exception
313 */
314 mtctr r5
31543: lbz r0,0(r4)
316 addi r4,r4,1
31789: stb r0,0(r3)
318 addi r3,r3,1
319 bdnz 43b
320 li r3,0 /* huh? all copied successfully this time? */
321 blr

323/*
3448890c 324 * here we have trapped again, amount remaining is in ctr.
14cf11af 325 */
3448890c 326143: mfctr r3
14cf11af
PM
327 blr

329/*
330 * exception handlers for stores: we just need to work
331 * out how many bytes weren't copied
332 */
/*
 * Same stacked-label scheme as the load handlers above: the entry label
 * fixes how much is added to r3 on the way down to "1:".
 */
333182:
334183:
335 add r3,r3,r7
336 b 1f
789c299c 337371:
14cf11af
PM
338180:
339 addi r3,r3,8
340171:
341177:
1a34439e 342179:
14cf11af 343 addi r3,r3,8
789c299c
AB
344370:
345372:
14cf11af
PM
346176:
347178:
348 addi r3,r3,4
349185:
350 addi r3,r3,4
789c299c
AB
351170:
352172:
353345:
14cf11af
PM
354173:
355174:
356175:
14cf11af
PM
357181:
358184:
359186:
360187:
361188:
362189:
f72b728b
MN
363194:
364195:
365196:
14cf11af
PM
/* r3 = (saved destination + saved length) - adjusted r3 */
3661:
367 ld r6,-24(r1)
368 ld r5,-8(r1)
369 add r6,r6,r5
370 subf r3,r3,r6 /* #bytes not copied */
3448890c 371 blr
14cf11af 372
24bfa6a9
NP
/*
 * Exception table for the generic copy paths.  Each entry pairs the
 * label of a user-memory access with the label of its handler; in this
 * list instruction NN maps to handler 1NN and 2NN maps to 3NN.
 * NOTE(review): EX_TABLE is defined outside this file -- presumably it
 * emits the (faulting insn, fixup) pair into the kernel exception
 * table; confirm in the header that provides it.
 *
 * All references use the "b" (backward) suffix, so they bind to the
 * nearest preceding definition of each numeric label.  The list has to
 * stay ahead of .Lcopy_page_4K, which defines labels 20..91 again.
 */
373 EX_TABLE(20b,120b)
374 EX_TABLE(220b,320b)
375 EX_TABLE(21b,121b)
376 EX_TABLE(221b,321b)
377 EX_TABLE(70b,170b)
378 EX_TABLE(270b,370b)
379 EX_TABLE(22b,122b)
380 EX_TABLE(222b,322b)
381 EX_TABLE(71b,171b)
382 EX_TABLE(271b,371b)
383 EX_TABLE(72b,172b)
384 EX_TABLE(272b,372b)
385 EX_TABLE(244b,344b)
386 EX_TABLE(245b,345b)
387 EX_TABLE(23b,123b)
388 EX_TABLE(73b,173b)
389 EX_TABLE(44b,144b)
390 EX_TABLE(74b,174b)
391 EX_TABLE(45b,145b)
392 EX_TABLE(75b,175b)
393 EX_TABLE(24b,124b)
394 EX_TABLE(25b,125b)
395 EX_TABLE(26b,126b)
396 EX_TABLE(27b,127b)
397 EX_TABLE(28b,128b)
398 EX_TABLE(29b,129b)
399 EX_TABLE(30b,130b)
400 EX_TABLE(31b,131b)
401 EX_TABLE(32b,132b)
402 EX_TABLE(76b,176b)
403 EX_TABLE(33b,133b)
404 EX_TABLE(77b,177b)
405 EX_TABLE(78b,178b)
406 EX_TABLE(79b,179b)
407 EX_TABLE(80b,180b)
408 EX_TABLE(34b,134b)
409 EX_TABLE(94b,194b)
410 EX_TABLE(95b,195b)
411 EX_TABLE(96b,196b)
412 EX_TABLE(35b,135b)
413 EX_TABLE(81b,181b)
414 EX_TABLE(36b,136b)
415 EX_TABLE(82b,182b)
416 EX_TABLE(37b,137b)
417 EX_TABLE(83b,183b)
418 EX_TABLE(38b,138b)
419 EX_TABLE(39b,139b)
420 EX_TABLE(84b,184b)
421 EX_TABLE(85b,185b)
422 EX_TABLE(40b,140b)
423 EX_TABLE(86b,186b)
424 EX_TABLE(41b,141b)
425 EX_TABLE(87b,187b)
426 EX_TABLE(42b,142b)
427 EX_TABLE(88b,188b)
428 EX_TABLE(43b,143b)
429 EX_TABLE(89b,189b)
14cf11af
PM
430
431/*
432 * Routine to copy a whole page of data, optimized for POWER4.
433 * On POWER4 it is more than 50% faster than the simple loop
0f369103 434 * above (following the .Ldst_aligned label).
14cf11af 435 */
/*
 * Taken from the entry code when the length is exactly 4096 and both
 * pointers are 4096-byte aligned.
 *
 * r20-r31 are saved below r1 at -32(1)..-120(1), next to the r3/r4/r5
 * copies stored at entry, and restored at label 9 before returning 0.
 *
 * r5 starts at 4096/32 - 1, i.e. the count of 32-byte units minus one.
 * Each pass of the outer loop (label 0) copies from six streams placed
 * 128 bytes apart (displacements 0, 128, 256, 384, 512, 640), which is
 * 768 bytes = 24 units, so r5 drops by 24 per pass.  The inner loop
 * (label 1) runs ctr = 5 times.  Whatever is left is copied 32 bytes at
 * a time by the loop at label 3 and the epilogue at label 4.
 *
 * r3 is biased by -8 so the stores can use the same displacement
 * pattern as the loads, offset by one doubleword.
 */
3c726f8d 436.Lcopy_page_4K:
14cf11af
PM
437 std r31,-32(1)
438 std r30,-40(1)
439 std r29,-48(1)
440 std r28,-56(1)
441 std r27,-64(1)
442 std r26,-72(1)
443 std r25,-80(1)
444 std r24,-88(1)
445 std r23,-96(1)
446 std r22,-104(1)
447 std r21,-112(1)
448 std r20,-120(1)
449 li r5,4096/32 - 1
450 addi r3,r3,-8
451 li r0,5
4520: addi r5,r5,-24
453 mtctr r0
45420: ld r22,640(4)
45521: ld r21,512(4)
45622: ld r20,384(4)
45723: ld r11,256(4)
45824: ld r9,128(4)
45925: ld r7,0(4)
46026: ld r25,648(4)
46127: ld r24,520(4)
46228: ld r23,392(4)
46329: ld r10,264(4)
46430: ld r8,136(4)
46531: ldu r6,8(4)
/* result is consumed by "bge 0b" after the inner loop; nothing in between changes cr0 */
466 cmpwi r5,24
4671:
46832: std r22,648(3)
46933: std r21,520(3)
47034: std r20,392(3)
47135: std r11,264(3)
47236: std r9,136(3)
47337: std r7,8(3)
47438: ld r28,648(4)
47539: ld r27,520(4)
47640: ld r26,392(4)
47741: ld r31,264(4)
47842: ld r30,136(4)
47943: ld r29,8(4)
48044: std r25,656(3)
48145: std r24,528(3)
48246: std r23,400(3)
48347: std r10,272(3)
48448: std r8,144(3)
48549: std r6,16(3)
48650: ld r22,656(4)
48751: ld r21,528(4)
48852: ld r20,400(4)
48953: ld r11,272(4)
49054: ld r9,144(4)
49155: ld r7,16(4)
49256: std r28,664(3)
49357: std r27,536(3)
49458: std r26,408(3)
49559: std r31,280(3)
49660: std r30,152(3)
49761: stdu r29,24(3)
49862: ld r25,664(4)
49963: ld r24,536(4)
50064: ld r23,408(4)
50165: ld r10,280(4)
50266: ld r8,152(4)
50367: ldu r6,24(4)
504 bdnz 1b
/* flush the last doubleword loaded for each of the six streams */
50568: std r22,648(3)
50669: std r21,520(3)
50770: std r20,392(3)
50871: std r11,264(3)
50972: std r9,136(3)
51073: std r7,8(3)
/* step both pointers over the five streams that were reached only through displacements */
51174: addi r4,r4,640
51275: addi r3,r3,648
/* another full 24-unit pass if r5 was >= 24 at the cmpwi above */
513 bge 0b
/* remaining units: r5 trips of the 32-byte loop, plus the epilogue at 4 */
514 mtctr r5
51576: ld r7,0(4)
51677: ld r8,8(4)
51778: ldu r9,16(4)
5183:
51979: ld r10,8(4)
52080: std r7,8(3)
52181: ld r7,16(4)
52282: std r8,16(3)
52383: ld r8,24(4)
52484: std r9,24(3)
52585: ldu r9,32(4)
52686: stdu r10,32(3)
527 bdnz 3b
5284:
52987: ld r10,8(4)
53088: std r7,8(3)
53189: std r8,16(3)
53290: std r9,24(3)
53391: std r10,32(3)
/* restore the non-volatile registers saved on entry and report success */
5349: ld r20,-120(1)
535 ld r21,-112(1)
536 ld r22,-104(1)
537 ld r23,-96(1)
538 ld r24,-88(1)
539 ld r25,-80(1)
540 ld r26,-72(1)
541 ld r27,-64(1)
542 ld r28,-56(1)
543 ld r29,-48(1)
544 ld r30,-40(1)
545 ld r31,-32(1)
546 li r3,0
547 blr
548
549/*
550 * on an exception, reset to the beginning and jump back into the
551 * standard __copy_tofrom_user
552 */
/*
 * Common handler for every access in .Lcopy_page_4K.  Restores r20-r31,
 * reloads the original r3/r4 saved at entry, sets r5 back to 4096 and
 * restarts the copy from scratch at .Ldst_aligned, so any later fault
 * is handled by the generic handlers.
 *
 * cr1 still holds the entry comparison of the length with 16: the page
 * copy only uses an unqualified cmpwi, which does not name cr1.  cr7
 * was not loaded from r5 on this path, but with r5 = 4096 the generic
 * loop leaves through its "beq+ 3f" and never tests cr7.
 */
553100: ld r20,-120(1)
554 ld r21,-112(1)
555 ld r22,-104(1)
556 ld r23,-96(1)
557 ld r24,-88(1)
558 ld r25,-80(1)
559 ld r26,-72(1)
560 ld r27,-64(1)
561 ld r28,-56(1)
562 ld r29,-48(1)
563 ld r30,-40(1)
564 ld r31,-32(1)
565 ld r3,-24(r1)
566 ld r4,-16(r1)
567 li r5,4096
568 b .Ldst_aligned

24bfa6a9
NP
/*
 * Every numbered label 20..91 in .Lcopy_page_4K maps to handler 100.
 * The "b" suffix binds each reference to the nearest preceding
 * definition, i.e. the page-copy labels rather than the same numbers
 * used earlier in the file.
 */
570 EX_TABLE(20b,100b)
571 EX_TABLE(21b,100b)
572 EX_TABLE(22b,100b)
573 EX_TABLE(23b,100b)
574 EX_TABLE(24b,100b)
575 EX_TABLE(25b,100b)
576 EX_TABLE(26b,100b)
577 EX_TABLE(27b,100b)
578 EX_TABLE(28b,100b)
579 EX_TABLE(29b,100b)
580 EX_TABLE(30b,100b)
581 EX_TABLE(31b,100b)
582 EX_TABLE(32b,100b)
583 EX_TABLE(33b,100b)
584 EX_TABLE(34b,100b)
585 EX_TABLE(35b,100b)
586 EX_TABLE(36b,100b)
587 EX_TABLE(37b,100b)
588 EX_TABLE(38b,100b)
589 EX_TABLE(39b,100b)
590 EX_TABLE(40b,100b)
591 EX_TABLE(41b,100b)
592 EX_TABLE(42b,100b)
593 EX_TABLE(43b,100b)
594 EX_TABLE(44b,100b)
595 EX_TABLE(45b,100b)
596 EX_TABLE(46b,100b)
597 EX_TABLE(47b,100b)
598 EX_TABLE(48b,100b)
599 EX_TABLE(49b,100b)
600 EX_TABLE(50b,100b)
601 EX_TABLE(51b,100b)
602 EX_TABLE(52b,100b)
603 EX_TABLE(53b,100b)
604 EX_TABLE(54b,100b)
605 EX_TABLE(55b,100b)
606 EX_TABLE(56b,100b)
607 EX_TABLE(57b,100b)
608 EX_TABLE(58b,100b)
609 EX_TABLE(59b,100b)
610 EX_TABLE(60b,100b)
611 EX_TABLE(61b,100b)
612 EX_TABLE(62b,100b)
613 EX_TABLE(63b,100b)
614 EX_TABLE(64b,100b)
615 EX_TABLE(65b,100b)
616 EX_TABLE(66b,100b)
617 EX_TABLE(67b,100b)
618 EX_TABLE(68b,100b)
619 EX_TABLE(69b,100b)
620 EX_TABLE(70b,100b)
621 EX_TABLE(71b,100b)
622 EX_TABLE(72b,100b)
623 EX_TABLE(73b,100b)
624 EX_TABLE(74b,100b)
625 EX_TABLE(75b,100b)
626 EX_TABLE(76b,100b)
627 EX_TABLE(77b,100b)
628 EX_TABLE(78b,100b)
629 EX_TABLE(79b,100b)
630 EX_TABLE(80b,100b)
631 EX_TABLE(81b,100b)
632 EX_TABLE(82b,100b)
633 EX_TABLE(83b,100b)
634 EX_TABLE(84b,100b)
635 EX_TABLE(85b,100b)
636 EX_TABLE(86b,100b)
637 EX_TABLE(87b,100b)
638 EX_TABLE(88b,100b)
639 EX_TABLE(89b,100b)
640 EX_TABLE(90b,100b)
641 EX_TABLE(91b,100b)
642
9445aa1a 643EXPORT_SYMBOL(__copy_tofrom_user)
This page took 0.802027 seconds and 4 git commands to generate.