]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* memcpy.S: optimised assembly memcpy |
2 | * | |
3 | * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells ([email protected]) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | ||
13 | .text | |
14 | .p2align 4 | |
15 | ||
16 | ############################################################################### | |
17 | # | |
18 | # void *memcpy(void *to, const char *from, size_t count) | |
19 | # | |
20 | # - NOTE: must not use any stack. exception detection performs function return | |
21 | # to caller's fixup routine, aborting the remainder of the copy | |
22 | # | |
23 | ############################################################################### | |
24 | .globl memcpy,__memcpy_end | |
25 | .type memcpy,@function | |
26 | memcpy: | |
27 | or.p gr8,gr9,gr4 ; gr4 = to OR from (combined alignment bits) | |
28 | orcc gr10,gr0,gr0,icc3 ; set icc3 from count; result discarded into gr0 | |
29 | or.p gr10,gr4,gr4 ; gr4 = to OR from OR count | |
30 | beqlr icc3,#0 ; count == 0: return at once, gr8 (to) untouched | |
31 | ||
32 | # optimise based on best common alignment for to, from & count | |
33 | andicc.p gr4,#0x0f,gr0,icc0 ; icc0: are all three 16-byte aligned? | |
34 | setlos #8,gr11 ; gr11 = step of 8, used by both memcpy_8 and memcpy_16 | |
35 | andicc.p gr4,#0x07,gr0,icc1 ; icc1: are all three 8-byte aligned? | |
36 | beq icc0,#0,memcpy_16 | |
37 | andicc.p gr4,#0x03,gr0,icc0 ; icc0: are all three 4-byte aligned? | |
38 | beq icc1,#0,memcpy_8 | |
39 | andicc.p gr4,#0x01,gr0,icc1 ; icc1: are all three 2-byte aligned? | |
40 | beq icc0,#0,memcpy_4 | |
41 | setlos.p #1,gr11 ; gr11 = step of 1 for the byte loop (memcpy_2 reloads it) | |
42 | beq icc1,#0,memcpy_2 | |
43 | ||
44 | # do byte by byte copy | |
45 | sub.p gr8,gr11,gr3 ; gr3 = to - step: working dest pointer, gr8 is kept as-is | |
46 | sub gr9,gr11,gr9 ; from -= step, pre-biased like gr3 for the indexed accesses | |
47 | 0: ldubu.p @(gr9,gr11),gr4 ; load byte from gr9+gr11 (update form, presumably advances gr9) | |
48 | subicc gr10,#1,gr10,icc0 ; count -= 1, icc0 reflects the remaining count | |
49 | stbu.p gr4,@(gr3,gr11) ; store byte to gr3+gr11 (update form, presumably advances gr3) | |
50 | bne icc0,#2,0b ; loop until count reaches zero | |
51 | bralr | |
52 | ||
53 | # do halfword by halfword copy | |
54 | memcpy_2: | |
55 | setlos #2,gr11 ; gr11 = step of 2 | |
56 | sub.p gr8,gr11,gr3 ; gr3 = to - step | |
57 | sub gr9,gr11,gr9 ; from -= step | |
58 | 0: lduhu.p @(gr9,gr11),gr4 ; load halfword from gr9+gr11 | |
59 | subicc gr10,#2,gr10,icc0 ; count -= 2; count is a multiple of 2 on this path | |
60 | sthu.p gr4,@(gr3,gr11) ; store halfword to gr3+gr11 | |
61 | bne icc0,#2,0b | |
62 | bralr | |
63 | ||
64 | # do word by word copy | |
65 | memcpy_4: | |
66 | setlos #4,gr11 ; gr11 = step of 4 | |
67 | sub.p gr8,gr11,gr3 ; gr3 = to - step | |
68 | sub gr9,gr11,gr9 ; from -= step | |
69 | 0: ldu.p @(gr9,gr11),gr4 ; load word from gr9+gr11 | |
70 | subicc gr10,#4,gr10,icc0 ; count -= 4; count is a multiple of 4 on this path | |
71 | stu.p gr4,@(gr3,gr11) ; store word to gr3+gr11 | |
72 | bne icc0,#2,0b | |
73 | bralr | |
74 | ||
75 | # do double-word by double-word copy | |
76 | memcpy_8: | |
77 | sub.p gr8,gr11,gr3 ; gr11 is still 8 from the dispatch code; gr3 = to - 8 | |
78 | sub gr9,gr11,gr9 ; from -= 8 | |
79 | 0: lddu.p @(gr9,gr11),gr4 ; load double-word into the gr4/gr5 pair | |
80 | subicc gr10,#8,gr10,icc0 ; count -= 8; count is a multiple of 8 on this path | |
81 | stdu.p gr4,@(gr3,gr11) ; store the gr4/gr5 pair to gr3+gr11 | |
82 | bne icc0,#2,0b | |
83 | bralr | |
84 | ||
85 | # do quad-word by quad-word copy | |
86 | memcpy_16: | |
87 | sub.p gr8,gr11,gr3 ; gr11 is still 8: each 16-byte round is two 8-byte accesses | |
88 | sub gr9,gr11,gr9 ; from -= 8 | |
89 | 0: lddu @(gr9,gr11),gr4 ; first 8 bytes into the gr4/gr5 pair | |
90 | lddu.p @(gr9,gr11),gr6 ; second 8 bytes into the gr6/gr7 pair | |
91 | subicc gr10,#16,gr10,icc0 ; count -= 16; count is a multiple of 16 on this path | |
92 | stdu gr4,@(gr3,gr11) ; store first 8 bytes | |
93 | stdu.p gr6,@(gr3,gr11) ; store second 8 bytes | |
94 | bne icc0,#2,0b | |
95 | bralr | |
96 | __memcpy_end: | |
97 | ||
98 | .size memcpy, __memcpy_end-memcpy | |
99 | ||
100 | ############################################################################### | |
101 | # | |
102 | # copy to/from userspace | |
103 | # - return the number of bytes that could not be copied (0 on complete success) | |
104 | # | |
105 | # long __memcpy_user(void *dst, const void *src, size_t count) | |
106 | # | |
107 | ############################################################################### | |
108 | .globl __memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler | |
109 | .type __memcpy_user,@function | |
110 | __memcpy_user: | |
111 | movsg lr,gr7 ; gr7 = caller's return address, the call below overwrites LR | |
112 | subi.p sp,#8,sp ; reserve an 8-byte frame | |
113 | add gr8,gr10,gr6 ; calculate expected end address | |
114 | stdi gr6,@(sp,#0) ; save gr6/gr7 pair: [sp+0] = dst+count, [sp+4] = return address | |
115 | ||
116 | # abuse memcpy to do the dirty work | |
117 | call memcpy | |
118 | __memcpy_user_error_lr: ; address memcpy returns to; exported, presumably for the fault fixup code to match against LR | |
119 | ldi.p @(sp,#4),gr7 ; reload the saved return address | |
120 | setlos #0,gr8 ; success: 0 bytes left uncopied | |
121 | jmpl.p @(gr7,gr0) ; return to the caller | |
122 | addi sp,#8,sp ; release the frame | |
123 | ||
124 | # deal with any exception generated by memcpy | |
125 | # GR3 - memcpy's current dest pointer (one step behind the next store address) | |
126 | # GR11 - memcpy's step value (index register for store insns) | |
127 | __memcpy_user_error_handler: | |
128 | lddi.p @(sp,#0),gr4 ; load GR4 with dst+count, GR5 with ret addr | |
129 | add gr11,gr3,gr7 ; gr7 = gr3 + step, the address memcpy's next store would use | |
130 | sub.p gr4,gr7,gr8 ; return value = (dst+count) - gr7, the bytes not copied | |
131 | ||
132 | addi sp,#8,sp ; release the frame | |
133 | jmpl @(gr5,gr0) ; return to the original caller | |
134 | ||
135 | .size __memcpy_user, .-__memcpy_user |