]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
4466b20c PM |
2 | /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */ |
3 | /* Modified by SuperH, Inc. September 2003 */ | |
4 | ! | |
5 | ! Fast SH memset | |
6 | ! | |
7 | ! by Toshiyasu Morita ([email protected]) | |
8 | ! | |
9 | ! SH5 code by J"orn Rennecke ([email protected]) | |
10 | ! Copyright 2002 SuperH Ltd. | |
11 | ! | |
12 | ||
/*
 * Endian-dependent shift selection.
 * SHHI expands to shlld on little-endian builds and to shlrd otherwise;
 * SHLO is always the opposite choice. memset uses SHHI to build its byte
 * mask on the short path; SHLO is not referenced by the routine that follows.
 */
13 | #if __BYTE_ORDER == __LITTLE_ENDIAN | |
14 | #define SHHI shlld | |
15 | #define SHLO shlrd | |
16 | #else | |
17 | #define SHHI shlrd | |
18 | #define SHLO shlld | |
19 | #endif | |
20 | ||
/*
 * memset - fill a memory region with one byte value (SHmedia code).
 *
 * Register usage as visible in this routine:
 *   r2  - destination address; all stores are addressed from it and it is
 *         never used as an instruction destination, so it is unchanged on exit
 *   r3  - fill value; replicated into all eight bytes before any store
 *   r4  - byte count; a count of 0 returns without any memory access
 *   r18 - copied into tr2, which is the target of every return branch
 *   NOTE(review): presumably r2/r3/r4 are the three memset arguments, r2 is
 *   also the return value and r18 is the link register under the SH-5
 *   calling convention - confirm against the ABI document.
 *
 * Registers written: r3, r4 (short path only), r5, r7, r8, r9, r20,
 * r22, r23, r24, r25, tr0, tr1, tr2.
 *
 * Three paths:
 *   short     - misalignment + count <= 8: one read-modify-write of a quadword
 *   multiquad - fewer than 8 quadword steps: paired stores from both ends
 *   loop      - 8 or more quadword steps: 32 bytes per iteration plus a fixed tail
 */
21 | .section .text..SHmedia32,"ax" | |
22 | .globl memset | |
23 | .type memset, @function | |
24 | ||
25 | .align 5 | |
26 | ||
27 | memset: | |
// Branch targets (tr0, tr2) are prepared up front, interleaved with the arithmetic.
// r22 = r2 & 7, the offset of the destination inside its aligned quadword.
// r23 = count + r22; a value above 8 means the fill extends past that quadword.
28 | pta/l multiquad, tr0 | |
29 | andi r2, 7, r22 | |
30 | ptabs r18, tr2 | |
31 | mshflo.b r3,r3,r3 | |
32 | add r4, r22, r23 | |
33 | mperm.w r3, r63, r3 // Fill pattern now in every byte of r3 | |
34 | ||
35 | movi 8, r9 | |
36 | bgtu/u r23, r9, tr0 // multiquad | |
37 | ||
// Short path: the whole fill lies inside one aligned quadword.
// The existing data is loaded into r7, a mask is built in r8 from the count,
// mcmv combines old data and fill pattern under that mask, and the result is stored back.
// NOTE(review): the shift amount is count*4 and the shift is applied twice (count*8 bits
// in total), presumably so that a count of 8 clears the mask completely instead of the
// shift amount wrapping - confirm against the shlld/shlrd definition.
38 | beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses | |
39 | ldlo.q r2, 0, r7 | |
40 | shlli r4, 2, r4 | |
41 | movi -1, r8 | |
42 | SHHI r8, r4, r8 | |
43 | SHHI r8, r4, r8 | |
44 | mcmv r7, r8, r3 | |
45 | stlo.q r2, 0, r3 | |
46 | blink tr2, r63 | |
47 | ||
48 | multiquad: | |
// Long path: the fill crosses at least one 8-byte boundary.
// The stlo.q at r2 writes the head; NOTE(review): presumably only the bytes from r2 up to
// the next 8-byte boundary - confirm against the stlo.q definition.
// r24 = (count + misalignment) >> 3, the number of 8-byte boundaries crossed.
// r5  = r2 + r4, the address one past the last byte to fill.
// When r24 is 1 only the head store and the tail store at lastquad are needed.
49 | pta/l lastquad, tr0 | |
50 | stlo.q r2, 0, r3 | |
51 | shlri r23, 3, r24 | |
52 | add r2, r4, r5 | |
53 | beqi/u r24, 1, tr0 // lastquad | |
// r25 = r2 - r22, the destination rounded down to an 8-byte boundary.
// r20 = r5 & -8, the end address rounded down; it equals r25 + 8 * r24.
// r8  = r20 - 56, the bound tested by the main loop.
54 | pta/l loop, tr1 | |
55 | sub r2, r22, r25 | |
56 | andi r5, -8, r20 // calculate end address and | |
57 | addi r20, -7*8, r8 // loop end address; This might overflow, so we need | |
58 | // to use a different test before we start the loop | |
// r9 still holds 8: eight or more quadword steps go to the main loop.
59 | bge/u r24, r9, tr1 // loop | |
// Here 2 <= r24 <= 7. Full quadwords are stored in pairs, one counted up from the
// aligned start and one counted down from the aligned end. For even r24 the last pair
// hits the same quadword twice, which is harmless because both stores write r3.
// r24 is halved so that the two tests below select 1, 2 or 3 pairs.
60 | st.q r25, 8, r3 | |
61 | st.q r20, -8, r3 | |
62 | shlri r24, 1, r24 | |
63 | beqi/u r24, 1, tr0 // lastquad | |
64 | st.q r25, 16, r3 | |
65 | st.q r20, -16, r3 | |
66 | beqi/u r24, 2, tr0 // lastquad | |
67 | st.q r25, 24, r3 | |
68 | st.q r20, -24, r3 | |
69 | lastquad: | |
// Tail store addressed at the last byte of the region (r5 - 1), then return via tr2.
70 | sthi.q r5, -1, r3 | |
71 | blink tr2,r63 | |
72 | ||
73 | loop: | |
// Main loop: four aligned quadword stores (32 bytes) per iteration at r25+8 .. r25+32,
// then r25 advances by 32. The loop repeats while r8 >= r25 (unsigned compare).
74 | !!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895. | |
75 | // QQQ commenting out is logically correct, but sub-optimal | |
76 | // QQQ Sean McGoogan - 4th April 2003. | |
77 | st.q r25, 8, r3 | |
78 | st.q r25, 16, r3 | |
79 | st.q r25, 24, r3 | |
80 | st.q r25, 32, r3 | |
81 | addi r25, 32, r25 | |
82 | bgeu/l r8, r25, tr1 // loop | |
83 | ||
// On loop exit at most five quadwords remain below r20. Five stores addressed back
// from the aligned end cover them unconditionally; some may rewrite quadwords the loop
// already filled. The final sthi.q at r5 - 1 writes the tail, then return via tr2.
84 | st.q r20, -40, r3 | |
85 | st.q r20, -32, r3 | |
86 | st.q r20, -24, r3 | |
87 | st.q r20, -16, r3 | |
88 | st.q r20, -8, r3 | |
89 | sthi.q r5, -1, r3 | |
90 | blink tr2,r63 | |
91 | ||
92 | .size memset,.-memset |