]>
Commit | Line | Data |
---|---|---|
e9f53129 AM |
1 | /* Overlay manager for SPU. |
2 | ||
2e444bea | 3 | Copyright 2006, 2007, 2008 Free Software Foundation, Inc. |
e9f53129 | 4 | |
f96b4a7b | 5 | This file is part of the GNU Binutils. |
e9f53129 | 6 | |
f96b4a7b | 7 | This program is free software; you can redistribute it and/or modify |
e9f53129 | 8 | it under the terms of the GNU General Public License as published by |
f96b4a7b NC |
9 | the Free Software Foundation; either version 3 of the License, or |
10 | (at your option) any later version. | |
e9f53129 | 11 | |
f96b4a7b | 12 | This program is distributed in the hope that it will be useful, |
e9f53129 AM |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
f96b4a7b NC |
18 | along with this program; if not, write to the Free Software |
19 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, | |
20 | MA 02110-1301, USA. */ | |
e9f53129 | 21 | |
47f6dab9 | 22 | /* MFC DMA definitions.
   MFC_GET_CMD is the value written to $MFC_Cmd for each transfer,
   MFC_MAX_DMA_SIZE caps the size of a single transfer in the copy loop,
   MFC_TAG_UPDATE_ALL is written to $MFC_WrTagUpdate when waiting for
   completion, and MFC_TAG_ID is the tag group used for the transfers
   (written to $MFC_TagId and turned into the tag mask). */ |
e9f53129 AM |
23 | #define MFC_GET_CMD 0x40 |
24 | #define MFC_MAX_DMA_SIZE 0x4000 | |
25 | #define MFC_TAG_UPDATE_ALL 2 | |
26 | #define MFC_TAG_ID 0 | |
27 | ||
47f6dab9 AM |
28 | /* Register usage.
   Each scratch register gets one alias per value it carries in the
   routines below, so every instruction names the value it works on
   rather than the physical register.  Two aliases of the same register
   can never hold live values at the same time.
   $75-$79 (reserved1-reserved5, plus parm) are overwritten without being
   saved anywhere in this file; presumably the toolchain keeps them free
   for the overlay manager -- TODO confirm.
   $74-$71 (save1-save4) are stored below $sp before the code overwrites
   them and are reloaded before control reaches the target. */ |
/* $75: table base pointers, the current table entry, tag mask. */
29 | #define reserved1 $75 | |
30 | #define parm $75 | |
31 | #define tab1 reserved1 | |
32 | #define tab2 reserved1 | |
33 | #define vma reserved1 | |
34 | #define oldvma reserved1 | |
35 | #define newmask reserved1 | |
36 | #define map reserved1 | |
37 | ||
/* $76: table offsets, present flags, DMA size and 64-bit add temporaries. */
38 | #define reserved2 $76 | |
39 | #define off1 reserved2 | |
40 | #define off2 reserved2 | |
41 | #define present1 reserved2 | |
42 | #define present2 reserved2 | |
43 | #define sz reserved2 | |
44 | #define cmp reserved2 | |
45 | #define add64 reserved2 | |
46 | #define cgbits reserved2 | |
47 | #define off3 reserved2 | |
48 | #define off4 reserved2 | |
2e444bea | 49 | #define addr4 reserved2 |
47f6dab9 AM |
50 | #define off5 reserved2 |
51 | #define tagstat reserved2 | |
52 | ||
/* $77: size word of an entry, shuffle pattern, DMA command operands,
   later table base pointers. */
53 | #define reserved3 $77 | |
2e444bea AM |
54 | #define size1 reserved3 |
55 | #define size2 reserved3 | |
47f6dab9 AM |
56 | #define rv3 reserved3 |
57 | #define ealo reserved3 | |
58 | #define cmd reserved3 | |
59 | #define off64 reserved3 | |
60 | #define tab3 reserved3 | |
61 | #define tab4 reserved3 | |
62 | #define tab5 reserved3 | |
63 | ||
/* $78: overlay index being switched to, plus short-lived temporaries. */
64 | #define reserved4 $78 | |
65 | #define ovl reserved4 | |
66 | #define rv2 reserved4 | |
67 | #define rv5 reserved4 | |
68 | #define cgshuf reserved4 | |
69 | #define newovl reserved4 | |
99302af9 AM |
70 | #define irqtmp1 reserved4 |
71 | #define irqtmp2 reserved4 | |
47f6dab9 AM |
72 | |
/* $79: address finally branched to; it is never reused for anything else. */
73 | #define reserved5 $79 | |
74 | #define target reserved5 | |
75 | ||
/* $74: stored at -16($sp) while in use. */
99302af9 | 76 | #define save1 $74 |
47f6dab9 AM |
77 | #define rv4 save1 |
78 | #define rv7 save1 | |
79 | #define tagid save1 | |
80 | #define maxsize save1 | |
81 | #define pbyte save1 | |
82 | #define pbit save1 | |
83 | ||
/* $73: stored at -32($sp) while in use. */
84 | #define save2 $73 | |
85 | #define cur save2 | |
86 | #define rv6 save2 | |
87 | #define osize save2 | |
88 | #define zovl save2 | |
89 | #define oldovl save2 | |
90 | #define newvma save2 | |
91 | ||
/* $72: stored at -48($sp) while in use. */
99302af9 | 92 | #define save3 $72 |
47f6dab9 AM |
93 | #define rv1 save3 |
94 | #define ea64 save3 | |
95 | #define buf3 save3 | |
96 | #define genwi save3 | |
97 | #define newmap save3 | |
98 | #define oldmask save3 | |
e9f53129 | 99 | |
/* $71: only touched when OVLY_IRQ_SAVE is defined; stored at -64($sp). */
99302af9 AM |
100 | #define save4 $71 |
101 | #define irq_stat save4 | |
c828a49f | 102 | |
e9f53129 | 103 | .text |
/* Constant shuffle patterns and the current-overlay variable.  All three
   objects are 16 bytes and are accessed with quadword loads/stores (lqr,
   stqr).  Note that __ovly_current is written at run time even though it
   is emitted in .text next to the code. */
47f6dab9 AM |
104 | .align 4 |
/* shufb control used by __ovly_load to start building the new $lr:
   bytes 0x00-0x03 pick word 0 of the first operand (__ovly_return),
   bytes 0x10-0x13 pick word 0 of the second operand (the overlay that was
   current on entry), and the 0x80 bytes leave the remaining two words
   zero (per the SPU shufb encoding -- confirm against the ISA manual). */
105 | .type __rv_pattern, @object | |
106 | .size __rv_pattern, 16 | |
e9f53129 | 107 | __rv_pattern: |
47f6dab9 AM |
108 | .word 0x00010203, 0x10111213, 0x80808080, 0x80808080 |
109 | ||
/* shufb control used by the DMA loop for its 64-bit address add: it moves
   word 1 of the carry vector into word 0 and zeroes everything else, so
   the carry out of the low word becomes the carry in of the high word. */
110 | .type __cg_pattern, @object | |
111 | .size __cg_pattern, 16 | |
e9f53129 | 112 | __cg_pattern: |
47f6dab9 AM |
113 | .word 0x04050607, 0x80808080, 0x80808080, 0x80808080 |
114 | ||
/* Quadword holding the index of the overlay most recently switched to.
   __ovly_return and __ovly_load store the register "ovl" here; __ovly_load
   and do_load read it back. */
115 | .type __ovly_current, @object | |
116 | .size __ovly_current, 16 | |
117 | __ovly_current: | |
118 | .space 16 | |
e9f53129 | 119 | |
47f6dab9 | 120 | /* |
e9f53129 AM |
121 | * __ovly_return - stub for returning from overlay functions. |
122 | * | |
47f6dab9 AM |
123 | * On entry the four slots of $lr are: |
124 | * __ovly_return, prev ovl index, caller return addr, undefined. | |
e9f53129 | 125 | * |
47f6dab9 AM |
126 | * Load the previous overlay and jump to the caller return address. |
127 | * Updates __ovly_current. | |
 *
 * The _ovly_table entry of the previous overlay is fetched and the low
 * bit of its size word is tested.  If the bit is set the overlay is
 * already resident and the stub branches straight to the caller;
 * otherwise it goes to do_load with the entry in "vma" and the return
 * address in "target".
 *
 * save1-save3 (and save4 when OVLY_IRQ_SAVE is defined) are stored below
 * $sp on every pass because do_load uses them as scratch and reloads
 * them from those slots.  This routine itself does not modify them.
 *
 * NOTE(review): the trailing "# p,l c" annotations look like issue
 * pipeline, result latency and scheduled cycle, and the commented-out
 * nop/lnop rows like empty issue slots -- TODO confirm.
e9f53129 | 128 | */ |
47f6dab9 AM |
129 | .align 4 |
130 | .global __ovly_return | |
131 | .type __ovly_return, @function | |
e9f53129 | 132 | __ovly_return: |
47f6dab9 AM |
/* tab1 is the table address minus one 16-byte entry, so indexing with
   ovl * 16 treats overlay indices as starting at 1. */
133 | ila tab1, _ovly_table - 16 # 0,2 0 |
/* Shift $lr left by 4 and 8 bytes so that slot 1 (previous overlay index)
   and slot 2 (caller return address) each end up in slot 0. */
134 | shlqbyi ovl, $lr, 4 # 1,4 0 | |
135 | #nop | |
136 | shlqbyi target, $lr, 8 # 1,4 1 | |
137 | #nop; lnop | |
138 | #nop; lnop | |
/* off1 = ovl * 16, the byte offset of the entry from tab1. */
139 | shli off1, ovl, 4 # 0,4 4 | |
140 | #lnop | |
141 | #nop | |
/* Branch hint for the "bi target" at ovly_ret9. */
142 | hbr ovly_ret9, target # 1,15 5 | |
143 | #nop; lnop | |
144 | #nop; lnop | |
145 | #nop | |
/* vma = the whole table entry {vma, size, file_offset, buf}. */
146 | lqx vma, tab1, off1 # 1,6 8 | |
99302af9 AM |
147 | #ifdef OVLY_IRQ_SAVE |
148 | nop | |
149 | stqd save4, -64($sp) # 1,6 9 | |
150 | #else | |
47f6dab9 | 151 | #nop; lnop |
99302af9 | 152 | #endif |
47f6dab9 AM |
153 | #nop; lnop |
154 | #nop; lnop | |
155 | #nop; lnop | |
156 | #nop; lnop | |
157 | #nop | |
/* Rotate the size word of the entry into slot 0. */
2e444bea | 158 | rotqbyi size1, vma, 4 # 1,4 14 |
47f6dab9 AM |
159 | #nop |
160 | stqd save3, -48($sp) # 1,6 15 | |
161 | #nop | |
162 | stqd save2, -32($sp) # 1,6 16 | |
163 | #nop | |
164 | stqd save1, -16($sp) # 1,6 17 | |
2e444bea AM |
/* present1 = low bit of size: set when the overlay is resident. */
165 | andi present1, size1, 1 # 0,2 18 |
/* The overlay being returned to becomes the current one. */
166 | stqr ovl, __ovly_current # 1,6 18 | |
47f6dab9 AM |
167 | #nop; lnop |
168 | #nop | |
/* Not resident: go and DMA it in; do_load ends with its own "bi target". */
2e444bea | 169 | brz present1, do_load # 1,4 20 |
47f6dab9 AM |
170 | ovly_ret9: |
171 | #nop | |
172 | bi target # 1,4 21 | |
173 | ||
174 | /* |
e9f53129 AM |
175 | * __ovly_load - copy an overlay partition to local store. |
176 | * | |
47f6dab9 AM |
177 | * On entry $75 points to a word consisting of the overlay index in |
178 | * the top 14 bits, and the target address in the bottom 18 bits. | |
e9f53129 | 179 | * |
99302af9 AM |
180 | * Sets up $lr to return via __ovly_return. If $lr is already set |
181 | * to return via __ovly_return, don't change it. In that case we | |
182 | * have a tail call from one overlay function to another. | |
47f6dab9 | 183 | * Updates __ovly_current. |
 *
 * NOTE(review): the $75 convention above is what the OVL_STUB_SIZE == 8
 * variant implements.  The other variant never reads through $75; it
 * uses "ovl" and "target" as they are on entry, so presumably the larger
 * stub has already loaded both registers -- TODO confirm.
 *
 * After the variant-specific entry code both paths have: the table entry
 * in "vma", the previously current overlay in "cur", __ovly_return in
 * "rv1", the compare of $lr against it in "rv2", and save1-save3 stored
 * below $sp.  If the low bit of the entry's size word is set the overlay
 * is resident and the routine branches to "target"; otherwise it enters
 * do_load.
e9f53129 | 184 | */ |
47f6dab9 AM |
185 | .align 3 |
186 | .global __ovly_load | |
187 | .type __ovly_load, @function | |
e9f53129 | 188 | __ovly_load: |
47f6dab9 AM |
189 | #if OVL_STUB_SIZE == 8 |
190 | ######## | |
191 | #nop | |
/* Fetch the quadword that contains the word $75 points to, then rotate by
   the address so that word sits in slot 0. */
192 | lqd target, 0(parm) # 1,6 -11 | |
193 | #nop; lnop | |
194 | #nop; lnop | |
195 | #nop; lnop | |
196 | #nop; lnop | |
197 | #nop; lnop | |
198 | #nop | |
199 | rotqby target, target, parm # 1,4 -5 | |
200 | ila tab2, _ovly_table - 16 # 0,2 -4 | |
201 | stqd save3, -48($sp) # 1,6 -4 | |
202 | #nop | |
203 | stqd save2, -32($sp) # 1,6 -3 | |
204 | #nop | |
205 | stqd save1, -16($sp) # 1,6 -2 | |
/* ovl = target shifted right by 18, i.e. the 14-bit overlay index. */
206 | rotmi ovl, target, -18 # 0,4 -1 | |
207 | hbr ovly_load9, target # 1,15 -1 | |
208 | ila rv1, __ovly_return # 0,2 0 | |
209 | #lnop | |
210 | #nop; lnop | |
211 | #nop | |
/* Read the old current overlay before overwriting it with the new one. */
2e444bea | 212 | lqr cur, __ovly_current # 1,6 2 |
47f6dab9 | 213 | shli off2, ovl, 4 # 0,4 3 |
2e444bea | 214 | stqr ovl, __ovly_current # 1,6 3 |
/* rv2 = per-word equality of $lr and __ovly_return (tail-call test). */
47f6dab9 | 215 | ceq rv2, $lr, rv1 # 0,2 4 |
2e444bea | 216 | lqr rv3, __rv_pattern # 1,6 4 |
47f6dab9 AM |
217 | #nop; lnop |
218 | #nop; lnop | |
219 | #nop | |
220 | lqx vma, tab2, off2 # 1,6 7 | |
221 | ######## | |
222 | #else /* OVL_STUB_SIZE == 16 */ | |
223 | ######## | |
/* Same work as the variant above minus the load and split of the stub
   word; "ovl" and "target" are taken as already set on entry. */
224 | ila tab2, _ovly_table - 16 # 0,2 0 | |
225 | stqd save3, -48($sp) # 1,6 0 | |
226 | ila rv1, __ovly_return # 0,2 1 | |
227 | stqd save2, -32($sp) # 1,6 1 | |
228 | shli off2, ovl, 4 # 0,4 2 | |
2e444bea | 229 | lqr cur, __ovly_current # 1,6 2 |
47f6dab9 | 230 | nop |
2e444bea | 231 | stqr ovl, __ovly_current # 1,6 3 |
47f6dab9 | 232 | ceq rv2, $lr, rv1 # 0,2 4 |
2e444bea | 233 | lqr rv3, __rv_pattern # 1,6 4 |
47f6dab9 AM |
234 | #nop |
235 | hbr ovly_load9, target # 1,15 5 | |
236 | #nop | |
237 | lqx vma, tab2, off2 # 1,6 6 | |
238 | #nop | |
239 | stqd save1, -16($sp) # 1,6 7 | |
240 | ######## | |
c828a49f AM |
241 | #endif |
242 | ||
47f6dab9 AM |
243 | #nop; lnop |
244 | #nop; lnop | |
245 | #nop | |
/* Build the replacement $lr in the layout __ovly_return expects:
   rv4 = {__ovly_return, old current overlay, 0, 0} via __rv_pattern,
   rv6 = $lr moved down by 8 bytes so the caller return address lands in
   slot 2, rv7 = rv4 OR rv6. */
246 | shufb rv4, rv1, cur, rv3 # 1,4 10 | |
247 | #nop | |
/* rv5 = select mask expanded from the compare result in rv2. */
248 | fsmb rv5, rv2 # 1,4 11 | |
249 | #nop | |
250 | rotqmbyi rv6, $lr, -8 # 1,4 12 | |
251 | #nop | |
/* Rotate the size word of the table entry into slot 0. */
2e444bea | 252 | rotqbyi size2, vma, 4 # 1,4 13 |
47f6dab9 AM |
253 | #nop |
/* rv1 (save3) is no longer needed, so save3 is reloaded here. */
254 | lqd save3, -48($sp) # 1,6 14 | |
255 | #nop; lnop | |
256 | or rv7, rv4, rv6 # 0,2 16 | |
/* save2 (cur/rv6) is reloaded; the stored copy stays on the stack for do_load. */
257 | lqd save2, -32($sp) # 1,6 16 | |
/* present2 = low bit of size: set when the overlay is resident. */
2e444bea | 258 | andi present2, size2, 1 # 0,2 17 |
99302af9 AM |
259 | #ifdef OVLY_IRQ_SAVE |
260 | stqd save4, -64($sp) # 1,6 17 | |
261 | #else | |
47f6dab9 | 262 | lnop # 1,0 17 |
99302af9 | 263 | #endif |
47f6dab9 AM |
/* Keep the existing $lr where the mask is set (tail call from another
   overlay function), otherwise install rv7. */
264 | selb $lr, rv7, $lr, rv5 # 0,2 18 |
265 | lqd save1, -16($sp) # 1,6 18 | |
266 | #nop | |
/* Not resident: go and DMA it in; do_load ends with its own "bi target". */
2e444bea | 267 | brz present2, do_load # 1,4 19 |
47f6dab9 AM |
268 | ovly_load9: |
269 | #nop | |
270 | bi target # 1,4 20 | |
271 | ||
272 | /* If we get here, we are about to load a new overlay. |
273 | * "vma" contains the relevant entry from _ovly_table[]. | |
e9f53129 AM |
274 | * extern struct { |
275 | * u32 vma; | |
276 | * u32 size; | |
277 | * u32 file_offset; | |
278 | * u32 buf; | |
279 | * } _ovly_table[]; | |
 *
 * Reached from __ovly_return and __ovly_load through their
 * "brz ..., do_load" with "target" holding the address to branch to once
 * the overlay is resident and __ovly_current already updated.  Those
 * callers have stored save1-save3 (and save4 when OVLY_IRQ_SAVE is
 * defined) below $sp; they are used as scratch here and reloaded from
 * the same slots before the final "bi target".
 *
 * Steps: (1) with OVLY_IRQ_SAVE, record the machine status and continue
 * through "bid"; (2) issue MFC get commands of at most MFC_MAX_DMA_SIZE
 * bytes each until the whole overlay is queued; (3) set the low bit of
 * .size for the new overlay, record it in _ovly_buf_table and fetch the
 * index it replaces; (4) wait for the tag group; (5) clear the low bit
 * of .size for the replaced overlay unless its index is zero;
 * (6) branch to "target".
 *
 * NOTE(review): the instruction-level remarks below follow the SPU ISA
 * as the reviewer understands it -- confirm against the ISA manual.
280 | */ | |
47f6dab9 AM |
281 | .align 3 |
282 | .global __ovly_load_event | |
283 | .type __ovly_load_event, @function | |
b1e37473 | 284 | __ovly_load_event: |
2e444bea | 285 | do_load: |
99302af9 AM |
286 | #ifdef OVLY_IRQ_SAVE |
/* Remember the machine status in irq_stat, then continue at do_load10 via
   "bid" (the interrupt-disabling form of branch indirect). */
287 | ila irqtmp1, do_load10 # 0,2 -5 | |
288 | rotqbyi sz, vma, 8 # 1,4 -5 | |
47f6dab9 | 289 | #nop |
99302af9 | 290 | rdch irq_stat, $SPU_RdMachStat # 1,6 -4 |
47f6dab9 | 291 | #nop |
99302af9 AM |
292 | bid irqtmp1 # 1,4 -3 |
293 | do_load10: | |
294 | nop | |
295 | #else | |
296 | #nop | |
/* sz starts out as the file_offset word of the entry; the first pass of
   the loop adds it to the base effective address. */
297 | rotqbyi sz, vma, 8 # 1,4 0 | |
298 | #endif | |
47f6dab9 AM |
/* osize = bytes still to transfer (the size word; its low bit is clear on
   this path because the present test failed). */
299 | rotqbyi osize, vma, 4 # 1,4 1 |
300 | #nop | |
/* ea64 = 64-bit effective address loaded from the symbol _EAR_. */
301 | lqa ea64, _EAR_ # 1,6 2 | |
302 | #nop | |
2e444bea | 303 | lqr cgshuf, __cg_pattern # 1,6 3 |
47f6dab9 AM |
304 | |
305 | /* We could predict the branch at the end of this loop by adding a few |
306 | instructions, and there are plenty of free cycles to do so without | |
307 | impacting loop execution time. However, it doesn't make a great | |
308 | deal of sense since we need to wait for the dma to complete anyway. */ | |
e9f53129 | 309 | __ovly_xfer_loop: |
47f6dab9 AM |
310 | #nop |
/* ea64 += sz as a 64-bit add over words 0 and 1: off64 is sz moved down
   one word, cg yields the per-word carries, the __cg_pattern shuffle moves
   the carry of the low word up to the high word, and addx adds with that
   carry.  sz is the file offset on the first pass and the size of the
   previous transfer afterwards. */
311 | rotqmbyi off64, sz, -4 # 1,4 4 | |
312 | #nop; lnop | |
313 | #nop; lnop | |
314 | #nop; lnop | |
315 | cg cgbits, ea64, off64 # 0,2 8 | |
316 | #lnop | |
317 | #nop; lnop | |
318 | #nop | |
319 | shufb add64, cgbits, cgbits, cgshuf # 1,4 10 | |
320 | #nop; lnop | |
321 | #nop; lnop | |
322 | #nop; lnop | |
323 | addx add64, ea64, off64 # 0,2 14 | |
324 | #lnop | |
325 | ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15 | |
326 | lnop | |
/* Keep the updated address in ea64 (add64 shares a register with sz) and
   bring its low word into slot 0 for $MFC_EAL. */
327 | ori ea64, add64, 0 # 0,2 16 | |
328 | rotqbyi ealo, add64, 4 # 1,4 16 | |
/* sz = the smaller of osize and maxsize. */
329 | cgt cmp, osize, maxsize # 0,2 17 | |
330 | wrch $MFC_LSA, vma # 1,6 17 | |
331 | #nop; lnop | |
332 | selb sz, osize, maxsize, cmp # 0,2 19 | |
333 | wrch $MFC_EAH, ea64 # 1,6 19 | |
334 | ila tagid, MFC_TAG_ID # 0,2 20 | |
335 | wrch $MFC_EAL, ealo # 1,6 20 | |
336 | ila cmd, MFC_GET_CMD # 0,2 21 | |
337 | wrch $MFC_Size, sz # 1,6 21 | |
/* osize -= sz; the local-store address advances by the same amount. */
338 | sf osize, sz, osize # 0,2 22 | |
339 | wrch $MFC_TagId, tagid # 1,6 22 | |
340 | a vma, vma, sz # 0,2 23 | |
/* The write to $MFC_Cmd is the last of the parameter writes for this
   transfer. */
341 | wrch $MFC_Cmd, cmd # 1,6 23 | |
342 | #nop | |
343 | brnz osize, __ovly_xfer_loop # 1,4 24 | |
344 | ||
345 | /* Now update our data structures while waiting for DMA to complete. |
2e444bea | 346 | Low bit of .size needs to be cleared on the _ovly_table entry |
47f6dab9 AM |
347 | corresponding to the evicted overlay, and set on the entry for the |
348 | newly loaded overlay. Note that no overlay may in fact be evicted | |
2e444bea | 349 | as _ovly_buf_table[] starts with all zeros. Don't zap .size entry |
47f6dab9 AM |
350 | for zero index! Also of course update the _ovly_buf_table entry. */ |
351 | #nop | |
/* "vma" was advanced by the loop, so the entry of the new overlay is
   fetched again using the index kept in __ovly_current. */
2e444bea | 352 | lqr newovl, __ovly_current # 1,6 25 |
47f6dab9 AM |
353 | #nop; lnop |
354 | #nop; lnop | |
355 | #nop; lnop | |
356 | #nop; lnop | |
357 | #nop; lnop | |
358 | shli off3, newovl, 4 # 0,4 31 | |
359 | #lnop | |
360 | ila tab3, _ovly_table - 16 # 0,2 32 | |
361 | #lnop | |
362 | #nop | |
/* pbyte/pbit: a quadword whose only set bit is the low bit of word 1,
   i.e. the low bit of the .size field of a table entry. */
2e444bea | 363 | fsmbi pbyte, 0x100 # 1,4 33 |
47f6dab9 AM |
364 | #nop; lnop |
365 | #nop | |
366 | lqx vma, tab3, off3 # 1,6 35 | |
367 | #nop; lnop | |
368 | andi pbit, pbyte, 1 # 0,2 37 | |
369 | lnop | |
370 | #nop; lnop | |
371 | #nop; lnop | |
372 | #nop; lnop | |
/* Mark the new overlay resident and pull its buf number into slot 0. */
373 | or newvma, vma, pbit # 0,2 41 | |
374 | rotqbyi buf3, vma, 12 # 1,4 41 | |
375 | #nop; lnop | |
376 | #nop | |
377 | stqx newvma, tab3, off3 # 1,6 43 | |
378 | #nop; lnop | |
/* _ovly_buf_table holds one 4-byte overlay index per buffer; the base is
   biased back by one element, matching the bias on _ovly_table. */
379 | shli off4, buf3, 2 # 0,4 45 | |
380 | #lnop | |
2e444bea | 381 | ila tab4, _ovly_buf_table - 4 # 0,2 46 |
47f6dab9 AM |
382 | #lnop |
383 | #nop; lnop | |
384 | #nop; lnop | |
385 | #nop | |
/* map = quadword containing the buffer's slot, genwi = shuffle control
   for inserting a word at that address, addr4 = address of the slot. */
386 | lqx map, tab4, off4 # 1,6 49 | |
387 | #nop | |
388 | cwx genwi, tab4, off4 # 1,4 50 | |
2e444bea AM |
389 | a addr4, tab4, off4 # 0,2 51 |
390 | #lnop | |
47f6dab9 AM |
391 | #nop; lnop |
392 | #nop; lnop | |
393 | #nop; lnop | |
394 | #nop | |
2e444bea AM |
/* oldovl = index previously recorded for this buffer (0 if none);
   newmap = map with the new index inserted in its place. */
395 | rotqby oldovl, map, addr4 # 1,4 55 |
396 | #nop | |
47f6dab9 | 397 | shufb newmap, newovl, map, genwi # 1,4 56 |
/* newmask = tag mask with only the bit for MFC_TAG_ID set. */
e9f53129 | 398 | #if MFC_TAG_ID < 16 |
47f6dab9 | 399 | ila newmask, 1 << MFC_TAG_ID # 0,2 57 |
e9f53129 | 400 | #else |
47f6dab9 | 401 | ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57 |
c828a49f | 402 | #endif |
47f6dab9 AM |
403 | #lnop |
404 | #nop; lnop | |
405 | #nop; lnop | |
2e444bea | 406 | stqd newmap, 0(addr4) # 1,6 60 |
47f6dab9 AM |
407 | |
408 | /* Save app's tagmask, wait for DMA complete, restore mask. */ | |
409 | ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61 | |
410 | rdch oldmask, $MFC_RdTagMask # 1,6 61 | |
411 | #nop | |
412 | wrch $MFC_WrTagMask, newmask # 1,6 62 | |
413 | #nop | |
414 | wrch $MFC_WrTagUpdate, tagstat # 1,6 63 | |
415 | #nop | |
/* The result of this read is not used; the read itself is the wait. */
416 | rdch tagstat, $MFC_RdTagStat # 1,6 64 | |
417 | #nop | |
418 | sync # 1,4 65 | |
419 | /* Any hint prior to the sync is lost. A hint here allows the branch | |
420 | to complete 15 cycles after the hint. With no hint the branch will | |
421 | take 18 or 19 cycles. */ | |
422 | ila tab5, _ovly_table - 16 # 0,2 66 | |
423 | hbr do_load99, target # 1,15 66 | |
424 | shli off5, oldovl, 4 # 0,4 67 | |
425 | wrch $MFC_WrTagMask, oldmask # 1,6 67 | |
/* zovl becomes an all-ones quadword when oldovl is zero, else zero.  It
   is used to blank pbit so that the entry at index zero is written back
   unchanged. */
426 | ceqi zovl, oldovl, 0 # 0,2 68 | |
427 | #lnop | |
428 | #nop; lnop | |
429 | #nop | |
430 | fsm zovl, zovl # 1,4 70 | |
431 | #nop | |
432 | lqx oldvma, tab5, off5 # 1,6 71 | |
433 | #nop | |
/* oldmask (save3) has been written back, so save3 can be reloaded. */
434 | lqd save3, -48($sp) # 1,6 72 | |
435 | #nop; lnop | |
436 | andc pbit, pbit, zovl # 0,2 74 | |
437 | lqd save2, -32($sp) # 1,6 74 | |
99302af9 AM |
438 | #ifdef OVLY_IRQ_SAVE |
/* Keep only bit 0 of the saved machine status; it decides below whether
   "binze" is taken.  Presumably that bit is the interrupt-enable state
   -- TODO confirm. */
439 | ila irqtmp2, do_load90 # 0,2 75 | |
440 | #lnop | |
441 | andi irq_stat, irq_stat, 1 # 0,2 76 | |
442 | #lnop | |
443 | #else | |
47f6dab9 AM |
444 | #nop; lnop |
445 | #nop; lnop | |
99302af9 | 446 | #endif |
47f6dab9 AM |
/* Clear the resident bit of the replaced overlay.  pbit lives in save1,
   which the next row reloads, so this read of pbit must not be moved
   after that reload takes effect. */
447 | andc oldvma, oldvma, pbit # 0,2 77 |
448 | lqd save1, -16($sp) # 1,6 77 | |
99302af9 AM |
449 | nop # 0,0 78 |
450 | #lnop | |
451 | #nop | |
47f6dab9 | 452 | stqx oldvma, tab5, off5 # 1,6 79 |
99302af9 AM |
453 | #nop |
454 | #ifdef OVLY_IRQ_SAVE | |
/* If the saved status bit was set, reach do_load90 through "binze" (the
   interrupt-enabling form); otherwise fall through to it.  save4 is then
   reloaded. */
455 | binze irq_stat, irqtmp2 # 1,4 80 | |
456 | do_load90: | |
457 | #nop | |
458 | lqd save4, -64($sp) # 1,6 84 | |
459 | #else | |
47f6dab9 | 460 | #nop; lnop |
99302af9 | 461 | #endif |
c828a49f | 462 | |
47f6dab9 AM |
/* Global label placed on a nop just before the final branch, after the
   overlay is in place and the tables are updated; presumably a
   breakpoint location for debuggers -- TODO confirm. */
463 | .global _ovly_debug_event |
464 | .type _ovly_debug_event, @function | |
e9f53129 | 465 | _ovly_debug_event: |
e9f53129 | 466 | nop |
e9f53129 | 467 | /* Branch to target address. */ |
47f6dab9 | 468 | do_load99: |
99302af9 | 469 | bi target # 1,4 81/85 |
b1e37473 | 470 | |
/* The size recorded for __ovly_load spans everything from its label to
   here, do_load included. */
47f6dab9 | 471 | .size __ovly_load, . - __ovly_load |