// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2015 Google, Inc
 *
 * Based on code from the coreboot file of the same name
 */

#include <common.h>
#include <cpu.h>
#include <dm.h>
#include <errno.h>
#include <log.h>
#include <malloc.h>
#include <qfw.h>
#include <asm/atomic.h>
#include <asm/cpu.h>
#include <asm/global_data.h>
#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/lapic.h>
#include <asm/microcode.h>
#include <asm/mp.h>
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/processor.h>
#include <asm/sipi.h>
#include <dm/device-internal.h>
#include <dm/uclass-internal.h>
#include <dm/lists.h>
#include <dm/root.h>
#include <linux/delay.h>
#include <linux/linkage.h>

DECLARE_GLOBAL_DATA_PTR;

/*
 * Setting up multiprocessing
 *
 * See https://www.intel.com/content/www/us/en/intelligent-systems/intel-boot-loader-development-kit/minimal-intel-architecture-boot-loader-paper.html
 *
 * Note that this file refers to the boot CPU (the one U-Boot is running on) as
 * the BSP (BootStrap Processor) and the others as APs (Application Processors).
 *
 * This module works by loading some setup code into RAM at AP_DEFAULT_BASE and
 * telling each AP to execute it. The code that each AP runs is in
 * sipi_vector.S (see ap_start16) which includes a struct sipi_params at the
 * end of it. Those parameters are set up by the C code.
 *
 * Setting up is handled by load_sipi_vector(). It inits the common block of
 * parameters (sipi_params) which tell the APs what to do. This block includes
 * microcode and the MTRRs (Memory Type Range Registers) from the main CPU.
 * There is also an ap_count which each AP increments as it starts up, so the
 * BSP can tell how many checked in.
 *
 * The APs are started with a SIPI (Startup Inter-Processor Interrupt) which
 * tells an AP to start executing at a particular address, in this case
 * AP_DEFAULT_BASE which contains the code copied from ap_start16. This protocol
 * is handled by start_aps().
 *
 * After being started, each AP runs the code in ap_start16, switches to 32-bit
 * mode, runs the code at ap_start, then jumps to c_handler which is ap_init().
 * This runs a very simple 'flight plan' described in mp_steps(). This sets up
 * the CPU and waits for further instructions by looking at its entry in
 * ap_callbacks[]. Note that the flight plan is only actually run for each CPU
 * in bsp_do_flight_plan(): once the BSP completes each flight record, it sets
 * mp_flight_record->barrier to 1 to allow the APs to execute the record one
 * by one.
 *
 * CPUs are numbered sequentially from 0 using the device tree:
 *
 *	cpus {
 *		u-boot,dm-pre-reloc;
 *		#address-cells = <1>;
 *		#size-cells = <0>;
 *
 *		cpu@0 {
 *			u-boot,dm-pre-reloc;
 *			device_type = "cpu";
 *			compatible = "intel,apl-cpu";
 *			reg = <0>;
 *			intel,apic-id = <0>;
 *		};
 *
 *		cpu@1 {
 *			device_type = "cpu";
 *			compatible = "intel,apl-cpu";
 *			reg = <1>;
 *			intel,apic-id = <2>;
 *		};
 *	};
 *
 * Here the 'reg' property is the CPU number and is then placed in dev_seq(cpu)
 * so that we can index into ap_callbacks[] using that. The APIC ID is different
 * and may not be sequential (it typically is if hyperthreading is supported).
 *
 * Once APs are inited they wait in ap_wait_for_instruction() for instructions.
 * Instructions come in the form of a function to run. This logic is in
 * mp_run_on_cpus() which supports running on any one AP, all APs, just the BSP
 * or all CPUs. The BSP logic is handled directly in mp_run_on_cpus(), by
 * calling the function. For the APs, callback information is stored in a
 * single, common struct mp_callback and a pointer to this is written to each
 * AP's slot in ap_callbacks[] by run_ap_work(). All APs get the message even
 * if it is only for one of them. When an AP notices a message it checks whether
 * it should call the function (see check in ap_wait_for_instruction()) and then
 * does so if needed. After that it sets its slot to NULL to indicate it is
 * done.
 *
 * While U-Boot is running it can use mp_run_on_cpus() to run code on the APs.
 * An example of this is the 'mtrr' command which allows reading and changing
 * the MTRRs on all CPUs; a minimal sketch of such a call follows this comment.
 *
 * Before U-Boot exits it calls mp_park_aps() which tells all CPUs to halt by
 * executing a 'hlt' instruction. That allows them to be used by Linux when it
 * starts up.
 */
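
/*
 * A minimal usage sketch (illustrative only; set_some_msr() and MY_MSR are
 * made-up names, not part of U-Boot): code elsewhere can ask every CPU to run
 * a function, much as the 'mtrr' command mentioned above does:
 *
 *	static void set_some_msr(void *arg)
 *	{
 *		msr_t *val = arg;
 *
 *		msr_write(MY_MSR, *val);
 *	}
 *
 *	ret = mp_run_on_cpus(MP_SELECT_ALL, set_some_msr, &val);
 *
 * With MP_SELECT_ALL the function runs on the BSP first and then on each AP
 * via the ap_callbacks[] mailbox described above; MP_SELECT_APS, MP_SELECT_BSP
 * or a single CPU number (the dev_seq() of that CPU) can be used instead.
 */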

/* This also needs to match the sipi.S assembly code for saved MSR encoding */
struct __packed saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
};

/**
 * struct mp_flight_plan - Holds the flight plan
 *
 * @num_records: Number of flight records
 * @records: Pointer to each record
 */
struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

/**
 * struct mp_callback - Callback information for APs
 *
 * @func: Function to run
 * @arg: Argument to pass to the function
 * @logical_cpu_number: Either a CPU number (i.e. dev_seq(cpu)) or a special
 *	value like MP_SELECT_BSP. It tells the AP whether it should process this
 *	callback
 */
struct mp_callback {
	mp_run_func func;
	void *arg;
	int logical_cpu_number;
};

/* Stores the flight plan so that APs can find it */
static struct mp_flight_plan mp_info;

/*
 * ap_callbacks - Callback mailbox array
 *
 * Array of callbacks, one entry for each available CPU, indexed by the CPU
 * number, which is dev_seq(cpu). The entry for the main CPU is never used.
 * When this is NULL, there is no pending work for the CPU to run. When
 * non-NULL it points to the mp_callback structure. This is shared between all
 * CPUs, so should only be written by the main CPU.
 */
static struct mp_callback **ap_callbacks;

static inline void barrier_wait(atomic_t *b)
{
	while (atomic_read(b) == 0)
		asm("pause");
	mfence();
}

static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}

static inline void stop_this_cpu(void)
{
	/* Called by an AP when it is ready to halt and wait for a new task */
	for (;;)
		cpu_hlt();
}

/* Returns 1 if timeout waiting for APs. 0 if target APs found */
static int wait_for_aps(atomic_t *val, int target, int total_delay,
			int delay_step)
{
	int timeout = 0;
	int delayed = 0;

	while (atomic_read(val) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}

static void ap_do_flight_plan(struct udevice *cpu)
{
	int i;

	for (i = 0; i < mp_info.num_records; i++) {
		struct mp_flight_record *rec = &mp_info.records[i];

		atomic_inc(&rec->cpus_entered);
		barrier_wait(&rec->barrier);

		if (rec->ap_call != NULL)
			rec->ap_call(cpu, rec->ap_arg);
	}
}

static int find_cpu_by_apic_id(int apic_id, struct udevice **devp)
{
	struct udevice *dev;

	*devp = NULL;
	for (uclass_find_first_device(UCLASS_CPU, &dev);
	     dev;
	     uclass_find_next_device(&dev)) {
		struct cpu_plat *plat = dev_get_parent_plat(dev);

		if (plat->cpu_id == apic_id) {
			*devp = dev;
			return 0;
		}
	}

	return -ENOENT;
}

/*
 * By the time APs call ap_init() caching has been set up, and microcode has
 * been loaded
 */
static void ap_init(unsigned int cpu_index)
{
	struct udevice *dev;
	int apic_id;
	int ret;

	/* Ensure the local apic is enabled */
	enable_lapic();

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, &dev);
	if (ret) {
		debug("Unknown CPU apic_id %x\n", apic_id);
		goto done;
	}

	debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
	      dev ? dev->name : "(apic_id not found)");

	/*
	 * Walk the flight plan, which only returns if CONFIG_SMP_AP_WORK is not
	 * enabled
	 */
	ap_do_flight_plan(dev);

done:
	stop_this_cpu();
}

static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
};

static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t msr;

	msr = msr_read(index);
	entry->index = index;
	entry->lo = msr.lo;
	entry->hi = msr.hi;

	/* Return the next entry */
	entry++;
	return entry;
}

static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;
	msr_t msr;

	/* Determine the number of MTRRs that need to be saved */
	msr = msr_read(MTRR_CAP_MSR);
	num_var_mtrrs = msr.lo & 0xff;

	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;

	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printf("Cannot mirror all %d msrs\n", msr_count);
		return -ENOSPC;
	}

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++)
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
		msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
	}

	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);

	return msr_count;
}

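/*
 * Sizing note (illustrative arithmetic): with the 11 fixed-range MTRRs listed
 * above and a CPU that reports, say, 10 variable MTRRs in MTRR_CAP_MSR,
 * save_bsp_msrs() stores 2 * 10 + 11 + 1 = 32 MSRs. Each struct saved_msr is
 * 12 bytes, so 384 bytes are used, which fits in the 512-byte msr_save[]
 * buffer that load_sipi_vector() below passes in.
 */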
static int load_sipi_vector(atomic_t **ap_countp, int num_cpus)
{
	struct sipi_params_16bit *params16;
	struct sipi_params *params;
	static char msr_save[512];
	char *stack;
	ulong addr;
	int code_len;
	int size;
	int ret;

	/* Copy in the code */
	code_len = ap_start16_code_end - ap_start16;
	debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE,
	      code_len);
	memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len);

	addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
	params16 = (struct sipi_params_16bit *)addr;
	params16->ap_start = (uint32_t)ap_start;
	params16->gdt = (uint32_t)gd->arch.gdt;
	params16->gdt_limit = X86_GDT_SIZE - 1;
	debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);

	params = (struct sipi_params *)sipi_params;
	debug("SIPI 32-bit params at %p\n", params);
	params->idt_ptr = (uint32_t)x86_get_idt();

	params->stack_size = CONFIG_AP_STACK_SIZE;
	size = params->stack_size * num_cpus;
	stack = memalign(4096, size);
	if (!stack)
		return -ENOMEM;
	params->stack_top = (u32)(stack + size);
#if !defined(CONFIG_QEMU) && !defined(CONFIG_HAVE_FSP) && \
	!defined(CONFIG_INTEL_MID)
	params->microcode_ptr = ucode_base;
	debug("Microcode at %x\n", params->microcode_ptr);
#endif
	params->msr_table_ptr = (u32)msr_save;
	ret = save_bsp_msrs(msr_save, sizeof(msr_save));
	if (ret < 0)
		return ret;
	params->msr_count = ret;

	params->c_handler = (uint32_t)&ap_init;

	*ap_countp = &params->ap_count;
	atomic_set(*ap_countp, 0);
	debug("SIPI vector is ready\n");

	return 0;
}

static int check_cpu_devices(int expected_cpus)
{
	int i;

	for (i = 0; i < expected_cpus; i++) {
		struct udevice *dev;
		int ret;

		ret = uclass_find_device(UCLASS_CPU, i, &dev);
		if (ret) {
			debug("Cannot find CPU %d in device tree\n", i);
			return ret;
		}
	}

	return 0;
}

/* Returns 0 on success, -ETIMEDOUT if the wait timed out */
static int apic_wait_timeout(int total_delay, const char *msg)
{
	int total = 0;

	if (!(lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY))
		return 0;

	debug("Waiting for %s...", msg);
	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		udelay(50);
		total += 50;
		if (total >= total_delay) {
			debug("timed out: aborting\n");
			return -ETIMEDOUT;
		}
	}
	debug("done\n");

	return 0;
}

/**
 * start_aps() - Start up the APs and count how many we find
 *
 * This is called on the boot processor to start up all the other processors
 * (here called APs).
 *
 * @num_aps: Number of APs we expect to find
 * @ap_count: Initially zero. Incremented by this function for each AP found
 * @return 0 if all APs were set up correctly or there are none to set up,
 *	-ENOSPC if the SIPI vector is too high in memory,
 *	-ETIMEDOUT if the ICR is busy or the second SIPI fails to complete,
 *	-EIO if not all APs check in correctly
 */
static int start_aps(int num_aps, atomic_t *ap_count)
{
	int sipi_vector;
	/* Max location is 4KiB below 1MiB */
	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;

	if (num_aps == 0)
		return 0;

	/* The vector is sent as a 4k aligned address in one byte */
	sipi_vector = AP_DEFAULT_BASE >> 12;

	if (sipi_vector > max_vector_loc) {
		printf("SIPI vector too large! 0x%08x\n",
		       sipi_vector);
		return -ENOSPC;
	}

	debug("Attempting to start %d APs\n", num_aps);

	if (apic_wait_timeout(1000, "ICR not to be busy"))
		return -ETIMEDOUT;

	/* Send INIT IPI to all but self */
	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_INIT);
	debug("Waiting for 10ms after sending INIT\n");
	mdelay(10);

	/* Send 1st SIPI */
	if (apic_wait_timeout(1000, "ICR not to be busy"))
		return -ETIMEDOUT;

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	if (apic_wait_timeout(10000, "first SIPI to complete"))
		return -ETIMEDOUT;

	/* Wait for CPUs to check in up to 200 us */
	wait_for_aps(ap_count, num_aps, 200, 15);

	/* Send 2nd SIPI */
	if (apic_wait_timeout(1000, "ICR not to be busy"))
		return -ETIMEDOUT;

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	if (apic_wait_timeout(10000, "second SIPI to complete"))
		return -ETIMEDOUT;

	/* Wait for CPUs to check in */
	if (wait_for_aps(ap_count, num_aps, 10000, 50)) {
		debug("Not all APs checked in: %d/%d\n",
		      atomic_read(ap_count), num_aps);
		return -EIO;
	}

	return 0;
}

/**
 * bsp_do_flight_plan() - Do the flight plan on the BSP
 *
 * This runs the flight plan on the main CPU used to boot U-Boot
 *
 * @cpu: Device for the main CPU
 * @plan: Flight plan to run
 * @num_aps: Number of APs (CPUs other than the BSP)
 * @returns 0 on success, -ETIMEDOUT if an AP failed to come up
 */
static int bsp_do_flight_plan(struct udevice *cpu, struct mp_flight_plan *plan,
			      int num_aps)
{
	int i;
	int ret = 0;
	const int timeout_us = 100000;
	const int step_us = 100;

	for (i = 0; i < plan->num_records; i++) {
		struct mp_flight_record *rec = &plan->records[i];

		/* Wait for APs if the record is not released */
		if (atomic_read(&rec->barrier) == 0) {
			/* Wait for the APs to check in */
			if (wait_for_aps(&rec->cpus_entered, num_aps,
					 timeout_us, step_us)) {
				debug("MP record %d timeout\n", i);
				ret = -ETIMEDOUT;
			}
		}

		if (rec->bsp_call != NULL)
			rec->bsp_call(cpu, rec->bsp_arg);

		release_barrier(&rec->barrier);
	}

	return ret;
}

/**
 * get_bsp() - Get information about the bootstrap processor
 *
 * @devp: If non-NULL, returns CPU device corresponding to the BSP
 * @cpu_countp: If non-NULL, returns the total number of CPUs
 * @return CPU number of the BSP, or -ve on error. If multiprocessing is not
 *	enabled, returns 0
 */
static int get_bsp(struct udevice **devp, int *cpu_countp)
{
	char processor_name[CPU_MAX_NAME_LEN];
	struct udevice *dev;
	int apic_id;
	int ret;

	cpu_get_name(processor_name);
	debug("CPU: %s\n", processor_name);

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, &dev);
	if (ret < 0) {
		printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
		return ret;
	}
	ret = cpu_get_count(dev);
	if (ret < 0)
		return log_msg_ret("count", ret);
	if (devp)
		*devp = dev;
	if (cpu_countp)
		*cpu_countp = ret;

	return dev_seq(dev) >= 0 ? dev_seq(dev) : 0;
}

/**
 * read_callback() - Read the pointer in a callback slot
 *
 * This is called by APs to read their callback slot to see if there is a
 * pointer to new instructions
 *
 * @slot: Pointer to the AP's callback slot
 * @return value of that pointer
 */
static struct mp_callback *read_callback(struct mp_callback **slot)
{
	dmb();

	return *slot;
}

/**
 * store_callback() - Store a pointer to the callback slot
 *
 * This is called by the BSP to post a callback to an AP's slot, and by APs to
 * write NULL into the slot when they have finished the work requested by the
 * BSP.
 *
 * @slot: Pointer to the AP's callback slot
 * @val: Value to write (e.g. NULL)
 */
static void store_callback(struct mp_callback **slot, struct mp_callback *val)
{
	*slot = val;
	dmb();
}

/**
 * run_ap_work() - Run a callback on selected APs
 *
 * This writes @callback to all APs and waits for them all to acknowledge it.
 * Note that whether each AP actually calls the callback depends on the value
 * of logical_cpu_number (see struct mp_callback). The logical CPU number is
 * the CPU device's dev_seq() value.
 *
 * @callback: Callback information to pass to all APs
 * @bsp: CPU device for the BSP
 * @num_cpus: The number of CPUs in the system (= number of APs + 1)
 * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for
 *	no timeout
 * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time
 */
static int run_ap_work(struct mp_callback *callback, struct udevice *bsp,
		       int num_cpus, uint expire_ms)
{
	int cur_cpu = dev_seq(bsp);
	int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */
	int cpus_accepted;
	ulong start;
	int i;

	if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) {
		printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n");
		return -ENOTSUPP;
	}

	/* Signal to all the APs to run the func. */
	for (i = 0; i < num_cpus; i++) {
		if (cur_cpu != i)
			store_callback(&ap_callbacks[i], callback);
	}
	mfence();

	/* Wait for all the APs to signal back that call has been accepted. */
	start = get_timer(0);

	do {
		mdelay(1);
		cpus_accepted = 0;

		for (i = 0; i < num_cpus; i++) {
			if (cur_cpu == i)
				continue;
			if (!read_callback(&ap_callbacks[i]))
				cpus_accepted++;
		}

		if (expire_ms && get_timer(start) >= expire_ms) {
			log(UCLASS_CPU, LOGL_CRIT,
			    "AP call expired; %d/%d CPUs accepted\n",
			    cpus_accepted, num_aps);
			return -ETIMEDOUT;
		}
	} while (cpus_accepted != num_aps);

	/* Make sure we can see any data written by the APs */
	mfence();

	return 0;
}

/**
 * ap_wait_for_instruction() - Wait for and process requests from the main CPU
 *
 * This is called by APs (here, everything other than the main boot CPU) to
 * await instructions. They arrive in the form of a function call and argument,
 * which is then called. This uses a simple mailbox with atomic read/set
 *
 * @cpu: CPU that is waiting
 * @unused: Optional argument provided by struct mp_flight_record, not used here
 * @return Does not return
 */
static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
{
	struct mp_callback lcb;
	struct mp_callback **per_cpu_slot;

	if (!IS_ENABLED(CONFIG_SMP_AP_WORK))
		return 0;

	per_cpu_slot = &ap_callbacks[dev_seq(cpu)];

	while (1) {
		struct mp_callback *cb = read_callback(per_cpu_slot);

		if (!cb) {
			asm ("pause");
			continue;
		}

		/* Copy to local variable before using the value */
		memcpy(&lcb, cb, sizeof(lcb));
		mfence();
		if (lcb.logical_cpu_number == MP_SELECT_ALL ||
		    lcb.logical_cpu_number == MP_SELECT_APS ||
		    dev_seq(cpu) == lcb.logical_cpu_number)
			lcb.func(lcb.arg);

		/* Indicate we are finished */
		store_callback(per_cpu_slot, NULL);
	}

	return 0;
}

static int mp_init_cpu(struct udevice *cpu, void *unused)
{
	struct cpu_plat *plat = dev_get_parent_plat(cpu);

	plat->ucode_version = microcode_read_rev();
	plat->device_id = gd->arch.x86_device;

	return device_probe(cpu);
}

static struct mp_flight_record mp_steps[] = {
	MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
	MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
};

int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg)
{
	struct mp_callback lcb = {
		.func = func,
		.arg = arg,
		.logical_cpu_number = cpu_select,
	};
	struct udevice *dev;
	int num_cpus;
	int ret;

	ret = get_bsp(&dev, &num_cpus);
	if (ret < 0)
		return log_msg_ret("bsp", ret);
	if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP ||
	    cpu_select == ret) {
		/* Run on BSP first */
		func(arg);
	}

	if (!IS_ENABLED(CONFIG_SMP_AP_WORK) ||
	    !(gd->flags & GD_FLG_SMP_READY)) {
		/* Allow use of this function on the BSP only */
		if (cpu_select == MP_SELECT_BSP || !cpu_select)
			return 0;
		return -ENOTSUPP;
	}

	/* Allow up to 1 second for all APs to finish */
	ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */);
	if (ret)
		return log_msg_ret("aps", ret);

	return 0;
}

static void park_this_cpu(void *unused)
{
	stop_this_cpu();
}

int mp_park_aps(void)
{
	int ret;

	ret = mp_run_on_cpus(MP_SELECT_APS, park_this_cpu, NULL);
	if (ret)
		return log_ret(ret);

	return 0;
}

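/*
 * mp_first_cpu() and mp_next_cpu() below let callers walk a CPU selection.
 * A minimal sketch of the intended idiom (do_something() is a hypothetical
 * per-CPU action, not part of this file):
 *
 *	for (i = mp_first_cpu(MP_SELECT_APS); i >= 0;
 *	     i = mp_next_cpu(MP_SELECT_APS, i))
 *		do_something(i);
 *
 * The walk ends when mp_next_cpu() returns -EFBIG, or earlier with another
 * negative value on error.
 */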
int mp_first_cpu(int cpu_select)
{
	struct udevice *dev;
	int num_cpus;
	int ret;

	/*
	 * This assumes that CPUs are numbered from 0. This function tries to
	 * avoid assuming that CPU 0 is the boot CPU
	 */
	if (cpu_select == MP_SELECT_ALL)
		return 0; /* start with the first one */

	ret = get_bsp(&dev, &num_cpus);
	if (ret < 0)
		return log_msg_ret("bsp", ret);

	/* Return boot CPU if requested */
	if (cpu_select == MP_SELECT_BSP)
		return ret;

	/* Return something other than the boot CPU, if APs requested */
	if (cpu_select == MP_SELECT_APS && num_cpus > 1)
		return ret == 0 ? 1 : 0;

	/* Try to check for an invalid value */
	if (cpu_select < 0 || cpu_select >= num_cpus)
		return -EINVAL;

	return cpu_select; /* return the only selected one */
}

int mp_next_cpu(int cpu_select, int prev_cpu)
{
	struct udevice *dev;
	int num_cpus;
	int ret;
	int bsp;

	/* If we selected the BSP or a particular single CPU, we are done */
	if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || cpu_select == MP_SELECT_BSP ||
	    cpu_select >= 0)
		return -EFBIG;

	/* Must be doing MP_SELECT_ALL or MP_SELECT_APS; return the next CPU */
	ret = get_bsp(&dev, &num_cpus);
	if (ret < 0)
		return log_msg_ret("bsp", ret);
	bsp = ret;

	/* Move to the next CPU */
	assert(prev_cpu >= 0);
	ret = prev_cpu + 1;

	/* Skip the BSP if needed */
	if (cpu_select == MP_SELECT_APS && ret == bsp)
		ret++;
	if (ret >= num_cpus)
		return -EFBIG;

	return ret;
}

int mp_init(void)
{
	int num_aps, num_cpus;
	atomic_t *ap_count;
	struct udevice *cpu;
	int ret;

	if (IS_ENABLED(CONFIG_QFW)) {
		ret = qemu_cpu_fixup();
		if (ret)
			return ret;
	}

	ret = get_bsp(&cpu, &num_cpus);
	if (ret < 0) {
		debug("Cannot init boot CPU: err=%d\n", ret);
		return ret;
	}

	if (num_cpus < 2)
		debug("Warning: Only 1 CPU is detected\n");

	ret = check_cpu_devices(num_cpus);
	if (ret)
		log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");

	ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *));
	if (!ap_callbacks)
		return -ENOMEM;

	/* Copy needed parameters so that APs have a reference to the plan */
	mp_info.num_records = ARRAY_SIZE(mp_steps);
	mp_info.records = mp_steps;

	/* Load the SIPI vector */
	ret = load_sipi_vector(&ap_count, num_cpus);
	if (ap_count == NULL)
		return -ENOENT;

	/*
	 * Make sure SIPI data hits RAM so the APs that come up will see
	 * the startup code even if the caches are disabled
	 */
	wbinvd();

	/* Start the APs providing number of APs and the cpus_entered field */
	num_aps = num_cpus - 1;
	ret = start_aps(num_aps, ap_count);
	if (ret) {
		mdelay(1000);
		debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
		      num_aps);
		return ret;
	}

	/* Walk the flight plan for the BSP */
	ret = bsp_do_flight_plan(cpu, &mp_info, num_aps);
	if (ret) {
		debug("CPU init failed: err=%d\n", ret);
		return ret;
	}
	gd->flags |= GD_FLG_SMP_READY;

	return 0;
}