1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * CPU Microcode Update Driver for Linux
9 * X86 CPU microcode early update for Linux:
15 * This driver allows upgrading microcode on x86 processors.
18 #define pr_fmt(fmt) "microcode: " fmt
20 #include <linux/platform_device.h>
21 #include <linux/stop_machine.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/perf_event.h>
38 #include <asm/processor.h>
39 #include <asm/cmdline.h>
40 #include <asm/setup.h>
44 static struct microcode_ops *microcode_ops;
45 bool dis_ucode_ldr = true;
47 bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
48 module_param(force_minrev, bool, S_IRUSR | S_IWUSR);
53 * All non cpu-hotplug-callback call sites use:
55 * - cpus_read_lock/unlock() to synchronize with
56 * the cpu-hotplug-callback call sites.
58 * We guarantee that only a single cpu is being
59 * updated at any particular moment of time.
61 struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
64 * Those patch levels cannot be updated to newer ones and thus should be final.
66 static u32 final_levels[] = {
70 0, /* T-101 terminator */
73 struct early_load_data early_data;
76 * Check the current patch level on this CPU.
79 * - true: if update should stop
82 static bool amd_check_current_patch_level(void)
87 native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
89 levels = final_levels;
91 for (i = 0; levels[i]; i++) {
98 static bool __init check_loader_disabled_bsp(void)
100 static const char *__dis_opt_str = "dis_ucode_ldr";
101 const char *cmdline = boot_command_line;
102 const char *option = __dis_opt_str;
105 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
106 * completely accurate as xen pv guests don't see that CPUID bit set but
107 * that's good enough as they don't land on the BSP path anyway.
109 if (native_cpuid_ecx(1) & BIT(31))
112 if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
113 if (amd_check_current_patch_level())
117 if (cmdline_find_option_bool(cmdline, option) <= 0)
118 dis_ucode_ldr = false;
120 return dis_ucode_ldr;
123 void __init load_ucode_bsp(void)
125 unsigned int cpuid_1_eax;
131 cpuid_1_eax = native_cpuid_eax(1);
133 switch (x86_cpuid_vendor()) {
134 case X86_VENDOR_INTEL:
135 if (x86_family(cpuid_1_eax) < 6)
140 if (x86_family(cpuid_1_eax) < 0x10)
149 if (check_loader_disabled_bsp())
153 load_ucode_intel_bsp(&early_data);
155 load_ucode_amd_bsp(&early_data, cpuid_1_eax);
158 void load_ucode_ap(void)
160 unsigned int cpuid_1_eax;
165 cpuid_1_eax = native_cpuid_eax(1);
167 switch (x86_cpuid_vendor()) {
168 case X86_VENDOR_INTEL:
169 if (x86_family(cpuid_1_eax) >= 6)
170 load_ucode_intel_ap();
173 if (x86_family(cpuid_1_eax) >= 0x10)
174 load_ucode_amd_ap(cpuid_1_eax);
181 struct cpio_data __init find_microcode_in_initrd(const char *path)
183 #ifdef CONFIG_BLK_DEV_INITRD
184 unsigned long start = 0;
188 size = boot_params.hdr.ramdisk_size;
189 /* Early load on BSP has a temporary mapping. */
191 start = initrd_start_early;
193 #else /* CONFIG_X86_64 */
194 size = (unsigned long)boot_params.ext_ramdisk_size << 32;
195 size |= boot_params.hdr.ramdisk_size;
198 start = (unsigned long)boot_params.ext_ramdisk_image << 32;
199 start |= boot_params.hdr.ramdisk_image;
200 start += PAGE_OFFSET;
205 * Fixup the start address: after reserve_initrd() runs, initrd_start
206 * has the virtual address of the beginning of the initrd. It also
207 * possibly relocates the ramdisk. In either case, initrd_start contains
208 * the updated address so use that instead.
211 start = initrd_start;
213 return find_cpio_data(path, (void *)start, size, NULL);
214 #else /* !CONFIG_BLK_DEV_INITRD */
215 return (struct cpio_data){ NULL, 0, "" };
219 static void reload_early_microcode(unsigned int cpu)
223 vendor = x86_cpuid_vendor();
224 family = x86_cpuid_family();
227 case X86_VENDOR_INTEL:
229 reload_ucode_intel();
233 reload_ucode_amd(cpu);
240 /* fake device for request_firmware */
241 static struct platform_device *microcode_pdev;
243 #ifdef CONFIG_MICROCODE_LATE_LOADING
245 * Late loading dance. Why the heavy-handed stomp_machine effort?
247 * - HT siblings must be idle and not execute other code while the other sibling
248 * is loading microcode in order to avoid any negative interactions caused by
251 * - In addition, microcode update on the cores must be serialized until this
252 * requirement can be relaxed in the future. Right now, this is conservative
256 /* Spinwait with timeout */
258 /* Invoke the microcode_apply() callback */
260 /* Proceed without invoking the microcode_apply() callback */
264 struct microcode_ctrl {
265 enum sibling_ctrl ctrl;
266 enum ucode_state result;
267 unsigned int ctrl_cpu;
271 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
272 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
273 static atomic_t late_cpus_in, offline_in_nmi;
274 static unsigned int loops_per_usec;
275 static cpumask_t cpu_offline_mask;
277 static noinstr bool wait_for_cpus(atomic_t *cnt)
279 unsigned int timeout, loops;
281 WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
283 for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
284 if (!raw_atomic_read(cnt))
287 for (loops = 0; loops < loops_per_usec; loops++)
290 /* If invoked directly, tickle the NMI watchdog */
291 if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
292 instrumentation_begin();
293 touch_nmi_watchdog();
294 instrumentation_end();
297 /* Prevent the late comers from making progress and let them time out */
302 static noinstr bool wait_for_ctrl(void)
304 unsigned int timeout, loops;
306 for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
307 if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
310 for (loops = 0; loops < loops_per_usec; loops++)
313 /* If invoked directly, tickle the NMI watchdog */
314 if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
315 instrumentation_begin();
316 touch_nmi_watchdog();
317 instrumentation_end();
324 * Protected against instrumentation up to the point where the primary
325 * thread completed the update. See microcode_nmi_handler() for details.
327 static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
329 /* Initial rendezvous to ensure that all CPUs have arrived */
330 if (!wait_for_cpus(&late_cpus_in)) {
331 raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
336 * Wait for primary threads to complete. If one of them hangs due
337 * to the update, there is no way out. This is non-recoverable
338 * because the CPU might hold locks or resources and confuse the
339 * scheduler, watchdogs etc. There is no way to safely evacuate the
345 instrumentation_begin();
346 panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
347 instrumentation_end();
351 * Protected against instrumentation up to the point where the primary
352 * thread completed the update. See microcode_nmi_handler() for details.
354 static noinstr void load_secondary(unsigned int cpu)
356 unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
357 enum ucode_state ret;
359 if (!load_secondary_wait(ctrl_cpu)) {
360 instrumentation_begin();
361 pr_err_once("load: %d CPUs timed out\n",
362 atomic_read(&late_cpus_in) - 1);
363 instrumentation_end();
367 /* Primary thread completed. Allow to invoke instrumentable code */
368 instrumentation_begin();
370 * If the primary succeeded then invoke the apply() callback,
371 * otherwise copy the state from the primary thread.
373 if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
374 ret = microcode_ops->apply_microcode(cpu);
376 ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
378 this_cpu_write(ucode_ctrl.result, ret);
379 this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
380 instrumentation_end();
383 static void __load_primary(unsigned int cpu)
385 struct cpumask *secondaries = topology_sibling_cpumask(cpu);
386 enum sibling_ctrl ctrl;
387 enum ucode_state ret;
388 unsigned int sibling;
390 /* Initial rendezvous to ensure that all CPUs have arrived */
391 if (!wait_for_cpus(&late_cpus_in)) {
392 this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
393 pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
397 ret = microcode_ops->apply_microcode(cpu);
398 this_cpu_write(ucode_ctrl.result, ret);
399 this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
402 * If the update was successful, let the siblings run the apply()
403 * callback. If not, tell them it's done. This also covers the
404 * case where the CPU has uniform loading at package or system
405 * scope implemented but does not advertise it.
407 if (ret == UCODE_UPDATED || ret == UCODE_OK)
412 for_each_cpu(sibling, secondaries) {
414 per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
418 static bool kick_offline_cpus(unsigned int nr_offl)
420 unsigned int cpu, timeout;
422 for_each_cpu(cpu, &cpu_offline_mask) {
423 /* Enable the rendezvous handler and send NMI */
424 per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
425 apic_send_nmi_to_offline_cpu(cpu);
428 /* Wait for them to arrive */
429 for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
430 if (atomic_read(&offline_in_nmi) == nr_offl)
434 /* Let the others time out */
438 static void release_offline_cpus(void)
442 for_each_cpu(cpu, &cpu_offline_mask)
443 per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
446 static void load_primary(unsigned int cpu)
448 unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
451 /* Kick soft-offlined SMT siblings if required */
453 proceed = kick_offline_cpus(nr_offl);
455 /* If the soft-offlined CPUs did not respond, abort */
459 /* Unconditionally release soft-offlined SMT siblings if required */
461 release_offline_cpus();
465 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
466 * in the NMI rendezvous to protect against a concurrent NMI on affected
469 void noinstr microcode_offline_nmi_handler(void)
471 if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
473 raw_cpu_write(ucode_ctrl.nmi_enabled, false);
474 raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
475 raw_atomic_inc(&offline_in_nmi);
479 static noinstr bool microcode_update_handler(void)
481 unsigned int cpu = raw_smp_processor_id();
483 if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
484 instrumentation_begin();
486 instrumentation_end();
491 instrumentation_begin();
492 touch_nmi_watchdog();
493 instrumentation_end();
499 * Protection against instrumentation is required for CPUs which are not
500 * safe against an NMI which is delivered to the secondary SMT sibling
501 * while the primary thread updates the microcode. Instrumentation can end
502 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
503 * which is the opposite of what the NMI rendezvous is trying to achieve.
505 * The primary thread is safe versus instrumentation as the actual
506 * microcode update handles this correctly. It's only the sibling code
507 * path which must be NMI safe until the primary thread completed the
510 bool noinstr microcode_nmi_handler(void)
512 if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
515 raw_cpu_write(ucode_ctrl.nmi_enabled, false);
516 return microcode_update_handler();
519 static int load_cpus_stopped(void *unused)
521 if (microcode_ops->use_nmi) {
522 /* Enable the NMI handler and raise NMI */
523 this_cpu_write(ucode_ctrl.nmi_enabled, true);
524 apic->send_IPI(smp_processor_id(), NMI_VECTOR);
526 /* Just invoke the handler directly */
527 microcode_update_handler();
532 static int load_late_stop_cpus(bool is_safe)
534 unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
535 unsigned int nr_offl, offline = 0;
536 int old_rev = boot_cpu_data.microcode;
537 struct cpuinfo_x86 prev_info;
540 pr_err("Late microcode loading without minimal revision check.\n");
541 pr_err("You should switch to early loading, if possible.\n");
544 atomic_set(&late_cpus_in, num_online_cpus());
545 atomic_set(&offline_in_nmi, 0);
546 loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
549 * Take a snapshot before the microcode update in order to compare and
550 * check whether any bits changed after an update.
552 store_cpu_caps(&prev_info);
554 if (microcode_ops->use_nmi)
555 static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
557 stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
559 if (microcode_ops->use_nmi)
560 static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
562 /* Analyze the results */
563 for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
564 switch (per_cpu(ucode_ctrl.result, cpu)) {
565 case UCODE_UPDATED: updated++; break;
566 case UCODE_TIMEOUT: timedout++; break;
567 case UCODE_OK: siblings++; break;
568 case UCODE_OFFLINE: offline++; break;
569 default: failed++; break;
573 if (microcode_ops->finalize_late_load)
574 microcode_ops->finalize_late_load(!updated);
577 /* Nothing changed. */
578 if (!failed && !timedout)
581 nr_offl = cpumask_weight(&cpu_offline_mask);
582 if (offline < nr_offl) {
583 pr_warn("%u offline siblings did not respond.\n",
584 nr_offl - atomic_read(&offline_in_nmi));
587 pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
592 if (!is_safe || failed || timedout)
593 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
595 pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
596 if (failed || timedout) {
597 pr_err("load incomplete. %u CPUs timed out or failed\n",
598 num_online_cpus() - (updated + siblings));
600 pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
601 microcode_check(&prev_info);
603 return updated + siblings == num_online_cpus() ? 0 : -EIO;
607 * This function does two things:
609 * 1) Ensure that all required CPUs which are present and have been booted
612 * To pass this check, all primary threads must be online.
614 * If the microcode load is not safe against NMI then all SMT threads
615 * must be online as well because they still react to NMIs when they are
616 * soft-offlined and parked in one of the play_dead() variants. So if a
617 * NMI hits while the primary thread updates the microcode the resulting
618 * behaviour is undefined. The default play_dead() implementation on
619 * modern CPUs uses MWAIT, which is also not guaranteed to be safe
620 * against a microcode update which affects MWAIT.
622 * As soft-offlined CPUs still react to NMIs, the SMT sibling
623 * restriction can be lifted when the vendor driver signals to use NMI
624 * for rendezvous and the APIC provides a mechanism to send an NMI to a
625 * soft-offlined CPU. The soft-offlined CPUs are then able to
626 * participate in the rendezvous in a trivial stub handler.
628 * 2) Initialize the per CPU control structure and create a cpumask
629 * which contains "offline" secondary threads, so they can be handled
630 * correctly by a control CPU.
632 static bool setup_cpus(void)
634 struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
635 bool allow_smt_offline;
638 allow_smt_offline = microcode_ops->nmi_safe ||
639 (microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
641 cpumask_clear(&cpu_offline_mask);
643 for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
645 * Offline CPUs sit in one of the play_dead() functions
646 * with interrupts disabled, but they still react to NMIs
647 * and execute arbitrary code. Also MWAIT being updated
648 * while the offline CPU sits there is not necessarily safe
649 * on all CPU variants.
651 * Mark them in the offline_cpus mask which will be handled
652 * by CPU0 later in the update process.
654 * Ensure that the primary thread is online so that it is
655 * guaranteed that all cores are updated.
657 if (!cpu_online(cpu)) {
658 if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
659 pr_err("CPU %u not online, loading aborted\n", cpu);
662 cpumask_set_cpu(cpu, &cpu_offline_mask);
663 per_cpu(ucode_ctrl, cpu) = ctrl;
668 * Initialize the per CPU state. This is core scope for now,
669 * but prepared to take package or system scope into account.
671 ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
672 per_cpu(ucode_ctrl, cpu) = ctrl;
677 static int load_late_locked(void)
682 switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
684 return load_late_stop_cpus(false);
686 return load_late_stop_cpus(true);
694 static ssize_t reload_store(struct device *dev,
695 struct device_attribute *attr,
696 const char *buf, size_t size)
701 ret = kstrtoul(buf, 0, &val);
706 ret = load_late_locked();
712 static DEVICE_ATTR_WO(reload);
715 static ssize_t version_show(struct device *dev,
716 struct device_attribute *attr, char *buf)
718 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
720 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
723 static ssize_t processor_flags_show(struct device *dev,
724 struct device_attribute *attr, char *buf)
726 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
728 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
731 static DEVICE_ATTR_RO(version);
732 static DEVICE_ATTR_RO(processor_flags);
734 static struct attribute *mc_default_attrs[] = {
735 &dev_attr_version.attr,
736 &dev_attr_processor_flags.attr,
740 static const struct attribute_group mc_attr_group = {
741 .attrs = mc_default_attrs,
745 static void microcode_fini_cpu(int cpu)
747 if (microcode_ops->microcode_fini_cpu)
748 microcode_ops->microcode_fini_cpu(cpu);
752 * microcode_bsp_resume - Update boot CPU microcode during resume.
754 void microcode_bsp_resume(void)
756 int cpu = smp_processor_id();
757 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
760 microcode_ops->apply_microcode(cpu);
762 reload_early_microcode(cpu);
765 static struct syscore_ops mc_syscore_ops = {
766 .resume = microcode_bsp_resume,
769 static int mc_cpu_online(unsigned int cpu)
771 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
772 struct device *dev = get_cpu_device(cpu);
774 memset(uci, 0, sizeof(*uci));
776 microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
777 cpu_data(cpu).microcode = uci->cpu_sig.rev;
779 boot_cpu_data.microcode = uci->cpu_sig.rev;
781 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
782 pr_err("Failed to create group for CPU%d\n", cpu);
786 static int mc_cpu_down_prep(unsigned int cpu)
788 struct device *dev = get_cpu_device(cpu);
790 microcode_fini_cpu(cpu);
791 sysfs_remove_group(&dev->kobj, &mc_attr_group);
795 static struct attribute *cpu_root_microcode_attrs[] = {
796 #ifdef CONFIG_MICROCODE_LATE_LOADING
797 &dev_attr_reload.attr,
802 static const struct attribute_group cpu_root_microcode_group = {
804 .attrs = cpu_root_microcode_attrs,
807 static int __init microcode_init(void)
809 struct device *dev_root;
810 struct cpuinfo_x86 *c = &boot_cpu_data;
816 if (c->x86_vendor == X86_VENDOR_INTEL)
817 microcode_ops = init_intel_microcode();
818 else if (c->x86_vendor == X86_VENDOR_AMD)
819 microcode_ops = init_amd_microcode();
821 pr_err("no support for this CPU vendor\n");
826 pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
828 if (early_data.new_rev)
829 pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
831 microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
832 if (IS_ERR(microcode_pdev))
833 return PTR_ERR(microcode_pdev);
835 dev_root = bus_get_dev_root(&cpu_subsys);
837 error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
838 put_device(dev_root);
840 pr_err("Error creating microcode group!\n");
845 register_syscore_ops(&mc_syscore_ops);
846 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
847 mc_cpu_online, mc_cpu_down_prep);
852 platform_device_unregister(microcode_pdev);
856 late_initcall(microcode_init);