1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * CPU Microcode Update Driver for Linux
9 * X86 CPU microcode early update for Linux:
15 * This driver allows upgrading microcode on x86 processors.
18 #define pr_fmt(fmt) "microcode: " fmt
20 #include <linux/platform_device.h>
21 #include <linux/stop_machine.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/perf_event.h>
38 #include <asm/processor.h>
39 #include <asm/cmdline.h>
40 #include <asm/setup.h>
44 static struct microcode_ops *microcode_ops;
45 bool dis_ucode_ldr = true;
47 bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
48 module_param(force_minrev, bool, S_IRUSR | S_IWUSR);
53 * All non cpu-hotplug-callback call sites use:
55 * - cpus_read_lock/unlock() to synchronize with
56 * the cpu-hotplug-callback call sites.
58 * We guarantee that only a single cpu is being
59 * updated at any particular moment of time.
61 struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
64 struct cpu_signature *cpu_sig;
69 * Those patch levels cannot be updated to newer ones and thus should be final.
71 static u32 final_levels[] = {
75 0, /* T-101 terminator */
78 struct early_load_data early_data;
81 * Check the current patch level on this CPU.
84 * - true: if update should stop
87 static bool amd_check_current_patch_level(void)
92 native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
94 levels = final_levels;
96 for (i = 0; levels[i]; i++) {
103 static bool __init check_loader_disabled_bsp(void)
105 static const char *__dis_opt_str = "dis_ucode_ldr";
106 const char *cmdline = boot_command_line;
107 const char *option = __dis_opt_str;
110 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
111 * completely accurate as xen pv guests don't see that CPUID bit set but
112 * that's good enough as they don't land on the BSP path anyway.
114 if (native_cpuid_ecx(1) & BIT(31))
117 if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
118 if (amd_check_current_patch_level())
122 if (cmdline_find_option_bool(cmdline, option) <= 0)
123 dis_ucode_ldr = false;
125 return dis_ucode_ldr;
128 void __init load_ucode_bsp(void)
130 unsigned int cpuid_1_eax;
136 cpuid_1_eax = native_cpuid_eax(1);
138 switch (x86_cpuid_vendor()) {
139 case X86_VENDOR_INTEL:
140 if (x86_family(cpuid_1_eax) < 6)
145 if (x86_family(cpuid_1_eax) < 0x10)
154 if (check_loader_disabled_bsp())
158 load_ucode_intel_bsp(&early_data);
160 load_ucode_amd_bsp(&early_data, cpuid_1_eax);
163 void load_ucode_ap(void)
165 unsigned int cpuid_1_eax;
170 cpuid_1_eax = native_cpuid_eax(1);
172 switch (x86_cpuid_vendor()) {
173 case X86_VENDOR_INTEL:
174 if (x86_family(cpuid_1_eax) >= 6)
175 load_ucode_intel_ap();
178 if (x86_family(cpuid_1_eax) >= 0x10)
179 load_ucode_amd_ap(cpuid_1_eax);
186 struct cpio_data __init find_microcode_in_initrd(const char *path)
188 #ifdef CONFIG_BLK_DEV_INITRD
189 unsigned long start = 0;
193 size = boot_params.hdr.ramdisk_size;
194 /* Early load on BSP has a temporary mapping. */
196 start = initrd_start_early;
198 #else /* CONFIG_X86_64 */
199 size = (unsigned long)boot_params.ext_ramdisk_size << 32;
200 size |= boot_params.hdr.ramdisk_size;
203 start = (unsigned long)boot_params.ext_ramdisk_image << 32;
204 start |= boot_params.hdr.ramdisk_image;
205 start += PAGE_OFFSET;
210 * Fixup the start address: after reserve_initrd() runs, initrd_start
211 * has the virtual address of the beginning of the initrd. It also
212 * possibly relocates the ramdisk. In either case, initrd_start contains
213 * the updated address so use that instead.
216 start = initrd_start;
218 return find_cpio_data(path, (void *)start, size, NULL);
219 #else /* !CONFIG_BLK_DEV_INITRD */
220 return (struct cpio_data){ NULL, 0, "" };
224 static void reload_early_microcode(unsigned int cpu)
228 vendor = x86_cpuid_vendor();
229 family = x86_cpuid_family();
232 case X86_VENDOR_INTEL:
234 reload_ucode_intel();
238 reload_ucode_amd(cpu);
245 /* fake device for request_firmware */
246 static struct platform_device *microcode_pdev;
248 #ifdef CONFIG_MICROCODE_LATE_LOADING
250 * Late loading dance. Why the heavy-handed stomp_machine effort?
252 * - HT siblings must be idle and not execute other code while the other sibling
253 * is loading microcode in order to avoid any negative interactions caused by
256 * - In addition, microcode update on the cores must be serialized until this
257 * requirement can be relaxed in the future. Right now, this is conservative
261 /* Spinwait with timeout */
263 /* Invoke the microcode_apply() callback */
265 /* Proceed without invoking the microcode_apply() callback */
269 struct microcode_ctrl {
270 enum sibling_ctrl ctrl;
271 enum ucode_state result;
272 unsigned int ctrl_cpu;
276 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
277 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
278 static atomic_t late_cpus_in, offline_in_nmi;
279 static unsigned int loops_per_usec;
280 static cpumask_t cpu_offline_mask;
282 static noinstr bool wait_for_cpus(atomic_t *cnt)
284 unsigned int timeout, loops;
286 WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);
288 for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
289 if (!raw_atomic_read(cnt))
292 for (loops = 0; loops < loops_per_usec; loops++)
295 /* If invoked directly, tickle the NMI watchdog */
296 if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
297 instrumentation_begin();
298 touch_nmi_watchdog();
299 instrumentation_end();
302 /* Prevent the late comers from making progress and let them time out */
307 static noinstr bool wait_for_ctrl(void)
309 unsigned int timeout, loops;
311 for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
312 if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
315 for (loops = 0; loops < loops_per_usec; loops++)
318 /* If invoked directly, tickle the NMI watchdog */
319 if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
320 instrumentation_begin();
321 touch_nmi_watchdog();
322 instrumentation_end();
329 * Protected against instrumentation up to the point where the primary
330 * thread completed the update. See microcode_nmi_handler() for details.
332 static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
334 /* Initial rendezvous to ensure that all CPUs have arrived */
335 if (!wait_for_cpus(&late_cpus_in)) {
336 raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
341 * Wait for primary threads to complete. If one of them hangs due
342 * to the update, there is no way out. This is non-recoverable
343 * because the CPU might hold locks or resources and confuse the
344 * scheduler, watchdogs etc. There is no way to safely evacuate the
350 instrumentation_begin();
351 panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
352 instrumentation_end();
356 * Protected against instrumentation up to the point where the primary
357 * thread completed the update. See microcode_nmi_handler() for details.
359 static noinstr void load_secondary(unsigned int cpu)
361 unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
362 enum ucode_state ret;
364 if (!load_secondary_wait(ctrl_cpu)) {
365 instrumentation_begin();
366 pr_err_once("load: %d CPUs timed out\n",
367 atomic_read(&late_cpus_in) - 1);
368 instrumentation_end();
372 /* Primary thread completed. Allow invoking instrumentable code */
373 instrumentation_begin();
375 * If the primary succeeded then invoke the apply() callback,
376 * otherwise copy the state from the primary thread.
378 if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
379 ret = microcode_ops->apply_microcode(cpu);
381 ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
383 this_cpu_write(ucode_ctrl.result, ret);
384 this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
385 instrumentation_end();
388 static void __load_primary(unsigned int cpu)
390 struct cpumask *secondaries = topology_sibling_cpumask(cpu);
391 enum sibling_ctrl ctrl;
392 enum ucode_state ret;
393 unsigned int sibling;
395 /* Initial rendezvous to ensure that all CPUs have arrived */
396 if (!wait_for_cpus(&late_cpus_in)) {
397 this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
398 pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
402 ret = microcode_ops->apply_microcode(cpu);
403 this_cpu_write(ucode_ctrl.result, ret);
404 this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
407 * If the update was successful, let the siblings run the apply()
408 * callback. If not, tell them it's done. This also covers the
409 * case where the CPU has uniform loading at package or system
410 * scope implemented but does not advertise it.
412 if (ret == UCODE_UPDATED || ret == UCODE_OK)
417 for_each_cpu(sibling, secondaries) {
419 per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
423 static bool kick_offline_cpus(unsigned int nr_offl)
425 unsigned int cpu, timeout;
427 for_each_cpu(cpu, &cpu_offline_mask) {
428 /* Enable the rendezvous handler and send NMI */
429 per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
430 apic_send_nmi_to_offline_cpu(cpu);
433 /* Wait for them to arrive */
434 for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
435 if (atomic_read(&offline_in_nmi) == nr_offl)
439 /* Let the others time out */
443 static void release_offline_cpus(void)
447 for_each_cpu(cpu, &cpu_offline_mask)
448 per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
451 static void load_primary(unsigned int cpu)
453 unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
456 /* Kick soft-offlined SMT siblings if required */
458 proceed = kick_offline_cpus(nr_offl);
460 /* If the soft-offlined CPUs did not respond, abort */
464 /* Unconditionally release soft-offlined SMT siblings if required */
466 release_offline_cpus();
470 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
471 * in the NMI rendezvous to protect against a concurrent NMI on affected
474 void noinstr microcode_offline_nmi_handler(void)
476 if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
478 raw_cpu_write(ucode_ctrl.nmi_enabled, false);
479 raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
480 raw_atomic_inc(&offline_in_nmi);
484 static noinstr bool microcode_update_handler(void)
486 unsigned int cpu = raw_smp_processor_id();
488 if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
489 instrumentation_begin();
491 instrumentation_end();
496 instrumentation_begin();
497 touch_nmi_watchdog();
498 instrumentation_end();
504 * Protection against instrumentation is required for CPUs which are not
505 * safe against an NMI which is delivered to the secondary SMT sibling
506 * while the primary thread updates the microcode. Instrumentation can end
507 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
508 * which is the opposite of what the NMI rendezvous is trying to achieve.
510 * The primary thread is safe versus instrumentation as the actual
511 * microcode update handles this correctly. It's only the sibling code
512 * path which must be NMI safe until the primary thread completed the
515 bool noinstr microcode_nmi_handler(void)
517 if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
520 raw_cpu_write(ucode_ctrl.nmi_enabled, false);
521 return microcode_update_handler();
524 static int load_cpus_stopped(void *unused)
526 if (microcode_ops->use_nmi) {
527 /* Enable the NMI handler and raise NMI */
528 this_cpu_write(ucode_ctrl.nmi_enabled, true);
529 apic->send_IPI(smp_processor_id(), NMI_VECTOR);
531 /* Just invoke the handler directly */
532 microcode_update_handler();
537 static int load_late_stop_cpus(bool is_safe)
539 unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
540 unsigned int nr_offl, offline = 0;
541 int old_rev = boot_cpu_data.microcode;
542 struct cpuinfo_x86 prev_info;
545 pr_err("Late microcode loading without minimal revision check.\n");
546 pr_err("You should switch to early loading, if possible.\n");
549 atomic_set(&late_cpus_in, num_online_cpus());
550 atomic_set(&offline_in_nmi, 0);
551 loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
554 * Take a snapshot before the microcode update in order to compare and
555 * check whether any bits changed after an update.
557 store_cpu_caps(&prev_info);
559 if (microcode_ops->use_nmi)
560 static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
562 stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
564 if (microcode_ops->use_nmi)
565 static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
567 /* Analyze the results */
568 for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
569 switch (per_cpu(ucode_ctrl.result, cpu)) {
570 case UCODE_UPDATED: updated++; break;
571 case UCODE_TIMEOUT: timedout++; break;
572 case UCODE_OK: siblings++; break;
573 case UCODE_OFFLINE: offline++; break;
574 default: failed++; break;
578 if (microcode_ops->finalize_late_load)
579 microcode_ops->finalize_late_load(!updated);
582 /* Nothing changed. */
583 if (!failed && !timedout)
586 nr_offl = cpumask_weight(&cpu_offline_mask);
587 if (offline < nr_offl) {
588 pr_warn("%u offline siblings did not respond.\n",
589 nr_offl - atomic_read(&offline_in_nmi));
592 pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
597 if (!is_safe || failed || timedout)
598 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
600 pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
601 if (failed || timedout) {
602 pr_err("load incomplete. %u CPUs timed out or failed\n",
603 num_online_cpus() - (updated + siblings));
605 pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
606 microcode_check(&prev_info);
608 return updated + siblings == num_online_cpus() ? 0 : -EIO;
612 * This function does two things:
614 * 1) Ensure that all required CPUs which are present and have been booted
617 * To pass this check, all primary threads must be online.
619 * If the microcode load is not safe against NMI then all SMT threads
620 * must be online as well because they still react to NMIs when they are
621 * soft-offlined and parked in one of the play_dead() variants. So if a
622 * NMI hits while the primary thread updates the microcode the resulting
623 * behaviour is undefined. The default play_dead() implementation on
624 * modern CPUs uses MWAIT, which is also not guaranteed to be safe
625 * against a microcode update which affects MWAIT.
627 * As soft-offlined CPUs still react on NMIs, the SMT sibling
628 * restriction can be lifted when the vendor driver signals to use NMI
629 * for rendezvous and the APIC provides a mechanism to send an NMI to a
630 * soft-offlined CPU. The soft-offlined CPUs are then able to
631 * participate in the rendezvous in a trivial stub handler.
633 * 2) Initialize the per CPU control structure and create a cpumask
634 * which contains "offline" secondary threads, so they can be handled
635 * correctly by a control CPU.
637 static bool setup_cpus(void)
639 struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
640 bool allow_smt_offline;
643 allow_smt_offline = microcode_ops->nmi_safe ||
644 (microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
646 cpumask_clear(&cpu_offline_mask);
648 for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
650 * Offline CPUs sit in one of the play_dead() functions
651 * with interrupts disabled, but they still react on NMIs
652 * and execute arbitrary code. Also MWAIT being updated
653 * while the offline CPU sits there is not necessarily safe
654 * on all CPU variants.
656 * Mark them in cpu_offline_mask which will be handled
657 * by CPU0 later in the update process.
659 * Ensure that the primary thread is online so that it is
660 * guaranteed that all cores are updated.
662 if (!cpu_online(cpu)) {
663 if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
664 pr_err("CPU %u not online, loading aborted\n", cpu);
667 cpumask_set_cpu(cpu, &cpu_offline_mask);
668 per_cpu(ucode_ctrl, cpu) = ctrl;
673 * Initialize the per CPU state. This is core scope for now,
674 * but prepared to take package or system scope into account.
676 ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
677 per_cpu(ucode_ctrl, cpu) = ctrl;
682 static int load_late_locked(void)
687 switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
689 return load_late_stop_cpus(false);
691 return load_late_stop_cpus(true);
699 static ssize_t reload_store(struct device *dev,
700 struct device_attribute *attr,
701 const char *buf, size_t size)
706 ret = kstrtoul(buf, 0, &val);
711 ret = load_late_locked();
717 static DEVICE_ATTR_WO(reload);
720 static ssize_t version_show(struct device *dev,
721 struct device_attribute *attr, char *buf)
723 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
725 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
728 static ssize_t processor_flags_show(struct device *dev,
729 struct device_attribute *attr, char *buf)
731 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
733 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
736 static DEVICE_ATTR_RO(version);
737 static DEVICE_ATTR_RO(processor_flags);
739 static struct attribute *mc_default_attrs[] = {
740 &dev_attr_version.attr,
741 &dev_attr_processor_flags.attr,
745 static const struct attribute_group mc_attr_group = {
746 .attrs = mc_default_attrs,
750 static void microcode_fini_cpu(int cpu)
752 if (microcode_ops->microcode_fini_cpu)
753 microcode_ops->microcode_fini_cpu(cpu);
757 * microcode_bsp_resume - Update boot CPU microcode during resume.
759 void microcode_bsp_resume(void)
761 int cpu = smp_processor_id();
762 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
765 microcode_ops->apply_microcode(cpu);
767 reload_early_microcode(cpu);
770 static struct syscore_ops mc_syscore_ops = {
771 .resume = microcode_bsp_resume,
774 static int mc_cpu_online(unsigned int cpu)
776 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
777 struct device *dev = get_cpu_device(cpu);
779 memset(uci, 0, sizeof(*uci));
781 microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
782 cpu_data(cpu).microcode = uci->cpu_sig.rev;
784 boot_cpu_data.microcode = uci->cpu_sig.rev;
786 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
787 pr_err("Failed to create group for CPU%d\n", cpu);
791 static int mc_cpu_down_prep(unsigned int cpu)
793 struct device *dev = get_cpu_device(cpu);
795 microcode_fini_cpu(cpu);
796 sysfs_remove_group(&dev->kobj, &mc_attr_group);
800 static struct attribute *cpu_root_microcode_attrs[] = {
801 #ifdef CONFIG_MICROCODE_LATE_LOADING
802 &dev_attr_reload.attr,
807 static const struct attribute_group cpu_root_microcode_group = {
809 .attrs = cpu_root_microcode_attrs,
812 static int __init microcode_init(void)
814 struct device *dev_root;
815 struct cpuinfo_x86 *c = &boot_cpu_data;
821 if (c->x86_vendor == X86_VENDOR_INTEL)
822 microcode_ops = init_intel_microcode();
823 else if (c->x86_vendor == X86_VENDOR_AMD)
824 microcode_ops = init_amd_microcode();
826 pr_err("no support for this CPU vendor\n");
831 pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
833 if (early_data.new_rev)
834 pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
836 microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
837 if (IS_ERR(microcode_pdev))
838 return PTR_ERR(microcode_pdev);
840 dev_root = bus_get_dev_root(&cpu_subsys);
842 error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
843 put_device(dev_root);
845 pr_err("Error creating microcode group!\n");
850 register_syscore_ops(&mc_syscore_ops);
851 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
852 mc_cpu_online, mc_cpu_down_prep);
857 platform_device_unregister(microcode_pdev);
861 late_initcall(microcode_init);