// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2020-21 IBM Corp.
 */

#define pr_fmt(fmt) "vas: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <asm/machdep.h>
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#include <asm/firmware.h>
#include <asm/vphn.h>
#include <asm/vas.h>
#include "vas.h"

24 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
25 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
26 /* The hypervisor allows one credit per window right now */
27 #define DEF_WIN_CREDS 1
29 static struct vas_all_caps caps_all;
30 static bool copypaste_feat;
31 static struct hv_vas_cop_feat_caps hv_cop_caps;
33 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
34 static DEFINE_MUTEX(vas_pseries_mutex);
35 static bool migration_in_progress;
37 static long hcall_return_busy_check(long rc)
39 /* Check if we are stalled for some time */
40 if (H_IS_LONG_BUSY(rc)) {
43 * Allocate, Modify and Deallocate HCALLs returns
44 * H_LONG_BUSY_ORDER_1_MSEC or H_LONG_BUSY_ORDER_10_MSEC
45 * for the long delay. So the sleep time should always
46 * be either 1 or 10msecs, but in case if the HCALL
47 * returns the long delay > 10 msecs, clamp the sleep
50 ms = clamp(get_longbusy_msecs(rc), 1, 10);
53 * msleep() will often sleep at least 20 msecs even
54 * though the hypervisor suggests that the OS reissue
55 * HCALLs after 1 or 10msecs. Also the delay hint from
56 * the HCALL is just a suggestion. So OK to pause for
57 * less time than the hinted delay. Use usleep_range()
58 * to ensure we don't sleep much longer than actually
61 usleep_range(ms * (USEC_PER_MSEC / 10), ms * USEC_PER_MSEC);
63 } else if (rc == H_BUSY) {
71 * Allocate VAS window hcall
73 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
74 u8 wintype, u16 credits)
76 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
80 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
81 credits, domain[0], domain[1], domain[2],
82 domain[3], domain[4], domain[5]);
84 rc = hcall_return_busy_check(rc);
85 } while (rc == H_BUSY);
87 if (rc == H_SUCCESS) {
88 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
89 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
92 win->vas_win.winid = retbuf[0];
93 win->win_addr = retbuf[1];
94 win->complete_irq = retbuf[2];
95 win->fault_irq = retbuf[3];
99 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
100 rc, wintype, credits);
106 * Deallocate VAS window hcall.
108 static int h_deallocate_vas_window(u64 winid)
113 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
115 rc = hcall_return_busy_check(rc);
116 } while (rc == H_BUSY);
121 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
128 * After the window is opened with allocate window hcall, configure it
129 * with flags and LPAR PID before using.
131 static int h_modify_vas_window(struct pseries_vas_window *win)
136 * AMR value is not supported in Linux VAS implementation.
137 * The hypervisor ignores it if 0 is passed.
140 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
141 win->vas_win.winid, win->pid, 0,
142 VAS_MOD_WIN_FLAGS, 0);
144 rc = hcall_return_busy_check(rc);
145 } while (rc == H_BUSY);
150 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
151 rc, win->vas_win.winid, win->pid);
156 * This hcall is used to determine the capabilities from the hypervisor.
157 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
158 * @query_type: If 0 is passed, the hypervisor returns the overall
159 * capabilities which provides all feature(s) that are
160 * available. Then query the hypervisor to get the
161 * corresponding capabilities for the specific feature.
162 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
163 * and VAS GZIP Default capabilities.
164 * H_QUERY_NX_CAPABILITIES provides NX GZIP
166 * @result: Return buffer to save capabilities.
168 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
172 rc = plpar_hcall_norets(hcall, query_type, result);
177 /* H_FUNCTION means HV does not support VAS so don't print an error */
178 if (rc != H_FUNCTION) {
179 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
180 (hcall == H_QUERY_VAS_CAPABILITIES) ?
181 "H_QUERY_VAS_CAPABILITIES" :
182 "H_QUERY_NX_CAPABILITIES",
183 rc, query_type, result);
188 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
191 * hcall to get fault CRB from the hypervisor.
193 static int h_get_nx_fault(u32 winid, u64 buffer)
197 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
202 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
209 * Handle the fault interrupt.
210 * When the fault interrupt is received for each window, query the
211 * hypervisor to get the fault CRB on the specific fault. Then
212 * process the CRB by updating CSB or send signal if the user space
214 * Note: The hypervisor forwards an interrupt for each fault request.
215 * So one fault CRB to process for each H_GET_NX_FAULT hcall.
217 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
219 struct pseries_vas_window *txwin = data;
220 struct coprocessor_request_block crb;
221 struct vas_user_win_ref *tsk_ref;
224 while (atomic_read(&txwin->pending_faults)) {
225 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
227 tsk_ref = &txwin->vas_win.task_ref;
229 vas_update_csb(&crb, tsk_ref);
231 atomic_dec(&txwin->pending_faults);
238 * irq_default_primary_handler() can be used only with IRQF_ONESHOT
239 * which disables IRQ before executing the thread handler and enables
240 * it after. But this disabling interrupt sets the VAS IRQ OFF
241 * state in the hypervisor. If the NX generates fault interrupt
242 * during this window, the hypervisor will not deliver this
243 * interrupt to the LPAR. So use VAS specific IRQ handler instead
244 * of calling the default primary handler.
246 static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
248 struct pseries_vas_window *txwin = data;
251 * The thread handler will process this interrupt if it is
254 atomic_inc(&txwin->pending_faults);
256 return IRQ_WAKE_THREAD;
260 * Allocate window and setup IRQ mapping.
262 static int allocate_setup_window(struct pseries_vas_window *txwin,
263 u64 *domain, u8 wintype)
267 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
271 * On PowerVM, the hypervisor setup and forwards the fault
272 * interrupt per window. So the IRQ setup and fault handling
273 * will be done for each open window separately.
275 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
276 if (!txwin->fault_virq) {
277 pr_err("Failed irq mapping %d\n", txwin->fault_irq);
282 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
283 txwin->vas_win.winid);
289 rc = request_threaded_irq(txwin->fault_virq,
290 pseries_vas_irq_handler,
291 pseries_vas_fault_thread_fn, 0,
294 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
295 txwin->vas_win.winid, txwin->fault_virq, rc);
299 txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
305 irq_dispose_mapping(txwin->fault_virq);
307 h_deallocate_vas_window(txwin->vas_win.winid);
311 static inline void free_irq_setup(struct pseries_vas_window *txwin)
313 free_irq(txwin->fault_virq, txwin);
315 irq_dispose_mapping(txwin->fault_virq);
318 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
319 enum vas_cop_type cop_type)
321 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
322 struct vas_cop_feat_caps *cop_feat_caps;
323 struct vas_caps *caps;
324 struct pseries_vas_window *txwin;
327 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
329 return ERR_PTR(-ENOMEM);
332 * A VAS window can have many credits which means that many
333 * requests can be issued simultaneously. But the hypervisor
334 * restricts one credit per window.
335 * The hypervisor introduces 2 different types of credits:
336 * Default credit type (Uses normal priority FIFO):
337 * A limited number of credits are assigned to partitions
338 * based on processor entitlement. But these credits may be
339 * over-committed on a system depends on whether the CPUs
340 * are in shared or dedicated modes - that is, more requests
341 * may be issued across the system than NX can service at
342 * once which can result in paste command failure (RMA_busy).
343 * Then the process has to resend requests or fall-back to
345 * Quality of Service (QoS) credit type (Uses high priority FIFO):
346 * To avoid NX HW contention, the system admins can assign
347 * QoS credits for each LPAR so that this partition is
348 * guaranteed access to NX resources. These credits are
349 * assigned to partitions via the HMC.
350 * Refer PAPR for more information.
352 * Allocate window with QoS credits if user requested. Otherwise
353 * default credits are used.
355 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
356 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
358 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
360 cop_feat_caps = &caps->caps;
362 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
363 atomic_read(&cop_feat_caps->nr_total_credits)) {
364 pr_err_ratelimited("Credits are not available to allocate window\n");
371 * The user space is requesting to allocate a window on
372 * a VAS instance where the process is executing.
373 * On PowerVM, domain values are passed to the hypervisor
374 * to select VAS instance. Useful if the process is
375 * affinity to NUMA node.
376 * The hypervisor selects VAS instance if
377 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
378 * The h_allocate_vas_window hcall is defined to take a
379 * domain values as specified by h_home_node_associativity,
380 * So no unpacking needs to be done.
382 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
383 VPHN_FLAG_VCPU, hard_smp_processor_id());
384 if (rc != H_SUCCESS) {
385 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
390 txwin->pid = mfspr(SPRN_PID);
393 * Allocate / Deallocate window hcalls and setup / free IRQs
394 * have to be protected with mutex.
395 * Open VAS window: Allocate window hcall and setup IRQ
396 * Close VAS window: Deallocate window hcall and free IRQ
397 * The hypervisor waits until all NX requests are
398 * completed before closing the window. So expects OS
399 * to handle NX faults, means IRQ can be freed only
400 * after the deallocate window hcall is returned.
401 * So once the window is closed with deallocate hcall before
402 * the IRQ is freed, it can be assigned to new allocate
403 * hcall with the same fault IRQ by the hypervisor. It can
404 * result in setup IRQ fail for the new window since the
405 * same fault IRQ is not freed by the OS before.
407 mutex_lock(&vas_pseries_mutex);
408 if (migration_in_progress) {
411 rc = allocate_setup_window(txwin, (u64 *)&domain[0],
412 cop_feat_caps->win_type);
414 caps->nr_open_wins_progress++;
417 mutex_unlock(&vas_pseries_mutex);
422 * Modify window and it is ready to use.
424 rc = h_modify_vas_window(txwin);
426 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
430 txwin->win_type = cop_feat_caps->win_type;
433 * The migration SUSPEND thread sets migration_in_progress and
434 * closes all open windows from the list. But the window is
435 * added to the list after open and modify HCALLs. So possible
436 * that migration_in_progress is set before modify HCALL which
437 * may cause some windows are still open when the hypervisor
438 * initiates the migration.
439 * So checks the migration_in_progress flag again and close all
442 * Possible to lose the acquired credit with DLPAR core
443 * removal after the window is opened. So if there are any
444 * closed windows (means with lost credits), do not give new
445 * window to user space. New windows will be opened only
446 * after the existing windows are reopened when credits are
449 mutex_lock(&vas_pseries_mutex);
450 if (!caps->nr_close_wins && !migration_in_progress) {
451 list_add(&txwin->win_list, &caps->list);
452 caps->nr_open_windows++;
453 caps->nr_open_wins_progress--;
454 mutex_unlock(&vas_pseries_mutex);
455 vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
456 return &txwin->vas_win;
458 mutex_unlock(&vas_pseries_mutex);
460 put_vas_user_win_ref(&txwin->vas_win.task_ref);
462 pr_err_ratelimited("No credit is available to allocate window\n");
466 * Window is not operational. Free IRQ before closing
467 * window so that do not have to hold mutex.
469 free_irq_setup(txwin);
470 h_deallocate_vas_window(txwin->vas_win.winid);
472 * Hold mutex and reduce nr_open_wins_progress counter.
474 mutex_lock(&vas_pseries_mutex);
475 caps->nr_open_wins_progress--;
476 mutex_unlock(&vas_pseries_mutex);
478 atomic_dec(&cop_feat_caps->nr_used_credits);
483 static u64 vas_paste_address(struct vas_window *vwin)
485 struct pseries_vas_window *win;
487 win = container_of(vwin, struct pseries_vas_window, vas_win);
488 return win->win_addr;
491 static int deallocate_free_window(struct pseries_vas_window *win)
496 * The hypervisor waits for all requests including faults
497 * are processed before closing the window - Means all
498 * credits have to be returned. In the case of fault
499 * request, a credit is returned after OS issues
500 * H_GET_NX_FAULT hcall.
501 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
504 rc = h_deallocate_vas_window(win->vas_win.winid);
511 static int vas_deallocate_window(struct vas_window *vwin)
513 struct pseries_vas_window *win;
514 struct vas_cop_feat_caps *caps;
520 win = container_of(vwin, struct pseries_vas_window, vas_win);
522 /* Should not happen */
523 if (win->win_type >= VAS_MAX_FEAT_TYPE) {
524 pr_err("Window (%u): Invalid window type %u\n",
525 vwin->winid, win->win_type);
529 caps = &vascaps[win->win_type].caps;
530 mutex_lock(&vas_pseries_mutex);
532 * VAS window is already closed in the hypervisor when
533 * lost the credit or with migration. So just remove the entry
534 * from the list, remove task references and free vas_window
537 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
538 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
539 rc = deallocate_free_window(win);
541 mutex_unlock(&vas_pseries_mutex);
545 vascaps[win->win_type].nr_close_wins--;
547 list_del(&win->win_list);
548 atomic_dec(&caps->nr_used_credits);
549 vascaps[win->win_type].nr_open_windows--;
550 mutex_unlock(&vas_pseries_mutex);
552 mm_context_remove_vas_window(vwin->task_ref.mm);
553 put_vas_user_win_ref(&vwin->task_ref);
559 static const struct vas_user_win_ops vops_pseries = {
560 .open_win = vas_allocate_window, /* Open and configure window */
561 .paste_addr = vas_paste_address, /* To do copy/paste */
562 .close_win = vas_deallocate_window, /* Close window */
566 * Supporting only nx-gzip coprocessor type now, but this API code
567 * extended to other coprocessor types later.
569 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
575 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
577 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
/* Counterpart of vas_register_api_pseries() */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);

586 * Get the specific capabilities based on the feature type.
587 * Right now supports GZIP default and GZIP QoS capabilities.
589 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
590 struct hv_vas_cop_feat_caps *hv_caps)
592 struct vas_cop_feat_caps *caps;
593 struct vas_caps *vcaps;
596 vcaps = &vascaps[type];
597 memset(vcaps, 0, sizeof(*vcaps));
598 INIT_LIST_HEAD(&vcaps->list);
603 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
604 (u64)virt_to_phys(hv_caps));
608 caps->user_mode = hv_caps->user_mode;
609 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
610 pr_err("User space COPY/PASTE is not supported\n");
614 caps->descriptor = be64_to_cpu(hv_caps->descriptor);
615 caps->win_type = hv_caps->win_type;
616 if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
617 pr_err("Unsupported window type %u\n", caps->win_type);
620 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
621 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
622 atomic_set(&caps->nr_total_credits,
623 be16_to_cpu(hv_caps->target_lpar_creds));
624 if (feat == VAS_GZIP_DEF_FEAT) {
625 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
627 if (caps->max_win_creds < DEF_WIN_CREDS) {
628 pr_err("Window creds(%u) > max allowed window creds(%u)\n",
629 DEF_WIN_CREDS, caps->max_win_creds);
634 rc = sysfs_add_vas_caps(caps);
638 copypaste_feat = true;
644 * VAS windows can be closed due to lost credits when the core is
645 * removed. So reopen them if credits are available due to DLPAR
646 * core add and set the window active status. When NX sees the page
647 * fault on the unmapped paste address, the kernel handles the fault
648 * by setting the remapping to new paste address if the window is
651 static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
654 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
655 struct vas_cop_feat_caps *caps = &vcaps->caps;
656 struct pseries_vas_window *win = NULL, *tmp;
661 * Nothing to do if there are no closed windows.
663 if (!vcaps->nr_close_wins)
667 * For the core removal, the hypervisor reduces the credits
668 * assigned to the LPAR and the kernel closes VAS windows
669 * in the hypervisor depends on reduced credits. The kernel
670 * uses LIFO (the last windows that are opened will be closed
671 * first) and expects to open in the same order when credits
673 * For example, 40 windows are closed when the LPAR lost 2 cores
674 * (dedicated). If 1 core is added, this LPAR can have 20 more
675 * credits. It means the kernel can reopen 20 windows. So move
676 * 20 entries in the VAS windows lost and reopen next 20 windows.
677 * For partition migration, reopen all windows that are closed
680 if ((vcaps->nr_close_wins > creds) && !migrate)
681 mv_ents = vcaps->nr_close_wins - creds;
683 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
691 * Open windows if they are closed only with migration or
692 * DLPAR (lost credit) before.
695 flag = VAS_WIN_MIGRATE_CLOSE;
697 flag = VAS_WIN_NO_CRED_CLOSE;
699 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
701 * This window is closed with DLPAR and migration events.
702 * So reopen the window with the last event.
703 * The user space is not suspended with the current
704 * migration notifier. So the user space can issue DLPAR
705 * CPU hotplug while migration in progress. In this case
706 * this window will be opened with the last event.
708 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
709 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
710 win->vas_win.status &= ~flag;
715 * Nothing to do on this window if it is not closed
718 if (!(win->vas_win.status & flag))
721 rc = allocate_setup_window(win, (u64 *)&domain[0],
726 rc = h_modify_vas_window(win);
730 mutex_lock(&win->vas_win.task_ref.mmap_mutex);
732 * Set window status to active
734 win->vas_win.status &= ~flag;
735 mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
736 win->win_type = caps->win_type;
737 if (!--vcaps->nr_close_wins)
744 * Window modify HCALL failed. So close the window to the
745 * hypervisor and return.
748 h_deallocate_vas_window(win->vas_win.winid);
753 * The hypervisor reduces the available credits if the LPAR lost core. It
754 * means the excessive windows should not be active and the user space
755 * should not be using these windows to send compression requests to NX.
756 * So the kernel closes the excessive windows and unmap the paste address
757 * such that the user space receives paste instruction failure. Then up to
758 * the user space to fall back to SW compression and manage with the
761 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
764 struct pseries_vas_window *win, *tmp;
765 struct vas_user_win_ref *task_ref;
766 struct vm_area_struct *vma;
770 flag = VAS_WIN_MIGRATE_CLOSE;
772 flag = VAS_WIN_NO_CRED_CLOSE;
774 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
776 * This window is already closed due to lost credit
777 * or for migration before. Go for next window.
778 * For migration, nothing to do since this window
779 * closed for DLPAR and will be reopened even on
780 * the destination system with other DLPAR operation.
782 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
783 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
784 win->vas_win.status |= flag;
788 task_ref = &win->vas_win.task_ref;
790 * VAS mmap (coproc_mmap()) and its fault handler
791 * (vas_mmap_fault()) are called after holding mmap lock.
792 * So hold mmap mutex after mmap_lock to avoid deadlock.
794 mmap_write_lock(task_ref->mm);
795 mutex_lock(&task_ref->mmap_mutex);
798 * Number of available credits are reduced, So select
801 win->vas_win.status |= flag;
804 * vma is set in the original mapping. But this mapping
805 * is done with mmap() after the window is opened with ioctl.
806 * so we may not see the original mapping if the core remove
807 * is done before the original mmap() and after the ioctl.
812 mutex_unlock(&task_ref->mmap_mutex);
813 mmap_write_unlock(task_ref->mm);
815 * Close VAS window in the hypervisor, but do not
816 * free vas_window struct since it may be reused
817 * when the credit is available later (DLPAR with
818 * adding cores). This struct will be used
819 * later when the process issued with close(FD).
821 rc = deallocate_free_window(win);
823 * This failure is from the hypervisor.
824 * No way to stop migration for these failures.
825 * So ignore error and continue closing other windows.
830 vcap->nr_close_wins++;
833 * For migration, do not depend on lpar_creds in case if
834 * mismatch with the hypervisor value (should not happen).
835 * So close all active windows in the list and will be
836 * reopened windows based on the new lpar_creds on the
837 * destination system during resume.
839 if (!migrate && !--excess_creds)
847 * Get new VAS capabilities when the core add/removal configuration
848 * changes. Reconfig window configurations based on the credits
849 * availability from this new capabilities.
851 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
853 struct vas_cop_feat_caps *caps;
855 struct vas_caps *vcaps;
856 int rc = 0, nr_active_wins;
858 if (type >= VAS_MAX_FEAT_TYPE) {
859 pr_err("Invalid credit type %d\n", type);
863 vcaps = &vascaps[type];
866 mutex_lock(&vas_pseries_mutex);
868 old_nr_creds = atomic_read(&caps->nr_total_credits);
870 atomic_set(&caps->nr_total_credits, new_nr_creds);
872 * The total number of available credits may be decreased or
873 * increased with DLPAR operation. Means some windows have to be
874 * closed / reopened. Hold the vas_pseries_mutex so that the
875 * user space can not open new windows.
877 if (old_nr_creds < new_nr_creds) {
879 * If the existing target credits is less than the new
880 * target, reopen windows if they are closed due to
881 * the previous DLPAR (core removal).
883 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
887 * # active windows is more than new LPAR available
888 * credits. So close the excessive windows.
889 * On pseries, each window will have 1 credit.
891 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
892 if (nr_active_wins > new_nr_creds)
893 rc = reconfig_close_windows(vcaps,
894 nr_active_wins - new_nr_creds,
898 mutex_unlock(&vas_pseries_mutex);
902 int pseries_vas_dlpar_cpu(void)
904 int new_nr_creds, rc;
907 * NX-GZIP is not enabled. Nothing to do for DLPAR event
913 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
914 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
915 (u64)virt_to_phys(&hv_cop_caps));
917 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
918 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
922 pr_err("Failed reconfig VAS capabilities with DLPAR\n");
928 * Total number of default credits available (target_credits)
929 * in LPAR depends on number of cores configured. It varies based on
930 * whether processors are in shared mode or dedicated mode.
931 * Get the notifier when CPU configuration is changed with DLPAR
932 * operation so that get the new target_credits (vas default capabilities)
933 * and then update the existing windows usage if needed.
935 static int pseries_vas_notifier(struct notifier_block *nb,
936 unsigned long action, void *data)
938 struct of_reconfig_data *rd = data;
939 struct device_node *dn = rd->dn;
940 const __be32 *intserv = NULL;
944 * For shared CPU partition, the hypervisor assigns total credits
945 * based on entitled core capacity. So updating VAS windows will
946 * be called from lparcfg_write().
948 if (is_shared_processor())
951 if ((action == OF_RECONFIG_ATTACH_NODE) ||
952 (action == OF_RECONFIG_DETACH_NODE))
953 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
956 * Processor config is not changed
961 return pseries_vas_dlpar_cpu();
964 static struct notifier_block pseries_vas_nb = {
965 .notifier_call = pseries_vas_notifier,
969 * For LPM, all windows have to be closed on the source partition
970 * before migration and reopen them on the destination partition
971 * after migration. So closing windows during suspend and
972 * reopen them during resume.
974 int vas_migration_handler(int action)
976 struct vas_cop_feat_caps *caps;
977 int old_nr_creds, new_nr_creds = 0;
978 struct vas_caps *vcaps;
981 pr_info("VAS migration event %d\n", action);
984 * NX-GZIP is not enabled. Nothing to do for migration.
989 if (action == VAS_SUSPEND)
990 migration_in_progress = true;
992 migration_in_progress = false;
994 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
997 old_nr_creds = atomic_read(&caps->nr_total_credits);
999 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
1001 (u64)virt_to_phys(&hv_cop_caps));
1003 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
1005 * Should not happen. But incase print messages, close
1006 * all windows in the list during suspend and reopen
1007 * windows based on new lpar_creds on the destination
1010 if (old_nr_creds != new_nr_creds) {
1011 pr_err("Target credits mismatch with the hypervisor\n");
1012 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
1013 action, old_nr_creds, new_nr_creds);
1014 pr_err("Used creds: %d, Active creds: %d\n",
1015 atomic_read(&caps->nr_used_credits),
1016 vcaps->nr_open_windows - vcaps->nr_close_wins);
1019 pr_err("state(%d): Get VAS capabilities failed with %d\n",
1022 * We can not stop migration with the current lpm
1023 * implementation. So continue closing all windows in
1024 * the list (during suspend) and return without
1025 * opening windows (during resume) if VAS capabilities
1028 if (action == VAS_RESUME)
1034 mutex_lock(&vas_pseries_mutex);
1035 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
1038 * Windows are included in the list after successful
1039 * open. So wait for closing these in-progress open
1040 * windows in vas_allocate_window() which will be
1041 * done if the migration_in_progress is set.
1043 while (vcaps->nr_open_wins_progress) {
1044 mutex_unlock(&vas_pseries_mutex);
1046 mutex_lock(&vas_pseries_mutex);
1048 mutex_unlock(&vas_pseries_mutex);
1051 mutex_lock(&vas_pseries_mutex);
1052 atomic_set(&caps->nr_total_credits, new_nr_creds);
1053 rc = reconfig_open_windows(vcaps, new_nr_creds, true);
1054 mutex_unlock(&vas_pseries_mutex);
1057 /* should not happen */
1058 pr_err("Invalid migration action %d\n", action);
1064 * Ignore errors during suspend and return for resume.
1066 if (rc && (action == VAS_RESUME))
1070 pr_info("VAS migration event (%d) successful\n", action);
1076 static int __init pseries_vas_init(void)
1078 struct hv_vas_all_caps *hv_caps;
1082 * Linux supports user space COPY/PASTE only with Radix
1084 if (!radix_enabled()) {
1085 copypaste_feat = false;
1086 pr_err("API is supported only with radix page tables\n");
1090 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
1094 * Get VAS overall capabilities by passing 0 to feature type.
1096 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
1097 (u64)virt_to_phys(hv_caps));
1101 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
1102 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
1104 sysfs_pseries_vas_init(&caps_all);
1107 * QOS capabilities available
1109 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1110 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1111 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1117 * Default capabilities available
1119 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1120 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1121 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1123 if (!rc && copypaste_feat) {
1124 if (firmware_has_feature(FW_FEATURE_LPAR))
1125 of_reconfig_notifier_register(&pseries_vas_nb);
1127 pr_info("GZIP feature is available\n");
1130 * Should not happen, but only when get default
1131 * capabilities HCALL failed. So disable copy paste
1134 copypaste_feat = false;
1141 machine_device_initcall(pseries, pseries_vas_init);