1 // SPDX-License-Identifier: GPL-2.0
3 * channel program interfaces
5 * Copyright IBM Corp. 2017
12 #include <linux/slab.h>
13 #include <linux/iommu.h>
14 #include <linux/vfio.h>
15 #include <asm/idals.h>
17 #include "vfio_ccw_cp.h"
20 /* Starting guest physical I/O address. */
21 unsigned long pa_iova;
22 /* Array that stores PFNs of the pages that need to be pinned. */
23 unsigned long *pa_iova_pfn;
24 /* Array that receives PFNs of the pages pinned. */
25 unsigned long *pa_pfn;
26 /* Number of pages pinned from @pa_iova. */
31 struct list_head next;
33 /* Guest physical address of the current chain. */
35 /* Count of the valid ccws in chain. */
37 /* Pinned PAGEs for the original data. */
38 struct pfn_array *ch_pa;
42 * pfn_array_alloc() - alloc memory for PFNs
43 * @pa: pfn_array on which to perform the operation
44 * @iova: target guest physical address
45 * @len: number of bytes that should be pinned from @iova
47 * Attempt to allocate memory for PFNs.
50 * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
51 * this structure will be filled in by this function.
54 * 0 if PFNs are allocated
55 * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
56 * -ENOMEM if alloc failed
58 static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
/* Refuse a pfn_array that already carries a page count or a PFN buffer. */
62 if (pa->pa_nr || pa->pa_iova_pfn)
/*
 * Page count: the offset of @iova inside its page plus @len, rounded up
 * to a whole number of pages.
 */
67 pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
/* A single zeroed allocation is used; pa_pfn is carved out of it below. */
71 pa->pa_iova_pfn = kcalloc(pa->pa_nr,
72 sizeof(*pa->pa_iova_pfn) +
75 if (unlikely(!pa->pa_iova_pfn)) {
/* pa_pfn starts right behind the pa_nr entries of pa_iova_pfn. */
79 pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
/* First entry: page frame number of the starting address. */
81 pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
/* -1 is presumably a "nothing pinned here yet" placeholder; confirm. */
82 pa->pa_pfn[0] = -1ULL;
/* Remaining entries are the consecutive page frames that follow. */
83 for (i = 1; i < pa->pa_nr; i++) {
84 pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
85 pa->pa_pfn[i] = -1ULL;
92 * pfn_array_pin() - Pin user pages in memory
93 * @pa: pfn_array on which to perform the operation
94 * @mdev: the mediated device to perform pin operations
96 * Returns number of pages pinned upon success.
97 * If the pin request partially succeeds, or fails completely,
98 * all pages are left unpinned and a negative error value is returned.
100 static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
/*
 * Ask for all pa_nr pages listed in pa_iova_pfn to be pinned with read and
 * write access; pa_pfn is passed as the array that receives the result.
 */
104 ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
105 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
/* Some, but not all, pages were pinned: release the ones that were. */
109 } else if (ret > 0 && ret != pa->pa_nr) {
110 vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
123 /* Unpin the pages before releasing the memory. */
124 static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
126 /* Only unpin if any pages were pinned to begin with */
128 vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
/*
 * pa_pfn points into the same buffer as pa_iova_pfn (see pfn_array_alloc()),
 * so this one kfree() releases both arrays.
 */
130 kfree(pa->pa_iova_pfn);
/*
 * Look for the page that contains @iova among the pages tracked by @pa.
 * The comparison is done on page frame numbers, so any address inside a
 * tracked page matches.
 */
133 static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova)
/* Reduce the address to its page frame number once, up front. */
135 unsigned long iova_pfn = iova >> PAGE_SHIFT;
138 for (i = 0; i < pa->pa_nr; i++)
139 if (pa->pa_iova_pfn[i] == iova_pfn)
144 /* Create the list of IDAL words for a pfn_array. */
/*
 * @pa:    source of the page frame numbers (pa_pfn), the entry count (pa_nr)
 *         and the starting address (pa_iova)
 * @idaws: output array; pa_nr words are written, one per page, each holding
 *         the page frame number shifted up to an address
 */
145 static inline void pfn_array_idal_create_words(
146 struct pfn_array *pa,
147 unsigned long *idaws)
152 * Idal words (execept the first one) rely on the memory being 4k
153 * aligned. If a user virtual address is 4K aligned, then it's
154 * corresponding kernel physical address will also be 4K aligned. Thus
155 * there will be no problem here to simply use the phys to create an
159 for (i = 0; i < pa->pa_nr; i++)
160 idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT;
162 /* Adjust the first IDAW, since it may not start on a page boundary */
163 idaws[0] += pa->pa_iova & (PAGE_SIZE - 1);
/*
 * Rewrite CCWs from the ccw0 layout to the ccw1 layout inside the buffer
 * at @source. @len is the number of CCWs to process. Each entry is first
 * copied out as a struct ccw0 and then stored back field by field through
 * a struct ccw1 pointer, so no second buffer is needed.
 */
166 static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
169 struct ccw1 *pccw1 = source;
172 for (i = 0; i < len; i++) {
/* Snapshot the entry before any of its fields are overwritten. */
173 ccw0 = *(struct ccw0 *)pccw1;
/*
 * An entry whose low four command-code bits equal CCW_CMD_TIC gets its
 * command code normalized to exactly CCW_CMD_TIC.
 */
174 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
175 pccw1->cmd_code = CCW_CMD_TIC;
179 pccw1->cmd_code = ccw0.cmd_code;
180 pccw1->flags = ccw0.flags;
181 pccw1->count = ccw0.count;
183 pccw1->cda = ccw0.cda;
189 * Within the domain (@mdev), copy @n bytes from a guest physical
190 * address (@iova) to a host physical address (@to).
192 static long copy_from_iova(struct device *mdev,
/* Zeroed so that pfn_array_alloc() finds pa_nr == 0 and pa_iova_pfn == NULL. */
196 struct pfn_array pa = {0};
/* Describe, then pin, the guest pages that cover @n bytes from @iova. */
201 ret = pfn_array_alloc(&pa, iova, n);
205 ret = pfn_array_pin(&pa, mdev);
207 pfn_array_unpin_free(&pa, mdev);
/* Copy page by page, reaching each pinned page through its PFN. */
212 for (i = 0; i < pa.pa_nr; i++) {
213 from = pa.pa_pfn[i] << PAGE_SHIFT;
/*
 * Compensate for @iova not starting on a page boundary.
 * NOTE(review): presumably applied to the first page only; confirm.
 */
216 from += iova & (PAGE_SIZE - 1);
217 m -= iova & (PAGE_SIZE - 1);
221 memcpy(to + (n - l), (void *)from, m);
/* The local pfn_array is released again before the function ends. */
228 pfn_array_unpin_free(&pa, mdev);
234 * Helpers to operate ccwchain.
/* Command-code tests. The first three compare only the masked low bits. */
/* Low two bits of cmd_code equal 0x02. */
236 #define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
/* Low four bits of cmd_code equal 0x0C. */
237 #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
/* Low four bits of cmd_code equal CCW_CMD_BASIC_SENSE. */
238 #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
/* The next two compare the whole command code, not a masked part of it. */
240 #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
242 #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
/* Flag tests: non-zero when the named flag bit is set in the CCW flags. */
244 #define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
245 #define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
/* Non-zero when either CCW_FLAG_CC or CCW_FLAG_DC is set. */
247 #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
250 * ccw_does_data_transfer()
252 * Determine whether a CCW will move any data, such that the guest pages
253 * would need to be pinned before performing the I/O.
255 * Returns 1 if yes, 0 if no.
257 static inline int ccw_does_data_transfer(struct ccw1 *ccw)
259 /* If the count field is zero, then no data will be transferred */
263 /* If the command is a NOP, then no data will be transferred */
264 if (ccw_is_noop(ccw))
267 /* If the skip flag is off, then data will be transferred */
268 if (!ccw_is_skip(ccw))
272 * If the skip flag is on, it is only meaningful if the command
273 * code is a read, read backward, sense, or sense ID. In those
274 * cases, no data will be transferred.
/* Read and read backward are recognized by their masked command-code bits. */
276 if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
/* ccw_is_sense() compares only the low four bits of the command code. */
279 if (ccw_is_sense(ccw))
282 /* The skip flag is on, but it is ignored for this command code. */
287 * is_cpa_within_range()
289 * @cpa: channel program address being questioned
290 * @head: address of the beginning of a CCW chain
291 * @len: number of CCWs within the chain
293 * Determine whether the address of a CCW (whether a new chain,
294 * or the target of a TIC) falls within a range (including the end points).
296 * Returns 1 if yes, 0 if no.
298 static inline int is_cpa_within_range(u32 cpa, u32 head, int len)
/* Address of the last CCW: @head plus (len - 1) entries of struct ccw1. */
300 u32 tail = head + (len - 1) * sizeof(struct ccw1);
/* Both end points count as inside the range. */
302 return (head <= cpa && cpa <= tail);
/*
 * Range check for the target of a TIC. The target address is taken from
 * the cda field of @ccw and tested against the @len CCWs starting at @head
 * with is_cpa_within_range(). CCWs that are not TICs are sorted out first.
 */
305 static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
307 if (!ccw_is_tic(ccw))
310 return is_cpa_within_range(ccw->cda, head, len);
/*
 * Allocate a ccwchain with room for @len CCWs and @len pfn_arrays and
 * append it to cp->ccwchain_list.
 *
 * Everything lives in one zeroed allocation, laid out as:
 *   struct ccwchain (size rounded up to a multiple of 8)
 *   @len entries of struct ccw1       <- chain->ch_ccw
 *   @len entries of struct pfn_array  <- chain->ch_pa
 */
313 static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
315 struct ccwchain *chain;
319 /* Make ccw address aligned to 8. */
320 size = ((sizeof(*chain) + 7L) & -8L) +
321 sizeof(*chain->ch_ccw) * len +
322 sizeof(*chain->ch_pa) * len;
323 chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
/* The CCW array begins right after the padded header. */
327 data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
328 chain->ch_ccw = (struct ccw1 *)data;
/* The pfn_array entries begin right after the last CCW. */
330 data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
331 chain->ch_pa = (struct pfn_array *)data;
/* New chains go to the end of the list, so list order is creation order. */
335 list_add_tail(&chain->next, &cp->ccwchain_list);
/* Tear down @chain, starting with unlinking it from the list it is on. */
340 static void ccwchain_free(struct ccwchain *chain)
342 list_del(&chain->next);
346 /* Free resource for a ccw that allocated memory for its cda. */
347 static void ccwchain_cda_free(struct ccwchain *chain, int idx)
349 struct ccw1 *ccw = chain->ch_ccw + idx;
/*
 * cda holds the address as an integer (it is assigned as a __u32 elsewhere
 * in this file); widen it before turning it back into a pointer.
 */
354 kfree((void *)(u64)ccw->cda);
358 * ccwchain_calc_length - calculate the length of the ccw chain.
359 * @iova: guest physical address of the target ccw chain
360 * @cp: channel_program on which to perform the operation
362 * This is the chain length not considering any TICs.
363 * You need to do a new round for each TIC target.
365 * The program is also validated for absence of not yet supported
366 * indirect data addressing scenarios.
368 * Returns: the length of the ccw chain or -errno.
370 static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
/*
 * The walk starts at cp->guest_cp, the buffer that ccwchain_handle_ccw()
 * fills from guest memory before calling this function; @iova is only used
 * as the chain head address for the TIC range check.
 */
372 struct ccw1 *ccw = cp->guest_cp;
379 * As we don't want to fail direct addressing even if the
380 * orb specified one of the unsupported formats, we defer
381 * checking for IDAWs in unsupported formats to here.
383 if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
387 * We want to keep counting if the current CCW has the
388 * command-chaining flag enabled, or if it is a TIC CCW
389 * that loops back into the current chain. The latter
390 * is used for device orientation, where the CCW PRIOR to
391 * the TIC can either jump to the TIC or a CCW immediately
392 * after the TIC, depending on the results of its operation.
394 if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
398 } while (cnt < CCWCHAIN_LEN_MAX + 1);
400 if (cnt == CCWCHAIN_LEN_MAX + 1)
/*
 * Check whether the target of @tic (its cda) lies inside the guest address
 * range of any chain already on cp->ccwchain_list. The range of a chain is
 * given by its ch_iova and ch_len.
 */
406 static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
408 struct ccwchain *chain;
411 list_for_each_entry(chain, &cp->ccwchain_list, next) {
412 ccw_head = chain->ch_iova;
413 if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
/*
 * Forward declaration: ccwchain_handle_ccw() and ccwchain_loop_tic() call
 * each other.
 */
420 static int ccwchain_loop_tic(struct ccwchain *chain,
421 struct channel_program *cp);
/*
 * Build a ccwchain for the CCWs found at guest address @cda.
 *
 * The CCWs are copied from the guest into cp->guest_cp, converted to
 * format 1 when the ORB format bit is clear, and counted. A chain of the
 * resulting length is allocated and filled, and any TICs in it are then
 * followed with ccwchain_loop_tic().
 */
423 static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
425 struct ccwchain *chain;
428 /* Copy 2K (the most we support today) of possible CCWs */
429 len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
430 CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
434 /* Convert any Format-0 CCWs to Format-1 */
435 if (!cp->orb.cmd.fmt)
436 convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
438 /* Count the CCWs in the current chain */
439 len = ccwchain_calc_length(cda, cp);
443 /* Need alloc a new chain for this one. */
444 chain = ccwchain_alloc(cp, len);
/* Remember the guest address so later lookups can match against it. */
447 chain->ch_iova = cda;
449 /* Copy the actual CCWs into the new chain */
450 memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
452 /* Loop for tics on this new chain. */
453 ret = ccwchain_loop_tic(chain, cp);
/* NOTE(review): presumably only reached when following the TICs failed; confirm. */
456 ccwchain_free(chain);
/*
 * Go through every CCW of @chain and look at the TICs. A TIC whose target
 * is not yet covered by a chain on cp->ccwchain_list gets a new chain built
 * for it through ccwchain_handle_ccw(), which in turn calls back into this
 * function for the new chain.
 */
462 static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
467 for (i = 0; i < chain->ch_len; i++) {
468 tic = chain->ch_ccw + i;
470 if (!ccw_is_tic(tic))
473 /* May transfer to an existing chain. */
474 if (tic_target_chain_exists(tic, cp))
477 /* Build a ccwchain for the next segment */
478 ret = ccwchain_handle_ccw(tic->cda, cp);
/*
 * Translate the TIC at index idx of @chain. The chain whose guest address
 * range contains the TIC target is looked up on cp->ccwchain_list, and the
 * cda of the TIC is rewritten to the address of the matching CCW in that
 * chain's own ch_ccw array.
 */
486 static int ccwchain_fetch_tic(struct ccwchain *chain,
488 struct channel_program *cp)
490 struct ccw1 *ccw = chain->ch_ccw + idx;
491 struct ccwchain *iter;
494 list_for_each_entry(iter, &cp->ccwchain_list, next) {
495 ccw_head = iter->ch_iova;
496 if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
/*
 * Keep the byte offset of the target within its chain, but apply it
 * to the start of the chain's ch_ccw array instead of the guest address.
 */
497 ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
498 (ccw->cda - ccw_head));
/*
 * Translate the data address of one CCW of @chain.
 *
 * A host IDAL (idaws) is allocated, the guest pages behind the CCW are
 * described in the matching chain->ch_pa entry and pinned when
 * ccw_does_data_transfer() reports that the CCW moves data. The CCW is then
 * rewritten to point at the host IDAL with CCW_FLAG_IDA set, and the IDAL
 * is filled from the pinned page frames. For a CCW that already uses an
 * IDAL, the guest IDAL is copied in and its entries decide which page
 * frames get pinned.
 */
506 static int ccwchain_fetch_direct(struct ccwchain *chain,
508 struct channel_program *cp)
511 struct pfn_array *pa;
513 unsigned long *idaws;
516 int idaw_nr, idal_len;
519 ccw = chain->ch_ccw + idx;
524 /* Calculate size of IDAL */
525 if (ccw_is_idal(ccw)) {
526 /* Read first IDAW to see if it's 4K-aligned or not. */
527 /* All subsequent IDAWs will be 4K-aligned. */
528 ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
/* Number of IDAL words for the transfer, and the IDAL size in bytes. */
534 idaw_nr = idal_nr_words((void *)iova, bytes);
535 idal_len = idaw_nr * sizeof(*idaws);
537 /* Allocate an IDAL from host storage */
538 idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
545 * Allocate an array of pfn's for pages to pin/translate.
546 * The number of pages is actually the count of the idaws
547 * required for the data transfer, since we only only support
550 pa = chain->ch_pa + idx;
551 ret = pfn_array_alloc(pa, iova, bytes);
555 if (ccw_is_idal(ccw)) {
556 /* Copy guest IDAL into host IDAL */
557 ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
562 * Copy guest IDAWs into pfn_array, in case the memory they
563 * occupy is not contiguous.
565 for (i = 0; i < idaw_nr; i++)
566 pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT;
569 * No action is required here; the iova addresses in pfn_array
570 * were initialized sequentially in pfn_array_alloc() beginning
571 * with the contents of ccw->cda.
575 if (ccw_does_data_transfer(ccw)) {
576 ret = pfn_array_pin(pa, cp->mdev);
583 ccw->cda = (__u32) virt_to_phys(idaws);
584 ccw->flags |= CCW_FLAG_IDA;
586 /* Populate the IDAL with pinned/translated addresses from pfn */
587 pfn_array_idal_create_words(pa, idaws);
592 pfn_array_unpin_free(pa, cp->mdev);
602 * To reduce memory copy, we'll pin the cda page in memory,
603 * and to get rid of the cda 2G limitation of ccw1, we'll translate
604 * direct ccws to idal ccws.
606 static int ccwchain_fetch_one(struct ccwchain *chain,
608 struct channel_program *cp)
610 struct ccw1 *ccw = chain->ch_ccw + idx;
/*
 * Dispatch to one of the two translation routines.
 * NOTE(review): the selecting test is presumably ccw_is_tic(ccw); confirm.
 */
613 return ccwchain_fetch_tic(chain, idx, cp);
615 return ccwchain_fetch_direct(chain, idx, cp);
619 * cp_init() - allocate ccwchains for a channel program.
620 * @cp: channel_program on which to perform the operation
621 * @mdev: the mediated device to perform pin/unpin operations
622 * @orb: control block for the channel program from the guest
624 * This creates one or more ccwchain(s), and copies the raw data of
625 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
628 * 1. Supports only prefetch enabled mode.
629 * 2. Supports idal(c64) ccw chaining.
630 * 3. Supports 4k idaw.
633 * %0 on success and a negative error value on failure.
635 int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
641 * Only support prefetch enable mode now.
/* Start from an empty chain list and a private copy of the caller's ORB. */
646 INIT_LIST_HEAD(&cp->ccwchain_list);
647 memcpy(&cp->orb, orb, sizeof(*orb));
650 /* Build a ccwchain for the first CCW segment */
651 ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
/* The other cp_*() entry points test this flag before doing any work. */
654 cp->initialized = true;
656 /* It is safe to force: if it was not set but idals used
657 * ccwchain_calc_length would have returned an error.
667 * cp_free() - free resources for channel program.
668 * @cp: channel_program on which to perform the operation
670 * This unpins the memory pages and frees the memory space occupied by
671 * @cp, which must have been returned by a previous call to cp_init().
672 * Otherwise, undefined behavior occurs.
674 void cp_free(struct channel_program *cp)
676 struct ccwchain *chain, *temp;
/* The flag is set by cp_init(); without it there is nothing to release. */
679 if (!cp->initialized)
682 cp->initialized = false;
/* The _safe iterator is needed because ccwchain_free() unlinks each chain. */
683 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
/* Per CCW: drop the page pins first, then the memory its cda refers to. */
684 for (i = 0; i < chain->ch_len; i++) {
685 pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
686 ccwchain_cda_free(chain, i);
688 ccwchain_free(chain);
693 * cp_prefetch() - translate a guest physical address channel program to
694 * a real-device runnable channel program.
695 * @cp: channel_program on which to perform the operation
697 * This function translates the guest-physical-address channel program
698 * and stores the result to ccwchain list. @cp must have been
699 * initialized by a previous call with cp_init(). Otherwise, undefined
701 * For each chain composing the channel program:
702 * - On entry ch_len holds the count of CCWs to be translated.
703 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
704 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
706 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
707 * as helpers to do ccw chain translation inside the kernel. Basically
708 * they accept a channel program issued by a virtual machine, and
709 * translate the channel program to a real-device runnable channel
712 * These APIs will copy the ccws into kernel-space buffers, and update
713 * the guest physical addresses with their corresponding host physical
714 * addresses. Then channel I/O device drivers could issue the
715 * translated channel program to real devices to perform an I/O
718 * These interfaces are designed to support translation only for
719 * channel programs, which are generated and formatted by a
720 * guest. Thus this will make it possible for things like VFIO to
721 * leverage the interfaces to passthrough a channel I/O mediated
724 * We support direct ccw chaining by translating them to idal ccws.
727 * %0 on success and a negative error value on failure.
729 int cp_prefetch(struct channel_program *cp)
731 struct ccwchain *chain;
734 /* this is an error in the caller */
735 if (!cp->initialized)
/* Translate every CCW of every chain, one CCW at a time. */
738 list_for_each_entry(chain, &cp->ccwchain_list, next) {
740 for (idx = 0; idx < len; idx++) {
741 ret = ccwchain_fetch_one(chain, idx, cp);
749 /* Only cleanup the chain elements that were actually translated. */
/* Visit the chains that follow the current one on the list. */
751 list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
758 * cp_get_orb() - get the orb of the channel program
759 * @cp: channel_program on which to perform the operation
760 * @intparm: new intparm for the returned orb
761 * @lpm: candidate value of the logical-path mask for the returned orb
763 * This function returns the address of the updated orb of the channel
764 * program. Channel I/O device drivers could use this orb to issue a
767 union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
770 struct ccwchain *chain;
773 /* this is an error in the caller */
774 if (!cp->initialized)
/* The interruption parameter always comes from the caller. */
779 orb->cmd.intparm = intparm;
/* The key field receives PAGE_DEFAULT_KEY shifted right by four bits. */
781 orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
/* @lpm is only a candidate: it matters when the ORB has no lpm of its own. */
783 if (orb->cmd.lpm == 0)
/* The program address is taken from the first chain on the list. */
786 chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
788 orb->cmd.cpa = (__u32) __pa(cpa);
794 * cp_update_scsw() - update scsw for a channel program.
795 * @cp: channel_program on which to perform the operation
796 * @scsw: I/O results of the channel program and also the target to be
799 * @scsw contains the I/O results of the channel program pointed
800 * to by @cp. However, what @scsw->cpa stores is a host physical
801 * address, which is meaningless for the guest, which is waiting for
804 * This function updates @scsw->cpa to its corresponding guest physical
807 void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
809 struct ccwchain *chain;
/* Host-side address taken from the scsw; mapped back to a guest address below. */
810 u32 cpa = scsw->cmd.cpa;
813 if (!cp->initialized)
818 * For now, only update the cmd.cpa part. We may need to deal with
819 * other portions of the schib as well, even if we don't return them
820 * in the ioctl directly. Path status changes etc.
822 list_for_each_entry(chain, &cp->ccwchain_list, next) {
823 ccw_head = (u32)(u64)chain->ch_ccw;
825 * On successful execution, cpa points just beyond the end
828 if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
830 * (cpa - ccw_head) is the offset value of the host
831 * physical ccw to its chain head.
832 * Adding this value to the guest physical ccw chain
833 * head gets us the guest cpa.
835 cpa = chain->ch_iova + (cpa - ccw_head);
844 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
845 * @cp: channel_program on which to perform the operation
846 * @iova: the iova to check
848 * If the @iova is currently pinned for the ccw chain, return true;
851 bool cp_iova_pinned(struct channel_program *cp, u64 iova)
853 struct ccwchain *chain;
/* The chain list is only consulted for an initialized channel program. */
856 if (!cp->initialized)
/* Check the pfn_array of every CCW in every chain for the page of @iova. */
859 list_for_each_entry(chain, &cp->ccwchain_list, next) {
860 for (i = 0; i < chain->ch_len; i++)
861 if (pfn_array_iova_pinned(chain->ch_pa + i, iova))