#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
- static void device_block_translation(struct device *dev);
static void intel_iommu_domain_free(struct iommu_domain *domain);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
+ const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
}
/* Some capabilities may be different across iommus */
- static void domain_update_iommu_cap(struct dmar_domain *domain)
+ void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
return domain;
}
- static int domain_attach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
unsigned long ndomains;
return ret;
}
- static void domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
+ if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
+ pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
+ return -EINVAL;
+ }
+
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
struct intel_iommu *iommu = NULL;
unsigned long flag;
- for_each_active_iommu(iommu, drhd) {
- iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
- GFP_KERNEL);
- if (!iommu->iommu_state)
- goto nomem;
- }
-
iommu_flush_all();
for_each_active_iommu(iommu, drhd) {
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
return 0;
-
-nomem:
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
-
- return -ENOMEM;
}
static void iommu_resume(void)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
}
static struct syscore_ops iommu_syscore_ops = {
* all DMA requests without PASID from the device are blocked. If the page
* table has been set, clean up the data structures.
*/
- static void device_block_translation(struct device *dev)
+ void device_block_translation(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
return NULL;
}
+ static struct iommu_domain *
+ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
+ struct iommu_domain *parent,
+ const struct iommu_user_data *user_data)
+ {
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+ struct intel_iommu *iommu = info->iommu;
+ struct iommu_domain *domain;
+
+ /* Must be NESTING domain */
+ if (parent) {
+ if (!nested_supported(iommu) || flags)
+ return ERR_PTR(-EOPNOTSUPP);
+ return intel_nested_domain_alloc(parent, user_data);
+ }
+
+ if (flags &
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (nested_parent && !nested_supported(iommu))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (user_data || (dirty_tracking && !ssads_supported(iommu)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /*
+ * domain_alloc_user op needs to fully initialize a domain before
+ * return, so uses iommu_domain_alloc() here for simple.
+ */
+ domain = iommu_domain_alloc(dev->bus);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ if (nested_parent)
+ to_dmar_domain(domain)->nested_parent = true;
+
+ if (dirty_tracking) {
+ if (to_dmar_domain(domain)->use_first_level) {
+ iommu_domain_free(domain);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ domain->dirty_ops = &intel_dirty_ops;
+ }
+
+ return domain;
+ }
+
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
if (domain != &si_domain->domain && domain != &blocking_domain)
domain_exit(to_dmar_domain(domain));
}
- static int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+ int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
+ if (domain->dirty_ops && !ssads_supported(iommu))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return ecap_sc_support(info->iommu->ecap);
+ case IOMMU_CAP_DIRTY_TRACKING:
+ return ssads_supported(info->iommu);
default:
return false;
}
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
+ if (domain->dirty_ops)
+ return -EINVAL;
+
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
if (!vtd)
return ERR_PTR(-ENOMEM);
+ vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
vtd->cap_reg = iommu->cap;
vtd->ecap_reg = iommu->ecap;
*length = sizeof(*vtd);
return vtd;
}
+ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable)
+ {
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct device_domain_info *info;
+ int ret;
+
+ spin_lock(&dmar_domain->lock);
+ if (dmar_domain->dirty_tracking == enable)
+ goto out_unlock;
+
+ list_for_each_entry(info, &dmar_domain->devices, link) {
+ ret = intel_pasid_setup_dirty_tracking(info->iommu,
+ info->domain, info->dev,
+ IOMMU_NO_PASID, enable);
+ if (ret)
+ goto err_unwind;
+ }
+
+ dmar_domain->dirty_tracking = enable;
+ out_unlock:
+ spin_unlock(&dmar_domain->lock);
+
+ return 0;
+
+ err_unwind:
+ list_for_each_entry(info, &dmar_domain->devices, link)
+ intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
+ info->dev, IOMMU_NO_PASID,
+ dmar_domain->dirty_tracking);
+ spin_unlock(&dmar_domain->lock);
+ return ret;
+ }
+
+ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+ {
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned long end = iova + size - 1;
+ unsigned long pgsize;
+
+ /*
+ * IOMMUFD core calls into a dirty tracking disabled domain without an
+ * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+ * have occurred when we stopped dirty tracking. This ensures that we
+ * never inherit dirtied bits from a previous cycle.
+ */
+ if (!dmar_domain->dirty_tracking && dirty->bitmap)
+ return -EINVAL;
+
+ do {
+ struct dma_pte *pte;
+ int lvl = 0;
+
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
+ GFP_ATOMIC);
+ pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+ if (!pte || !dma_pte_present(pte)) {
+ iova += pgsize;
+ continue;
+ }
+
+ if (dma_sl_pte_test_and_clear_dirty(pte, flags))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
+ return 0;
+ }
+
+ const struct iommu_dirty_ops intel_dirty_ops = {
+ .set_dirty_tracking = intel_iommu_set_dirty_tracking,
+ .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
+ };
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
+ .domain_alloc_user = intel_iommu_domain_alloc_user,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+ #include <uapi/linux/iommufd.h>
/*
* VT-d hardware uses 4KiB page size regardless of host page size.
#define DMA_FL_PTE_DIRTY BIT_ULL(6)
#define DMA_FL_PTE_XD BIT_ULL(63)
+ #define DMA_SL_PTE_DIRTY_BIT 9
+ #define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
#define ADDR_WIDTH_5LEVEL (57)
#define ADDR_WIDTH_4LEVEL (48)
#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu) (sm_supported(iommu) && \
ecap_pasid((iommu)->ecap))
+ #define ssads_supported(iommu) (sm_supported(iommu) && \
+ ecap_slads((iommu)->ecap))
+ #define nested_supported(iommu) (sm_supported(iommu) && \
+ ecap_nest((iommu)->ecap))
struct pasid_entry;
struct pasid_state_entry;
* otherwise, goes through the second
* level.
*/
+ u8 dirty_tracking:1; /* Dirty tracking is enabled */
+ u8 nested_parent:1; /* Has other domains nested on it */
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
+ union {
+ /* DMA remapping domain */
+ struct {
+ /* virtual address */
+ struct dma_pte *pgd;
+ /* max guest address width */
+ int gaw;
+ /*
+ * adjusted guest address width:
+ * 0: level 2 30-bit
+ * 1: level 3 39-bit
+ * 2: level 4 48-bit
+ * 3: level 5 57-bit
+ */
+ int agaw;
+ /* maximum mapped address */
+ u64 max_addr;
+ };
+
+ /* Nested user domain */
+ struct {
+ /* parent page table which the user domain is nested on */
+ struct dmar_domain *s2_domain;
+ /* user page table pointer (in GPA) */
+ unsigned long s1_pgtbl;
+ /* page table attributes */
+ struct iommu_hwpt_vtd_s1 s1_cfg;
+ };
+ };
struct iommu_domain domain; /* generic domain data structure for
iommu core */
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
struct q_inval *qi; /* Queued invalidation info */
- u32 *iommu_state; /* Store iommu states between suspend and resume.*/
+ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
return (pte->val & 3) != 0;
}
+ static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+ unsigned long flags)
+ {
+ if (flags & IOMMU_DIRTY_NO_CLEAR)
+ return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+ return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+ (unsigned long *)&pte->val);
+ }
+
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
return (pte->val & DMA_PTE_LARGE_PAGE);
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
+ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
+ void device_block_translation(struct device *dev);
+ int prepare_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+ void domain_update_iommu_cap(struct dmar_domain *domain);
+
int dmar_ir_support(void);
void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
+ const struct iommu_user_data *user_data);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
/* Device specification max LOAD size */
#define MAX_LOAD_SIZE (BIT_ULL(__mlx5_bit_sz(load_vhca_state_in, size)) - 1)
+#define MAX_CHUNK_SIZE SZ_8M
+
static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
{
struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
return found ? buf : NULL;
}
+static void mlx5vf_buf_read_done(struct mlx5_vhca_data_buffer *vhca_buf)
+{
+ struct mlx5_vf_migration_file *migf = vhca_buf->migf;
+
+ if (vhca_buf->stop_copy_chunk_num) {
+ bool is_header = vhca_buf->dma_dir == DMA_NONE;
+ u8 chunk_num = vhca_buf->stop_copy_chunk_num;
+ size_t next_required_umem_size = 0;
+
+ if (is_header)
+ migf->buf_header[chunk_num - 1] = vhca_buf;
+ else
+ migf->buf[chunk_num - 1] = vhca_buf;
+
+ spin_lock_irq(&migf->list_lock);
+ list_del_init(&vhca_buf->buf_elm);
+ if (!is_header) {
+ next_required_umem_size =
+ migf->next_required_umem_size;
+ migf->next_required_umem_size = 0;
+ migf->num_ready_chunks--;
+ }
+ spin_unlock_irq(&migf->list_lock);
+ if (next_required_umem_size)
+ mlx5vf_mig_file_set_save_work(migf, chunk_num,
+ next_required_umem_size);
+ return;
+ }
+
+ spin_lock_irq(&migf->list_lock);
+ list_del_init(&vhca_buf->buf_elm);
+ list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
+ spin_unlock_irq(&migf->list_lock);
+}
+
static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
char __user **buf, size_t *len, loff_t *pos)
{
copy_len -= page_len;
}
- if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
- spin_lock_irq(&vhca_buf->migf->list_lock);
- list_del_init(&vhca_buf->buf_elm);
- list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
- spin_unlock_irq(&vhca_buf->migf->list_lock);
- }
+ if (*pos >= vhca_buf->start_pos + vhca_buf->length)
+ mlx5vf_buf_read_done(vhca_buf);
return done;
}
wake_up_interruptible(&migf->poll_wait);
}
-static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
+void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
+ u8 chunk_num, size_t next_required_umem_size)
+{
+ migf->save_data[chunk_num - 1].next_required_umem_size =
+ next_required_umem_size;
+ migf->save_data[chunk_num - 1].migf = migf;
+ get_file(migf->filp);
+ queue_work(migf->mvdev->cb_wq,
+ &migf->save_data[chunk_num - 1].work);
+}
+
+static struct mlx5_vhca_data_buffer *
+mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
+ u8 index, size_t required_length)
+{
+ struct mlx5_vhca_data_buffer *buf = migf->buf[index];
+ u8 chunk_num;
+
+ WARN_ON(!buf);
+ chunk_num = buf->stop_copy_chunk_num;
+ buf->migf->buf[index] = NULL;
+ /* Checking whether the pre-allocated buffer can fit */
+ if (buf->allocated_length >= required_length)
+ return buf;
+
+ mlx5vf_put_data_buffer(buf);
+ buf = mlx5vf_get_data_buffer(buf->migf, required_length,
+ DMA_FROM_DEVICE);
+ if (IS_ERR(buf))
+ return buf;
+
+ buf->stop_copy_chunk_num = chunk_num;
+ return buf;
+}
+
+static void mlx5vf_mig_file_save_work(struct work_struct *_work)
+{
+ struct mlx5vf_save_work_data *save_data = container_of(_work,
+ struct mlx5vf_save_work_data, work);
+ struct mlx5_vf_migration_file *migf = save_data->migf;
+ struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+ struct mlx5_vhca_data_buffer *buf;
+
+ mutex_lock(&mvdev->state_mutex);
+ if (migf->state == MLX5_MIGF_STATE_ERROR)
+ goto end;
+
+ buf = mlx5vf_mig_file_get_stop_copy_buf(migf,
+ save_data->chunk_num - 1,
+ save_data->next_required_umem_size);
+ if (IS_ERR(buf))
+ goto err;
+
+ if (mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false))
+ goto err_save;
+
+ goto end;
+
+err_save:
+ mlx5vf_put_data_buffer(buf);
+err:
+ mlx5vf_mark_err(migf);
+end:
+ mlx5vf_state_mutex_unlock(mvdev);
+ fput(migf->filp);
+}
+
+static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
+ bool track)
{
size_t size = sizeof(struct mlx5_vf_migration_header) +
sizeof(struct mlx5_vf_migration_tag_stop_copy_data);
to_buff = kmap_local_page(page);
memcpy(to_buff, &header, sizeof(header));
header_buf->length = sizeof(header);
- data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length);
+ data.stop_copy_size = cpu_to_le64(migf->buf[0]->allocated_length);
memcpy(to_buff + sizeof(header), &data, sizeof(data));
header_buf->length += sizeof(data);
kunmap_local(to_buff);
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&header_buf->buf_elm, &migf->buf_list);
spin_unlock_irqrestore(&migf->list_lock, flags);
- migf->pre_copy_initial_bytes = size;
+ if (track)
+ migf->pre_copy_initial_bytes = size;
return 0;
err:
mlx5vf_put_data_buffer(header_buf);
return ret;
}
-static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf,
- size_t state_size)
+static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev,
+ struct mlx5_vf_migration_file *migf,
+ size_t state_size, u64 full_size,
+ bool track)
{
struct mlx5_vhca_data_buffer *buf;
size_t inc_state_size;
+ int num_chunks;
int ret;
+ int i;
- /* let's be ready for stop_copy size that might grow by 10 percents */
- if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
- inc_state_size = state_size;
+ if (mvdev->chunk_mode) {
+ size_t chunk_size = min_t(size_t, MAX_CHUNK_SIZE, full_size);
- buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
+ /* from firmware perspective at least 'state_size' buffer should be set */
+ inc_state_size = max(state_size, chunk_size);
+ } else {
+ if (track) {
+ /* let's be ready for stop_copy size that might grow by 10 percents */
+ if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
+ inc_state_size = state_size;
+ } else {
+ inc_state_size = state_size;
+ }
+ }
- migf->buf = buf;
- buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto err;
+ /* let's not overflow the device specification max SAVE size */
+ inc_state_size = min_t(size_t, inc_state_size,
+ (BIT_ULL(__mlx5_bit_sz(save_vhca_state_in, size)) - PAGE_SIZE));
+
+ num_chunks = mvdev->chunk_mode ? MAX_NUM_CHUNKS : 1;
+ for (i = 0; i < num_chunks; i++) {
+ buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
+
+ migf->buf[i] = buf;
+ buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
+ migf->buf_header[i] = buf;
+ if (mvdev->chunk_mode) {
+ migf->buf[i]->stop_copy_chunk_num = i + 1;
+ migf->buf_header[i]->stop_copy_chunk_num = i + 1;
+ INIT_WORK(&migf->save_data[i].work,
+ mlx5vf_mig_file_save_work);
+ migf->save_data[i].chunk_num = i + 1;
+ }
}
- migf->buf_header = buf;
- ret = mlx5vf_add_stop_copy_header(migf);
+ ret = mlx5vf_add_stop_copy_header(migf, track);
if (ret)
- goto err_header;
+ goto err;
return 0;
-err_header:
- mlx5vf_put_data_buffer(migf->buf_header);
- migf->buf_header = NULL;
err:
- mlx5vf_put_data_buffer(migf->buf);
- migf->buf = NULL;
+ for (i = 0; i < num_chunks; i++) {
+ if (migf->buf[i]) {
+ mlx5vf_put_data_buffer(migf->buf[i]);
+ migf->buf[i] = NULL;
+ }
+ if (migf->buf_header[i]) {
+ mlx5vf_put_data_buffer(migf->buf_header[i]);
+ migf->buf_header[i] = NULL;
+ }
+ }
+
return ret;
}
* As so, the other code below is safe with the proper locks.
*/
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
- MLX5VF_QUERY_INC);
+ NULL, MLX5VF_QUERY_INC);
if (ret)
goto err_state_unlock;
}
if (migf->state == MLX5_MIGF_STATE_ERROR)
return -ENODEV;
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, NULL,
MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
if (ret)
goto err;
- /* Checking whether we have a matching pre-allocated buffer that can fit */
- if (migf->buf && migf->buf->allocated_length >= length) {
- buf = migf->buf;
- migf->buf = NULL;
- } else {
- buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto err;
- }
+ buf = mlx5vf_mig_file_get_stop_copy_buf(migf, 0, length);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
}
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
struct mlx5_vf_migration_file *migf;
struct mlx5_vhca_data_buffer *buf;
size_t length;
+ u64 full_size;
int ret;
migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
INIT_LIST_HEAD(&migf->buf_list);
INIT_LIST_HEAD(&migf->avail_list);
spin_lock_init(&migf->list_lock);
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0);
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, 0);
+ if (ret)
+ goto out_pd;
+
+ ret = mlx5vf_prep_stop_copy(mvdev, migf, length, full_size, track);
if (ret)
goto out_pd;
if (track) {
- ret = mlx5vf_prep_stop_copy(migf, length);
- if (ret)
+ /* leave the allocated buffer ready for the stop-copy phase */
+ buf = mlx5vf_alloc_data_buffer(migf,
+ migf->buf[0]->allocated_length, DMA_FROM_DEVICE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
goto out_pd;
- }
-
- buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto out_pd;
+ }
+ } else {
+ buf = migf->buf[0];
+ migf->buf[0] = NULL;
}
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track);
size_t len, loff_t *pos)
{
struct mlx5_vf_migration_file *migf = filp->private_data;
- struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
- struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
+ struct mlx5_vhca_data_buffer *vhca_buf = migf->buf[0];
+ struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header[0];
loff_t requested_length;
bool has_work = false;
ssize_t done = 0;
if (vhca_buf_header->allocated_length < migf->record_size) {
mlx5vf_free_data_buffer(vhca_buf_header);
- migf->buf_header = mlx5vf_alloc_data_buffer(migf,
+ migf->buf_header[0] = mlx5vf_alloc_data_buffer(migf,
migf->record_size, DMA_NONE);
- if (IS_ERR(migf->buf_header)) {
- ret = PTR_ERR(migf->buf_header);
- migf->buf_header = NULL;
+ if (IS_ERR(migf->buf_header[0])) {
+ ret = PTR_ERR(migf->buf_header[0]);
+ migf->buf_header[0] = NULL;
goto out_unlock;
}
- vhca_buf_header = migf->buf_header;
+ vhca_buf_header = migf->buf_header[0];
}
vhca_buf_header->start_pos = migf->max_pos;
if (vhca_buf->allocated_length < size) {
mlx5vf_free_data_buffer(vhca_buf);
- migf->buf = mlx5vf_alloc_data_buffer(migf,
+ migf->buf[0] = mlx5vf_alloc_data_buffer(migf,
size, DMA_TO_DEVICE);
- if (IS_ERR(migf->buf)) {
- ret = PTR_ERR(migf->buf);
- migf->buf = NULL;
+ if (IS_ERR(migf->buf[0])) {
+ ret = PTR_ERR(migf->buf[0]);
+ migf->buf[0] = NULL;
goto out_unlock;
}
- vhca_buf = migf->buf;
+ vhca_buf = migf->buf[0];
}
vhca_buf->start_pos = migf->max_pos;
goto out_pd;
}
- migf->buf = buf;
+ migf->buf[0] = buf;
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
buf = mlx5vf_alloc_data_buffer(migf,
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
goto out_buf;
}
- migf->buf_header = buf;
+ migf->buf_header[0] = buf;
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
} else {
/* Initial state will be to read the image */
spin_lock_init(&migf->list_lock);
return migf;
out_buf:
- mlx5vf_free_data_buffer(migf->buf);
+ mlx5vf_free_data_buffer(migf->buf[0]);
out_pd:
mlx5vf_cmd_dealloc_pd(migf);
out_free:
mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
cancel_work_sync(&mvdev->saving_migf->async_data.work);
mlx5vf_disable_fd(mvdev->saving_migf);
+ wake_up_interruptible(&mvdev->saving_migf->poll_wait);
mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
fput(mvdev->saving_migf->filp);
mvdev->saving_migf = NULL;
if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
ret = mlx5vf_cmd_load_vhca_state(mvdev,
mvdev->resuming_migf,
- mvdev->resuming_migf->buf);
+ mvdev->resuming_migf->buf[0]);
if (ret)
return ERR_PTR(ret);
}
struct mlx5vf_pci_core_device *mvdev = container_of(
vdev, struct mlx5vf_pci_core_device, core_device.vdev);
size_t state_size;
+ u64 total_size;
int ret;
mutex_lock(&mvdev->state_mutex);
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
- &state_size, 0);
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &state_size,
+ &total_size, 0);
if (!ret)
- *stop_copy_length = state_size;
+ *stop_copy_length = total_size;
mlx5vf_state_mutex_unlock(mvdev);
return ret;
}
module_pci_driver(mlx5vf_pci_driver);
+ MODULE_IMPORT_NS(IOMMUFD);
MODULE_LICENSE("GPL");