Merge branch 'for-3.19/drivers' of git://git.kernel.dk/linux-block

author Linus Torvalds <[email protected]>
Sat, 13 Dec 2014 22:22:26 +0000 (14:22 -0800)
committer Linus Torvalds <[email protected]>
Sat, 13 Dec 2014 22:22:26 +0000 (14:22 -0800)
diff --combined drivers/block/nvme-scsi.c

index 0b4b2775600eafc2ec1be3afae29b92f5be4345c,49f86d1a5aa25bba4bebef727545c1a385061e1e..5e78568026c339da939a33acd54cbd80891c5a10
--- 1/drivers/block/nvme-scsi.c
--- 2/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@@ -329,7 -329,7 +329,7 @@@ INQUIRY_EVPD_BIT_MASK) ? 1 : 0
   (GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET))
   
   #define IS_READ_CAP_16(cdb)                                   \
- -((cdb[0] == SERVICE_ACTION_IN && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
+ +((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
   
   /* Request Sense Helper Macros */
   #define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb)                   \
@@@ -2105,7 -2105,7 +2105,7 @@@ static int nvme_trans_do_nvme_io(struc
   
                 nvme_offset += unit_num_blocks;
   
-               nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
+               nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
                 if (nvme_sc != NVME_SC_SUCCESS) {
                         nvme_unmap_user_pages(dev,
                                 (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@@ -2658,7 -2658,7 +2658,7 @@@ static int nvme_trans_start_stop(struc
                         c.common.opcode = nvme_cmd_flush;
                         c.common.nsid = cpu_to_le32(ns->ns_id);
   
-                       nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
+                       nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
                         res = nvme_trans_status_code(hdr, nvme_sc);
                         if (res)
                                 goto out;
@@@ -2686,7 -2686,7 +2686,7 @@@ static int nvme_trans_synchronize_cache
         c.common.opcode = nvme_cmd_flush;
         c.common.nsid = cpu_to_le32(ns->ns_id);
   
-       nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
+       nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
   
         res = nvme_trans_status_code(hdr, nvme_sc);
         if (res)
@@@ -2894,7 -2894,7 +2894,7 @@@ static int nvme_trans_unmap(struct nvme
         c.dsm.nr = cpu_to_le32(ndesc - 1);
         c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
   
-       nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
+       nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
         res = nvme_trans_status_code(hdr, nvme_sc);
   
         dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
@@@ -2915,6 -2915,14 +2915,14 @@@ static int nvme_scsi_translate(struct n
         if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
                 return -EFAULT;
   
+       /*
+        * Prime the hdr with good status for scsi commands that don't require
+        * an nvme command for translation.
+        */
+       retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
+       if (retcode)
+               return retcode;
+ 
         opcode = cmd[0];
   
         switch (opcode) {
@@@ -2947,7 -2955,7 +2955,7 @@@
         case READ_CAPACITY:
                 retcode = nvme_trans_read_capacity(ns, hdr, cmd);
                 break;
- -      case SERVICE_ACTION_IN:
+ +      case SERVICE_ACTION_IN_16:
                 if (IS_READ_CAP_16(cmd))
                         retcode = nvme_trans_read_capacity(ns, hdr, cmd);
                 else
@@@ -3016,152 -3024,6 +3024,6 @@@ int nvme_sg_io(struct nvme_ns *ns, stru
         return retcode;
   }
   
- #ifdef CONFIG_COMPAT
- typedef struct sg_io_hdr32 {
-       compat_int_t interface_id;      /* [i] 'S' for SCSI generic (required) */
-       compat_int_t dxfer_direction;   /* [i] data transfer direction  */
-       unsigned char cmd_len;          /* [i] SCSI command length ( <= 16 bytes) */
-       unsigned char mx_sb_len;                /* [i] max length to write to sbp */
-       unsigned short iovec_count;     /* [i] 0 implies no scatter gather */
-       compat_uint_t dxfer_len;                /* [i] byte count of data transfer */
-       compat_uint_t dxferp;           /* [i], [*io] points to data transfer memory
-                                             or scatter gather list */
-       compat_uptr_t cmdp;             /* [i], [*i] points to command to perform */
-       compat_uptr_t sbp;              /* [i], [*o] points to sense_buffer memory */
-       compat_uint_t timeout;          /* [i] MAX_UINT->no timeout (unit: millisec) */
-       compat_uint_t flags;            /* [i] 0 -> default, see SG_FLAG... */
-       compat_int_t pack_id;           /* [i->o] unused internally (normally) */
-       compat_uptr_t usr_ptr;          /* [i->o] unused internally */
-       unsigned char status;           /* [o] scsi status */
-       unsigned char masked_status;    /* [o] shifted, masked scsi status */
-       unsigned char msg_status;               /* [o] messaging level data (optional) */
-       unsigned char sb_len_wr;                /* [o] byte count actually written to sbp */
-       unsigned short host_status;     /* [o] errors from host adapter */
-       unsigned short driver_status;   /* [o] errors from software driver */
-       compat_int_t resid;             /* [o] dxfer_len - actual_transferred */
-       compat_uint_t duration;         /* [o] time taken by cmd (unit: millisec) */
-       compat_uint_t info;             /* [o] auxiliary information */
- } sg_io_hdr32_t;  /* 64 bytes long (on sparc32) */
- 
- typedef struct sg_iovec32 {
-       compat_uint_t iov_base;
-       compat_uint_t iov_len;
- } sg_iovec32_t;
- 
- static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count)
- {
-       sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1);
-       sg_iovec32_t __user *iov32 = dxferp;
-       int i;
- 
-       for (i = 0; i < iovec_count; i++) {
-               u32 base, len;
- 
-               if (get_user(base, &iov32[i].iov_base) ||
-                   get_user(len, &iov32[i].iov_len) ||
-                   put_user(compat_ptr(base), &iov[i].iov_base) ||
-                   put_user(len, &iov[i].iov_len))
-                       return -EFAULT;
-       }
- 
-       if (put_user(iov, &sgio->dxferp))
-               return -EFAULT;
-       return 0;
- }
- 
- int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg)
- {
-       sg_io_hdr32_t __user *sgio32 = (sg_io_hdr32_t __user *)arg;
-       sg_io_hdr_t __user *sgio;
-       u16 iovec_count;
-       u32 data;
-       void __user *dxferp;
-       int err;
-       int interface_id;
- 
-       if (get_user(interface_id, &sgio32->interface_id))
-               return -EFAULT;
-       if (interface_id != 'S')
-               return -EINVAL;
- 
-       if (get_user(iovec_count, &sgio32->iovec_count))
-               return -EFAULT;
- 
-       {
-               void __user *top = compat_alloc_user_space(0);
-               void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) +
-                                      (iovec_count * sizeof(sg_iovec_t)));
-               if (new > top)
-                       return -EINVAL;
- 
-               sgio = new;
-       }
- 
-       /* Ok, now construct.  */
-       if (copy_in_user(&sgio->interface_id, &sgio32->interface_id,
-                        (2 * sizeof(int)) +
-                        (2 * sizeof(unsigned char)) +
-                        (1 * sizeof(unsigned short)) +
-                        (1 * sizeof(unsigned int))))
-               return -EFAULT;
- 
-       if (get_user(data, &sgio32->dxferp))
-               return -EFAULT;
-       dxferp = compat_ptr(data);
-       if (iovec_count) {
-               if (sg_build_iovec(sgio, dxferp, iovec_count))
-                       return -EFAULT;
-       } else {
-               if (put_user(dxferp, &sgio->dxferp))
-                       return -EFAULT;
-       }
- 
-       {
-               unsigned char __user *cmdp;
-               unsigned char __user *sbp;
- 
-               if (get_user(data, &sgio32->cmdp))
-                       return -EFAULT;
-               cmdp = compat_ptr(data);
- 
-               if (get_user(data, &sgio32->sbp))
-                       return -EFAULT;
-               sbp = compat_ptr(data);
- 
-               if (put_user(cmdp, &sgio->cmdp) ||
-                   put_user(sbp, &sgio->sbp))
-                       return -EFAULT;
-       }
- 
-       if (copy_in_user(&sgio->timeout, &sgio32->timeout,
-                        3 * sizeof(int)))
-               return -EFAULT;
- 
-       if (get_user(data, &sgio32->usr_ptr))
-               return -EFAULT;
-       if (put_user(compat_ptr(data), &sgio->usr_ptr))
-               return -EFAULT;
- 
-       err = nvme_sg_io(ns, sgio);
-       if (err >= 0) {
-               void __user *datap;
- 
-               if (copy_in_user(&sgio32->pack_id, &sgio->pack_id,
-                                sizeof(int)) ||
-                   get_user(datap, &sgio->usr_ptr) ||
-                   put_user((u32)(unsigned long)datap,
-                            &sgio32->usr_ptr) ||
-                   copy_in_user(&sgio32->status, &sgio->status,
-                                (4 * sizeof(unsigned char)) +
-                                (2 * sizeof(unsigned short)) +
-                                (3 * sizeof(int))))
-                       err = -EFAULT;
-       }
- 
-       return err;
- }
- #endif
- 
   int nvme_sg_get_version_num(int __user *ip)
   {
         return put_user(sg_version_num, ip);
diff --combined drivers/md/dm.c

index 8f37ed215b19a1c42876eb350c90866e2d390d26,b1cdf69b11e75b301f9aadcb2d348f255e3bc941..4c06585bf1657b076835c073f973b1dc780968d6
--- 1/drivers/md/dm.c
--- 2/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@@ -19,7 -19,6 +19,7 @@@
   #include <linux/idr.h>
   #include <linux/hdreg.h>
   #include <linux/delay.h>
+ +#include <linux/wait.h>
   
   #include <trace/events/block.h>
   
@@@ -118,7 -117,6 +118,7 @@@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo)
   #define DMF_NOFLUSH_SUSPENDING 5
   #define DMF_MERGE_IS_OPTIONAL 6
   #define DMF_DEFERRED_REMOVE 7
+ +#define DMF_SUSPENDED_INTERNALLY 8
   
   /*
    * A dummy definition to make RCU happy.
@@@ -142,7 -140,7 +142,7 @@@ struct mapped_device 
          * Use dm_get_live_table{_fast} or take suspend_lock for
          * dereference.
          */
- -      struct dm_table *map;
+ +      struct dm_table __rcu *map;
   
         struct list_head table_devices;
         struct mutex table_devices_lock;
@@@ -527,15 -525,14 +527,15 @@@ retry
                 goto out;
   
         tgt = dm_table_get_target(map, 0);
+ +      if (!tgt->type->ioctl)
+ +              goto out;
   
         if (dm_suspended_md(md)) {
                 r = -EAGAIN;
                 goto out;
         }
   
- -      if (tgt->type->ioctl)
- -              r = tgt->type->ioctl(tgt, cmd, arg);
+ +      r = tgt->type->ioctl(tgt, cmd, arg);
   
   out:
         dm_put_live_table(md, srcu_idx);
@@@ -605,13 -602,10 +605,10 @@@ static void end_io_acct(struct dm_io *i
         struct mapped_device *md = io->md;
         struct bio *bio = io->bio;
         unsigned long duration = jiffies - io->start_time;
-       int pending, cpu;
+       int pending;
         int rw = bio_data_dir(bio);
   
-       cpu = part_stat_lock();
-       part_round_stats(cpu, &dm_disk(md)->part0);
-       part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
-       part_stat_unlock();
+       generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
   
         if (unlikely(dm_stats_used(&md->stats)))
                 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
@@@ -1610,9 -1604,9 +1607,9 @@@ static int dm_merge_bvec(struct request
          * Find maximum amount of I/O that won't need splitting
          */
         max_sectors = min(max_io_len(bvm->bi_sector, ti),
- -                        (sector_t) BIO_MAX_SECTORS);
+ +                        (sector_t) queue_max_sectors(q));
         max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
- -      if (max_size < 0)
+ +      if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
                 max_size = 0;
   
         /*
@@@ -1624,10 -1618,10 +1621,10 @@@
                 max_size = ti->type->merge(ti, bvm, biovec, max_size);
         /*
          * If the target doesn't support merge method and some of the devices
- -       * provided their merge_bvec method (we know this by looking at
- -       * queue_max_hw_sectors), then we can't allow bios with multiple vector
- -       * entries.  So always set max_size to 0, and the code below allows
- -       * just one page.
+ +       * provided their merge_bvec method (we know this by looking for the
+ +       * max_hw_sectors that dm_set_device_limits may set), then we can't
+ +       * allow bios with multiple vector entries.  So always set max_size
+ +       * to 0, and the code below allows just one page.
          */
         else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
                 max_size = 0;
@@@ -1651,16 -1645,12 +1648,12 @@@ static void _dm_request(struct request_
   {
         int rw = bio_data_dir(bio);
         struct mapped_device *md = q->queuedata;
-       int cpu;
         int srcu_idx;
         struct dm_table *map;
   
         map = dm_get_live_table(md, &srcu_idx);
   
-       cpu = part_stat_lock();
-       part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
-       part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
-       part_stat_unlock();
+       generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
   
         /* if we're suspended, we have to queue this io for later */
         if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
@@@ -2335,7 -2325,7 +2328,7 @@@ static struct dm_table *__bind(struct m
   
         merge_is_optional = dm_table_merge_is_optional(t);
   
- -      old_map = md->map;
+ +      old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
         rcu_assign_pointer(md->map, t);
         md->immutable_target_type = dm_table_get_immutable_target_type(t);
   
@@@ -2344,8 -2334,7 +2337,8 @@@
                 set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
         else
                 clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
- -      dm_sync_table(md);
+ +      if (old_map)
+ +              dm_sync_table(md);
   
         return old_map;
   }
@@@ -2355,7 -2344,7 +2348,7 @@@
    */
   static struct dm_table *__unbind(struct mapped_device *md)
   {
- -      struct dm_table *map = md->map;
+ +      struct dm_table *map = rcu_dereference_protected(md->map, 1);
   
         if (!map)
                 return NULL;
@@@ -2720,18 -2709,36 +2713,18 @@@ static void unlock_fs(struct mapped_dev
   }
   
   /*
- - * We need to be able to change a mapping table under a mounted
- - * filesystem.  For example we might want to move some data in
- - * the background.  Before the table can be swapped with
- - * dm_bind_table, dm_suspend must be called to flush any in
- - * flight bios and ensure that any further io gets deferred.
- - */
- -/*
- - * Suspend mechanism in request-based dm.
+ + * If __dm_suspend returns 0, the device is completely quiescent
+ + * now. There is no request-processing activity. All new requests
+ + * are being added to md->deferred list.
    *
- - * 1. Flush all I/Os by lock_fs() if needed.
- - * 2. Stop dispatching any I/O by stopping the request_queue.
- - * 3. Wait for all in-flight I/Os to be completed or requeued.
- - *
- - * To abort suspend, start the request_queue.
+ + * Caller must hold md->suspend_lock
    */
- -int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+ +static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+ +                      unsigned suspend_flags, int interruptible)
   {
- -      struct dm_table *map = NULL;
- -      int r = 0;
- -      int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
- -      int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
- -
- -      mutex_lock(&md->suspend_lock);
- -
- -      if (dm_suspended_md(md)) {
- -              r = -EINVAL;
- -              goto out_unlock;
- -      }
- -
- -      map = md->map;
+ +      bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+ +      bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+ +      int r;
   
         /*
          * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@@ -2740,10 -2747,7 +2733,10 @@@
         if (noflush)
                 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
   
- -      /* This does not get reverted if there's an error later. */
+ +      /*
+ +       * This gets reverted if there's an error later and the targets
+ +       * provide the .presuspend_undo hook.
+ +       */
         dm_table_presuspend_targets(map);
   
         /*
@@@ -2754,10 -2758,8 +2747,10 @@@
          */
         if (!noflush && do_lockfs) {
                 r = lock_fs(md);
- -              if (r)
- -                      goto out_unlock;
+ +              if (r) {
+ +                      dm_table_presuspend_undo_targets(map);
+ +                      return r;
+ +              }
         }
   
         /*
@@@ -2773,8 -2775,7 +2766,8 @@@
          * flush_workqueue(md->wq).
          */
         set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
- -      synchronize_srcu(&md->io_barrier);
+ +      if (map)
+ +              synchronize_srcu(&md->io_barrier);
   
         /*
          * Stop md->queue before flushing md->wq in case request-based
@@@ -2790,12 -2791,11 +2783,12 @@@
          * We call dm_wait_for_completion to wait for all existing requests
          * to finish.
          */
- -      r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+ +      r = dm_wait_for_completion(md, interruptible);
   
         if (noflush)
                 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- -      synchronize_srcu(&md->io_barrier);
+ +      if (map)
+ +              synchronize_srcu(&md->io_barrier);
   
         /* were we interrupted ? */
         if (r < 0) {
@@@ -2805,56 -2805,14 +2798,56 @@@
                         start_queue(md->queue);
   
                 unlock_fs(md);
- -              goto out_unlock; /* pushback list is already flushed, so skip flush */
+ +              dm_table_presuspend_undo_targets(map);
+ +              /* pushback list is already flushed, so skip flush */
         }
   
- -      /*
- -       * If dm_wait_for_completion returned 0, the device is completely
- -       * quiescent now. There is no request-processing activity. All new
- -       * requests are being added to md->deferred list.
- -       */
+ +      return r;
+ +}
+ +
+ +/*
+ + * We need to be able to change a mapping table under a mounted
+ + * filesystem.  For example we might want to move some data in
+ + * the background.  Before the table can be swapped with
+ + * dm_bind_table, dm_suspend must be called to flush any in
+ + * flight bios and ensure that any further io gets deferred.
+ + */
+ +/*
+ + * Suspend mechanism in request-based dm.
+ + *
+ + * 1. Flush all I/Os by lock_fs() if needed.
+ + * 2. Stop dispatching any I/O by stopping the request_queue.
+ + * 3. Wait for all in-flight I/Os to be completed or requeued.
+ + *
+ + * To abort suspend, start the request_queue.
+ + */
+ +int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+ +{
+ +      struct dm_table *map = NULL;
+ +      int r = 0;
+ +
+ +retry:
+ +      mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+ +
+ +      if (dm_suspended_md(md)) {
+ +              r = -EINVAL;
+ +              goto out_unlock;
+ +      }
+ +
+ +      if (dm_suspended_internally_md(md)) {
+ +              /* already internally suspended, wait for internal resume */
+ +              mutex_unlock(&md->suspend_lock);
+ +              r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ +              if (r)
+ +                      return r;
+ +              goto retry;
+ +      }
+ +
+ +      map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ +
+ +      r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+ +      if (r)
+ +              goto out_unlock;
   
         set_bit(DMF_SUSPENDED, &md->flags);
   
@@@ -2865,13 -2823,22 +2858,13 @@@ out_unlock
         return r;
   }
   
- -int dm_resume(struct mapped_device *md)
+ +static int __dm_resume(struct mapped_device *md, struct dm_table *map)
   {
- -      int r = -EINVAL;
- -      struct dm_table *map = NULL;
- -
- -      mutex_lock(&md->suspend_lock);
- -      if (!dm_suspended_md(md))
- -              goto out;
- -
- -      map = md->map;
- -      if (!map || !dm_table_get_size(map))
- -              goto out;
- -
- -      r = dm_table_resume_targets(map);
- -      if (r)
- -              goto out;
+ +      if (map) {
+ +              int r = dm_table_resume_targets(map);
+ +              if (r)
+ +                      return r;
+ +      }
   
         dm_queue_flush(md);
   
@@@ -2885,37 -2852,6 +2878,37 @@@
   
         unlock_fs(md);
   
+ +      return 0;
+ +}
+ +
+ +int dm_resume(struct mapped_device *md)
+ +{
+ +      int r = -EINVAL;
+ +      struct dm_table *map = NULL;
+ +
+ +retry:
+ +      mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+ +
+ +      if (!dm_suspended_md(md))
+ +              goto out;
+ +
+ +      if (dm_suspended_internally_md(md)) {
+ +              /* already internally suspended, wait for internal resume */
+ +              mutex_unlock(&md->suspend_lock);
+ +              r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+ +              if (r)
+ +                      return r;
+ +              goto retry;
+ +      }
+ +
+ +      map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ +      if (!map || !dm_table_get_size(map))
+ +              goto out;
+ +
+ +      r = __dm_resume(md, map);
+ +      if (r)
+ +              goto out;
+ +
         clear_bit(DMF_SUSPENDED, &md->flags);
   
         r = 0;
@@@ -2929,80 -2865,15 +2922,80 @@@ out
    * Internal suspend/resume works like userspace-driven suspend. It waits
    * until all bios finish and prevents issuing new bios to the target drivers.
    * It may be used only from the kernel.
- - *
- - * Internal suspend holds md->suspend_lock, which prevents interaction with
- - * userspace-driven suspend.
    */
   
- -void dm_internal_suspend(struct mapped_device *md)
+ +static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
   {
- -      mutex_lock(&md->suspend_lock);
+ +      struct dm_table *map = NULL;
+ +
+ +      if (dm_suspended_internally_md(md))
+ +              return; /* nested internal suspend */
+ +
+ +      if (dm_suspended_md(md)) {
+ +              set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ +              return; /* nest suspend */
+ +      }
+ +
+ +      map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ +
+ +      /*
+ +       * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+ +       * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
+ +       * would require changing .presuspend to return an error -- avoid this
+ +       * until there is a need for more elaborate variants of internal suspend.
+ +       */
+ +      (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+ +
+ +      set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ +
+ +      dm_table_postsuspend_targets(map);
+ +}
+ +
+ +static void __dm_internal_resume(struct mapped_device *md)
+ +{
+ +      if (!dm_suspended_internally_md(md))
+ +              return; /* resume from nested internal suspend */
+ +
         if (dm_suspended_md(md))
+ +              goto done; /* resume from nested suspend */
+ +
+ +      /*
+ +       * NOTE: existing callers don't need to call dm_table_resume_targets
+ +       * (which may fail -- so best to avoid it for now by passing NULL map)
+ +       */
+ +      (void) __dm_resume(md, NULL);
+ +
+ +done:
+ +      clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ +      smp_mb__after_atomic();
+ +      wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+ +}
+ +
+ +void dm_internal_suspend_noflush(struct mapped_device *md)
+ +{
+ +      mutex_lock(&md->suspend_lock);
+ +      __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+ +      mutex_unlock(&md->suspend_lock);
+ +}
+ +EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+ +
+ +void dm_internal_resume(struct mapped_device *md)
+ +{
+ +      mutex_lock(&md->suspend_lock);
+ +      __dm_internal_resume(md);
+ +      mutex_unlock(&md->suspend_lock);
+ +}
+ +EXPORT_SYMBOL_GPL(dm_internal_resume);
+ +
+ +/*
+ + * Fast variants of internal suspend/resume hold md->suspend_lock,
+ + * which prevents interaction with userspace-driven suspend.
+ + */
+ +
+ +void dm_internal_suspend_fast(struct mapped_device *md)
+ +{
+ +      mutex_lock(&md->suspend_lock);
+ +      if (dm_suspended_md(md) || dm_suspended_internally_md(md))
                 return;
   
         set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@@ -3011,9 -2882,9 +3004,9 @@@
         dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
   }
   
- -void dm_internal_resume(struct mapped_device *md)
+ +void dm_internal_resume_fast(struct mapped_device *md)
   {
- -      if (dm_suspended_md(md))
+ +      if (dm_suspended_md(md) || dm_suspended_internally_md(md))
                 goto done;
   
         dm_queue_flush(md);
@@@ -3099,11 -2970,6 +3092,11 @@@ int dm_suspended_md(struct mapped_devic
         return test_bit(DMF_SUSPENDED, &md->flags);
   }
   
+ +int dm_suspended_internally_md(struct mapped_device *md)
+ +{
+ +      return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+ +}
+ +
   int dm_test_deferred_remove_flag(struct mapped_device *md)
   {
         return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
author	Linus Torvalds <[email protected]>
	Sat, 13 Dec 2014 22:22:26 +0000 (14:22 -0800)
committer	Linus Torvalds <[email protected]>
	Sat, 13 Dec 2014 22:22:26 +0000 (14:22 -0800)
Files changed (diff1 / diff2 are the diffs against merge parents 1 and 2):
drivers/block/nvme-scsi.c	patch | diff1 | diff2 | blob | history
drivers/md/dm.c	patch | diff1 | diff2 | blob | history