#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
+/*
+ * Work processed by per-device workqueue.
+ */
+struct dm_wq_req {
+ enum {
+ DM_WQ_FLUSH_ALL, /* flush deferred bios and requeue any pushed-back bios */
+ DM_WQ_FLUSH_DEFERRED, /* flush deferred bios only */
+ } type;
+ struct work_struct work;
+ struct mapped_device *md;
+ void *context;
+};
+
struct mapped_device {
struct rw_semaphore io_lock;
- struct semaphore suspend_lock;
+ struct mutex suspend_lock;
spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
struct bio_list deferred;
struct bio_list pushback;
+ /*
+ * Processing queue (flush/barriers)
+ */
+ struct workqueue_struct *wq;
+
/*
* The current mapping.
*/
DMINFO("cleaned up");
}
-int (*_inits[])(void) __initdata = {
+static int (*_inits[])(void) __initdata = {
local_init,
dm_target_init,
dm_linear_init,
dm_stripe_init,
+ dm_kcopyd_init,
dm_interface_init,
};
-void (*_exits[])(void) = {
+static void (*_exits[])(void) = {
local_exit,
dm_target_exit,
dm_linear_exit,
dm_stripe_exit,
+ dm_kcopyd_exit,
dm_interface_exit,
};
return clone;
}
-static void __clone_and_map(struct clone_info *ci)
+static int __clone_and_map(struct clone_info *ci)
{
struct bio *clone, *bio = ci->bio;
- struct dm_target *ti = dm_table_find_target(ci->map, ci->sector);
- sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
+ struct dm_target *ti;
+ sector_t len = 0, max;
struct dm_target_io *tio;
+ ti = dm_table_find_target(ci->map, ci->sector);
+ if (!dm_target_is_valid(ti))
+ return -EIO;
+
+ max = max_io_len(ci->md, ci->sector, ti);
+
/*
* Allocate a target io object.
*/
do {
if (offset) {
ti = dm_table_find_target(ci->map, ci->sector);
+ if (!dm_target_is_valid(ti))
+ return -EIO;
+
max = max_io_len(ci->md, ci->sector, ti);
tio = alloc_tio(ci->md);
ci->idx++;
}
+
+ return 0;
}
/*
static int __split_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
+ int error = 0;
ci.map = dm_get_table(md);
if (unlikely(!ci.map))
ci.idx = bio->bi_idx;
start_io_acct(ci.io);
- while (ci.sector_count)
- __clone_and_map(&ci);
+ while (ci.sector_count && !error)
+ error = __clone_and_map(&ci);
/* drop the extra reference count */
- dec_pending(ci.io, 0);
+ dec_pending(ci.io, error);
dm_table_put(ci.map);
return 0;
/*
* See if the device with a specific minor # is free.
*/
-static int specific_minor(struct mapped_device *md, int minor)
+static int specific_minor(int minor)
{
int r, m;
return r;
}
-static int next_free_minor(struct mapped_device *md, int *minor)
+static int next_free_minor(int *minor)
{
int r, m;
spin_lock(&_minor_lock);
r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
- if (r) {
+ if (r)
goto out;
- }
if (m >= (1 << MINORBITS)) {
idr_remove(&_minor_idr, m);
static struct mapped_device *alloc_dev(int minor)
{
int r;
- struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+ struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
void *old_md;
if (!md) {
}
if (!try_module_get(THIS_MODULE))
- goto bad0;
+ goto bad_module_get;
/* get a minor number for the dev */
if (minor == DM_ANY_MINOR)
- r = next_free_minor(md, &minor);
+ r = next_free_minor(&minor);
else
- r = specific_minor(md, minor);
+ r = specific_minor(minor);
if (r < 0)
- goto bad1;
+ goto bad_minor;
- memset(md, 0, sizeof(*md));
init_rwsem(&md->io_lock);
- init_MUTEX(&md->suspend_lock);
+ mutex_init(&md->suspend_lock);
spin_lock_init(&md->pushback_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
md->queue = blk_alloc_queue(GFP_KERNEL);
if (!md->queue)
- goto bad1_free_minor;
+ goto bad_queue;
md->queue->queuedata = md;
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
if (!md->io_pool)
- goto bad2;
+ goto bad_io_pool;
md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
if (!md->tio_pool)
- goto bad3;
+ goto bad_tio_pool;
md->bs = bioset_create(16, 16);
if (!md->bs)
md->disk = alloc_disk(1);
if (!md->disk)
- goto bad4;
+ goto bad_disk;
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
add_disk(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
+ md->wq = create_singlethread_workqueue("kdmflush");
+ if (!md->wq)
+ goto bad_thread;
+
/* Populate the mapping, nobody knows we exist yet */
spin_lock(&_minor_lock);
old_md = idr_replace(&_minor_idr, md, minor);
return md;
- bad4:
+bad_thread:
+ put_disk(md->disk);
+bad_disk:
bioset_free(md->bs);
- bad_no_bioset:
+bad_no_bioset:
mempool_destroy(md->tio_pool);
- bad3:
+bad_tio_pool:
mempool_destroy(md->io_pool);
- bad2:
+bad_io_pool:
blk_cleanup_queue(md->queue);
- bad1_free_minor:
+bad_queue:
free_minor(minor);
- bad1:
+bad_minor:
module_put(THIS_MODULE);
- bad0:
+bad_module_get:
kfree(md);
return NULL;
}
unlock_fs(md);
bdput(md->suspended_bdev);
}
+ destroy_workqueue(md->wq);
mempool_destroy(md->tio_pool);
mempool_destroy(md->io_pool);
bioset_free(md->bs);
list_splice_init(&md->uevent_list, &uevents);
spin_unlock_irqrestore(&md->uevent_lock, flags);
- dm_send_uevents(&uevents, &md->disk->kobj);
+ dm_send_uevents(&uevents, &md->disk->dev.kobj);
atomic_inc(&md->event_nr);
wake_up(&md->eventq);
}
EXPORT_SYMBOL_GPL(dm_put);
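+/*
+ * Wait until there is no I/O in flight on the device, or until a signal
+ * is received (in which case -EINTR is returned).
+ */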
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+ int r = 0;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ smp_mb();
+ if (!atomic_read(&md->pending))
+ break;
+
+ if (signal_pending(current)) {
+ r = -EINTR;
+ break;
+ }
+
+ io_schedule();
+ }
+ set_current_state(TASK_RUNNING);
+
+ return r;
+}
+
/*
* Process the deferred bios
*/
-static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md)
{
- struct bio *n;
+ struct bio *c;
- while (c) {
- n = c->bi_next;
- c->bi_next = NULL;
+ while ((c = bio_list_pop(&md->deferred))) {
if (__split_bio(md, c))
bio_io_error(c);
- c = n;
}
+
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+}
+
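+/*
+ * Move bios that were pushed back during a noflush suspend onto the
+ * front of the deferred list so they are reissued first.
+ */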
+static void __merge_pushback_list(struct mapped_device *md)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&md->pushback_lock, flags);
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+ bio_list_merge_head(&md->deferred, &md->pushback);
+ bio_list_init(&md->pushback);
+ spin_unlock_irqrestore(&md->pushback_lock, flags);
+}
+
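+/*
+ * Work function run from the per-device workqueue: with io_lock held,
+ * flush the deferred bios (and, for DM_WQ_FLUSH_ALL, the pushed-back
+ * bios as well).
+ */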
+static void dm_wq_work(struct work_struct *work)
+{
+ struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+ struct mapped_device *md = req->md;
+
+ down_write(&md->io_lock);
+ switch (req->type) {
+ case DM_WQ_FLUSH_ALL:
+ __merge_pushback_list(md);
+ /* pass through */
+ case DM_WQ_FLUSH_DEFERRED:
+ __flush_deferred_io(md);
+ break;
+ default:
+ DMERR("dm_wq_work: unrecognised work type %d", req->type);
+ BUG();
+ }
+ up_write(&md->io_lock);
+}
+
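+/*
+ * Initialise a caller-supplied dm_wq_req and queue it on the device
+ * workqueue.
+ */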
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+ struct dm_wq_req *req)
+{
+ req->type = type;
+ req->md = md;
+ req->context = context;
+ INIT_WORK(&req->work, dm_wq_work);
+ queue_work(md->wq, &req->work);
+}
+
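+/*
+ * Queue a flush request and wait for the workqueue to finish processing it.
+ */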
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+ struct dm_wq_req req;
+
+ dm_wq_queue(md, type, context, &req);
+ flush_workqueue(md->wq);
}
/*
{
int r = -EINVAL;
- down(&md->suspend_lock);
+ mutex_lock(&md->suspend_lock);
/* device must be suspended */
if (!dm_suspended(md))
r = __bind(md, table);
out:
- up(&md->suspend_lock);
+ mutex_unlock(&md->suspend_lock);
return r;
}
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
- unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
- struct bio *def;
- int r = -EINVAL;
+ int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
- down(&md->suspend_lock);
+ mutex_lock(&md->suspend_lock);
- if (dm_suspended(md))
+ if (dm_suspended(md)) {
+ r = -EINVAL;
goto out_unlock;
+ }
map = dm_get_table(md);
r = -ENOMEM;
goto flush_and_out;
}
- }
- /*
- * Flush I/O to the device.
- * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
- */
- if (do_lockfs && !noflush) {
- r = lock_fs(md);
- if (r)
- goto out;
+ /*
+ * Flush I/O to the device. noflush supersedes do_lockfs,
+ * because lock_fs() needs to flush I/Os.
+ */
+ if (do_lockfs) {
+ r = lock_fs(md);
+ if (r)
+ goto out;
+ }
}
/*
dm_table_unplug_all(map);
/*
- * Then we wait for the already mapped ios to
- * complete.
+ * Wait for the already-mapped ios to complete.
*/
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (!atomic_read(&md->pending) || signal_pending(current))
- break;
-
- io_schedule();
- }
- set_current_state(TASK_RUNNING);
+ r = dm_wait_for_completion(md);
down_write(&md->io_lock);
remove_wait_queue(&md->wait, &wait);
- if (noflush) {
- spin_lock_irqsave(&md->pushback_lock, flags);
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- bio_list_merge_head(&md->deferred, &md->pushback);
- bio_list_init(&md->pushback);
- spin_unlock_irqrestore(&md->pushback_lock, flags);
- }
+ if (noflush)
+ __merge_pushback_list(md);
+ up_write(&md->io_lock);
/* were we interrupted ? */
- r = -EINTR;
- if (atomic_read(&md->pending)) {
- clear_bit(DMF_BLOCK_IO, &md->flags);
- def = bio_list_get(&md->deferred);
- __flush_deferred_io(md, def);
- up_write(&md->io_lock);
+ if (r < 0) {
+ dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
}
- up_write(&md->io_lock);
dm_table_postsuspend_targets(map);
set_bit(DMF_SUSPENDED, &md->flags);
- r = 0;
-
flush_and_out:
- if (r && noflush) {
+ if (r && noflush)
/*
* Because there may already be I/Os in the pushback list,
* flush them before returning.
*/
- down_write(&md->io_lock);
-
- spin_lock_irqsave(&md->pushback_lock, flags);
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- bio_list_merge_head(&md->deferred, &md->pushback);
- bio_list_init(&md->pushback);
- spin_unlock_irqrestore(&md->pushback_lock, flags);
-
- def = bio_list_get(&md->deferred);
- __flush_deferred_io(md, def);
- up_write(&md->io_lock);
- }
+ dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
out:
if (r && md->suspended_bdev) {
dm_table_put(map);
out_unlock:
- up(&md->suspend_lock);
+ mutex_unlock(&md->suspend_lock);
return r;
}
int dm_resume(struct mapped_device *md)
{
int r = -EINVAL;
- struct bio *def;
struct dm_table *map = NULL;
- down(&md->suspend_lock);
+ mutex_lock(&md->suspend_lock);
if (!dm_suspended(md))
goto out;
if (r)
goto out;
- down_write(&md->io_lock);
- clear_bit(DMF_BLOCK_IO, &md->flags);
-
- def = bio_list_get(&md->deferred);
- __flush_deferred_io(md, def);
- up_write(&md->io_lock);
+ dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
unlock_fs(md);
dm_table_unplug_all(map);
- kobject_uevent(&md->disk->kobj, KOBJ_CHANGE);
+ dm_kobject_uevent(md);
r = 0;
out:
dm_table_put(map);
- up(&md->suspend_lock);
+ mutex_unlock(&md->suspend_lock);
return r;
}
/*-----------------------------------------------------------------
* Event notification.
*---------------------------------------------------------------*/
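+/*
+ * Send a KOBJ_CHANGE uevent for the device's gendisk.
+ */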
+void dm_kobject_uevent(struct mapped_device *md)
+{
+ kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
+}
+
uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
return atomic_add_return(1, &md->uevent_seq);