return 0;
fbio = bio;
- cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+ cluster = blk_queue_cluster(q);
seg_size = 0;
nr_phys_segs = 0;
for_each_bio(bio) {
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
- if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+ if (!blk_queue_cluster(q))
return 0;
if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
int nsegs, cluster;
nsegs = 0;
- cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+ cluster = blk_queue_cluster(q);
/*
* for each bio in rq
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_round_stats(cpu, part);
part_dec_in_flight(part, rq_data_dir(req));
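+ /* req->part pinned a reference to the partition; drop it now */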
+ hd_struct_put(part);
part_stat_unlock();
}
}
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
- struct rb_node *active;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_vdisktime = 0, }
*/
struct cfq_queue {
/* reference count */
- atomic_t ref;
+ int ref;
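+ /* (always manipulated under the queue_lock, so no atomic_t is needed) */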
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
/* group service_tree key */
u64 vdisktime;
unsigned int weight;
- bool on_st;
/* number of cfqq currently on this group */
int nr_cfqq;
struct blkio_group blkg;
#ifdef CONFIG_CFQ_GROUP_IOSCHED
struct hlist_node cfqd_node;
- atomic_t ref;
+ int ref;
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
u64 vdisktime = st->min_vdisktime;
struct cfq_group *cfqg;
- if (st->active) {
- cfqg = rb_entry_cfqg(st->active);
- vdisktime = cfqg->vdisktime;
- }
-
if (st->left) {
cfqg = rb_entry_cfqg(st->left);
vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
- return 0;
+ return false;
if (time_before(jiffies, cfqq->slice_end))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
struct rb_node *n;
cfqg->nr_cfqq++;
- if (cfqg->on_st)
+ if (!RB_EMPTY_NODE(&cfqg->rb_node))
return;
/*
cfqg->vdisktime = st->min_vdisktime;
__cfq_group_service_tree_add(st, cfqg);
- cfqg->on_st = true;
st->total_weight += cfqg->weight;
}
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- if (st->active == &cfqg->rb_node)
- st->active = NULL;
-
BUG_ON(cfqg->nr_cfqq < 1);
cfqg->nr_cfqq--;
return;
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
- cfqg->on_st = false;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
* elevator which will be dropped by either elevator exit
* or cgroup deletion path depending on who is exiting first.
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
/*
* Add group onto cgroup list. It might happen that bdi->dev is
- * not initiliazed yet. Initialize this new group without major
+ * not initialized yet. Initialize this new group without major
* and minor info and this info will be filled in once a new thread
* comes for IO. See code above.
*/
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
- atomic_inc(&cfqg->ref);
+ cfqg->ref++;
return cfqg;
}
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
- atomic_inc(&cfqq->cfqg->ref);
+ cfqq->cfqg->ref++;
}
static void cfq_put_cfqg(struct cfq_group *cfqg)
struct cfq_rb_root *st;
int i, j;
- BUG_ON(atomic_read(&cfqg->ref) <= 0);
- if (!atomic_dec_and_test(&cfqg->ref))
+ BUG_ON(cfqg->ref <= 0);
+ cfqg->ref--;
+ if (cfqg->ref)
return;
for_each_cfqg_st(cfqg, i, j, st)
- BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+ BUG_ON(!RB_EMPTY_ROOT(&st->rb));
kfree(cfqg);
}
cfq_group_service_tree_del(cfqd, cfqq->cfqg);
cfqq->orig_cfqg = cfqq->cfqg;
cfqq->cfqg = &cfqd->root_group;
- atomic_inc(&cfqd->root_group.ref);
+ cfqd->root_group.ref++;
group_changed = 1;
} else if (!cfqd->cfq_group_isolation
&& cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
if (cfqq == cfqd->active_queue)
cfqd->active_queue = NULL;
- if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
- cfqd->grp_service_tree.active = NULL;
-
if (cfqd->active_cic) {
put_io_context(cfqd->active_cic->ioc);
cfqd->active_cic = NULL;
* in their service tree.
*/
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
- return 0;
+ return false;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
int process_refs, io_refs;
io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
- process_refs = atomic_read(&cfqq->ref) - io_refs;
+ process_refs = cfqq->ref - io_refs;
BUG_ON(process_refs < 0);
return process_refs;
}
*/
if (new_process_refs >= process_refs) {
cfqq->new_cfqq = new_cfqq;
- atomic_add(process_refs, &new_cfqq->ref);
+ new_cfqq->ref += process_refs;
} else {
new_cfqq->new_cfqq = cfqq;
- atomic_add(new_process_refs, &cfqq->ref);
+ cfqq->ref += new_process_refs;
}
}
unsigned count;
struct cfq_rb_root *st;
unsigned group_slice;
-
- if (!cfqg) {
- cfqd->serving_prio = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
- return;
- }
+ enum wl_prio_t original_prio = cfqd->serving_prio;
/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
return;
}
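+ /* the serving priority class changed: the old workload choice no longer applies */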
+ if (original_prio != cfqd->serving_prio)
+ goto new_workload;
+
/*
* For RT and BE, we have to choose also the type
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
if (count && !time_after(jiffies, cfqd->workload_expires))
return;
+ new_workload:
/* otherwise select new workload type */
cfqd->serving_type =
cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
if (RB_EMPTY_ROOT(&st->rb))
return NULL;
cfqg = cfq_rb_first_group(st);
- st->active = &cfqg->rb_node;
update_min_vdisktime(st);
return cfqg;
}
goto keep_queue;
}
+ /*
+ * This is a deep seek queue, but the device is much faster than
+ * the queue can deliver: don't idle here.
+ */
+ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+ (cfq_cfqq_slice_new(cfqq) ||
+ (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ cfq_clear_cfqq_deep(cfqq);
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
+
if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
cfqq = NULL;
goto keep_queue;
{
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
- return 1;
+ return true;
if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
cfqq->slice_end))
- return 1;
+ return true;
- return 0;
+ return false;
}
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
struct cfq_data *cfqd = cfqq->cfqd;
struct cfq_group *cfqg, *orig_cfqg;
- BUG_ON(atomic_read(&cfqq->ref) <= 0);
+ BUG_ON(cfqq->ref <= 0);
- if (!atomic_dec_and_test(&cfqq->ref))
+ cfqq->ref--;
+ if (cfqq->ref)
return;
cfq_log_cfqq(cfqd, cfqq, "put_queue");
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);
- atomic_set(&cfqq->ref, 0);
+ cfqq->ref = 0;
cfqq->cfqd = cfqd;
cfq_mark_cfqq_prio_changed(cfqq);
* pin the queue now that it's allocated, scheduler exit will prune it
*/
if (!is_sync && !(*async_cfqq)) {
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
*async_cfqq = cfqq;
}
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
return cfqq;
}
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
return true;
+ /* An idle queue should not be idle now for some reason */
+ if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+ return true;
+
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
return false;
}
cfqq->allocated[rw]++;
- atomic_inc(&cfqq->ref);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
+ cfqq->ref++;
rq->elevator_private = cic;
rq->elevator_private2 = cfqq;
rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
return 0;
queue_fail:
if (!cfqd)
return NULL;
+ /*
+ * No need to take the queue_lock in this routine, since we are
+ * initializing the I/O scheduler and nobody else is using cfqd yet
+ */
cfqd->cic_index = i;
/* Init root service tree */
* Take a reference to root group which we never drop. This is just
* to make sure that cfq_put_cfqg() does not try to kfree root group
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
rcu_read_lock();
cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
(void *)cfqd, 0);
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
- atomic_inc(&cfqd->oom_cfqq.ref);
+ cfqd->oom_cfqq.ref++;
cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
INIT_LIST_HEAD(&cfqd->cic_list);
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
return -EINVAL;
if (get_user(n, (int __user *) arg))
return -EFAULT;
- if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0)
+ if (!(mode & FMODE_EXCL) &&
+ blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
return -EBUSY;
ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
- bd_release(bdev);
+ blkdev_put(bdev, mode | FMODE_EXCL);
return ret;
case BLKPG:
ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
BUG_ON(d->dm_dev.bdev);
- bdev = open_by_devnum(dev, d->dm_dev.mode);
+ bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
- if (r)
- blkdev_put(bdev, d->dm_dev.mode);
- else
- d->dm_dev.bdev = bdev;
- return r;
+
+ r = bd_link_disk_holder(bdev, dm_disk(md));
+ if (r) {
+ blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
+ return r;
+ }
+
+ d->dm_dev.bdev = bdev;
+ return 0;
}
/*
if (!d->dm_dev.bdev)
return;
- bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
- blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
+ blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
d->dm_dev.bdev = NULL;
}
*/
if (q->merge_bvec_fn && !ti->type->merge)
- limits->max_sectors =
- min_not_zero(limits->max_sectors,
- (unsigned int) (PAGE_SIZE >> 9));
+ blk_limits_max_hw_sectors(limits,
+ (unsigned int) (PAGE_SIZE >> 9));
return 0;
}
EXPORT_SYMBOL_GPL(dm_set_device_limits);
*/
q->limits = *limits;
- if (limits->no_cluster)
- queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
- else
- queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
-
if (!dm_table_supports_discards(t))
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
else
bio_put(bio);
}
-static void submit_flushes(mddev_t *mddev)
+static void md_submit_flush_data(struct work_struct *ws);
+
+static void submit_flushes(struct work_struct *ws)
{
+ mddev_t *mddev = container_of(ws, mddev_t, flush_work);
mdk_rdev_t *rdev;
+ INIT_WORK(&mddev->flush_work, md_submit_flush_data);
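+ /*
+ * Bias flush_pending by one so it cannot reach zero (which would
+ * schedule md_submit_flush_data) until the final decrement below,
+ * after all per-device flush bios have been submitted.
+ */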
+ atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
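+ /* drop the initial bias; the last dropper schedules md_submit_flush_data */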
+ if (atomic_dec_and_test(&mddev->flush_pending))
+ queue_work(md_wq, &mddev->flush_work);
}
static void md_submit_flush_data(struct work_struct *ws)
mddev_t *mddev = container_of(ws, mddev_t, flush_work);
struct bio *bio = mddev->flush_bio;
- atomic_set(&mddev->flush_pending, 1);
-
if (bio->bi_size == 0)
/* an empty barrier - all done */
bio_endio(bio, 0);
if (mddev->pers->make_request(mddev, bio))
generic_make_request(bio);
}
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- mddev->flush_bio = NULL;
- wake_up(&mddev->sb_wait);
- }
+
+ mddev->flush_bio = NULL;
+ wake_up(&mddev->sb_wait);
}
void md_flush_request(mddev_t *mddev, struct bio *bio)
mddev->flush_bio = bio;
spin_unlock_irq(&mddev->write_lock);
- atomic_set(&mddev->flush_pending, 1);
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
-
- submit_flushes(mddev);
-
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
+ INIT_WORK(&mddev->flush_work, submit_flushes);
+ queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
- return num_sectors / 2; /* kB for sysfs */
+ return num_sectors;
}
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
- return num_sectors / 2; /* kB for sysfs */
+ return num_sectors;
}
static struct super_type super_types[] = {
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
list_add_rcu(&rdev->same_set, &mddev->disks);
- bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+ bd_link_disk_holder(rdev->bdev, mddev->gendisk);
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
MD_BUG();
return;
}
- bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
printk(KERN_ERR "md: could not open %s.\n",
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
- if (err) {
- printk(KERN_ERR "md: could not bd_claim %s.\n",
- bdevname(bdev, b));
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return err;
- }
if (!shared)
set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
rdev->bdev = NULL;
if (!bdev)
MD_BUG();
- bd_release(bdev);
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
void md_autodetect_dev(dev_t dev);
goto abort;
mddev->queue->queuedata = mddev;
- /* Can be unlocked because the queue is new: no concurrency */
- queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
blk_queue_make_request(mddev->queue, md_make_request);
disk = alloc_disk(1 << shift);
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
printk(KERN_DEBUG "pointless warning\n");
+
+ blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
abort:
mutex_unlock(&disks_mutex);
if (!error && mddev->kobj.sd) {
PTR_ERR(rdev));
return PTR_ERR(rdev);
}
- /* set save_raid_disk if appropriate */
+ /* set saved_raid_disk if appropriate */
if (!mddev->persistent) {
if (info->state & (1<<MD_DISK_SYNC) &&
info->raid_disk < mddev->raid_disks)
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
- rdev->saved_raid_disk = rdev->raid_disk;
+ if (test_bit(In_sync, &rdev->flags))
+ rdev->saved_raid_disk = rdev->raid_disk;
+ else
+ rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags); /* just to be sure */
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
|| kthread_should_stop(),
thread->timeout);
- clear_bit(THREAD_WAKEUP, &thread->flags);
-
- thread->run(thread->mddev);
+ if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags))
+ thread->run(thread->mddev);
}
return 0;
}
if (scsi_target_is_busy(starget)) {
- if (list_empty(&sdev->starved_entry)) {
+ if (list_empty(&sdev->starved_entry))
list_add_tail(&sdev->starved_entry,
&shost->starved_list);
- return 0;
- }
+ return 0;
}
/* We're OK to process the command, so we can't be starved */
INIT_LIST_HEAD(&cmd->eh_entry);
- /*
- * Set the serial numbers back to zero
- */
- cmd->serial_number = 0;
-
atomic_inc(&cmd->device->iodone_cnt);
if (cmd->result)
atomic_inc(&cmd->device->ioerr_cnt);
blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
- /* New queue, no concurrency on queue_flags */
if (!shost->use_clustering)
- queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+ q->limits.cluster = 0;
/*
* set a reasonable default alignment on word boundaries: the
* in.
*
* Returns zero if unsuccessful or an error if TUR failed. For
- * removable media, a return of NOT_READY or UNIT_ATTENTION is
- * translated to success, with the ->changed flag updated.
+ * removable media, UNIT_ATTENTION sets ->changed flag.
**/
int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
} while (scsi_sense_valid(sshdr) &&
sshdr->sense_key == UNIT_ATTENTION && --retries);
- if (!sshdr)
- /* could not allocate sense buffer, so can't process it */
- return result;
-
- if (sdev->removable && scsi_sense_valid(sshdr) &&
- (sshdr->sense_key == UNIT_ATTENTION ||
- sshdr->sense_key == NOT_READY)) {
- sdev->changed = 1;
- result = 0;
- }
if (!sshdr_external)
kfree(sshdr);
return result;
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/delay.h>
-#include <linux/smp_lock.h>
#include <linux/mutex.h>
#include <linux/string_helpers.h>
#include <linux/async.h>
* quietly refuse to do anything to a changed disc until
* the changed bit has been reset
*/
- /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
+ /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
goto out;
}
*/
if (!scsi_device_online(sdp)) {
set_media_not_present(sdkp);
- retval = 1;
goto out;
}
sshdr);
}
- /*
- * Unable to test, unit probably not ready. This usually
- * means there is no disc in the drive. Mark as changed,
- * and we will figure it out later once the drive is
- * available again.
- */
- if (retval || (scsi_sense_valid(sshdr) &&
- /* 0x3a is medium not present */
- sshdr->asc == 0x3a)) {
+ if (retval) {
set_media_not_present(sdkp);
- retval = 1;
goto out;
}
*/
sdkp->media_present = 1;
- retval = sdp->changed;
- sdp->changed = 0;
out:
- if (retval != sdkp->previous_state)
+ /*
+ * Report a media change under the following conditions:
+ *
+ * Medium is present now but wasn't present before.
+ * Medium was present before but isn't present now.
+ * Medium was present at all times, but it changed while
+ * we weren't looking (sdp->changed is set).
+ *
+ * If there was no medium before and there is no medium now then
+ * don't report a change, even if a medium was inserted and removed
+ * while we weren't looking.
+ */
+ retval = (sdkp->media_present != sdkp->previous_state ||
+ (sdkp->media_present && sdp->changed));
+ if (retval)
sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL);
- sdkp->previous_state = retval;
+ sdkp->previous_state = sdkp->media_present;
+
+ /* sdp->changed indicates medium was changed or is not present */
+ sdp->changed = !sdkp->media_present;
kfree(sshdr);
return retval;
}
u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
u64 bad_lba;
int info_valid;
+ /*
+ * resid is optional but mostly filled in. When it's unused,
+ * its value is zero, so we assume the whole buffer was transferred
+ */
+ unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd);
+ unsigned int good_bytes;
if (scmd->request->cmd_type != REQ_TYPE_FS)
return 0;
/* This computation should always be done in terms of
* the resolution of the device's medium.
*/
- return (bad_lba - start_lba) * scmd->device->sector_size;
+ good_bytes = (bad_lba - start_lba) * scmd->device->sector_size;
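+ /* never report more progress than the device actually transferred */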
+ return min(good_bytes, transferred);
}
/**
int old_rcd = sdkp->RCD;
int old_dpofua = sdkp->DPOFUA;
- if (sdp->skip_ms_page_8)
- goto defaults;
-
- if (sdp->type == TYPE_RBC) {
+ if (sdp->skip_ms_page_8) {
+ if (sdp->type == TYPE_RBC)
+ goto defaults;
+ else {
+ modepage = 0x3F;
+ dbd = 0;
+ }
+ } else if (sdp->type == TYPE_RBC) {
modepage = 6;
dbd = 8;
} else {
*/
if (len < 3)
goto bad_sense;
- if (len > 20)
- len = 20;
-
- /* Take headers and block descriptors into account */
- len += data.header_length + data.block_descriptor_length;
- if (len > SD_BUF_SIZE)
- goto bad_sense;
+ else if (len > SD_BUF_SIZE) {
+ sd_printk(KERN_NOTICE, sdkp, "Truncating mode parameter "
+ "data from %d to %d bytes\n", len, SD_BUF_SIZE);
+ len = SD_BUF_SIZE;
+ }
/* Get the data */
res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
if (scsi_status_is_good(res)) {
int offset = data.header_length + data.block_descriptor_length;
- if (offset >= SD_BUF_SIZE - 2) {
- sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n");
- goto defaults;
+ while (offset < len) {
+ u8 page_code = buffer[offset] & 0x3F;
+ u8 spf = buffer[offset] & 0x40;
+
+ if (page_code == 8 || page_code == 6) {
+ /* We're interested only in the first 3 bytes. */
+ if (len - offset <= 2) {
+ sd_printk(KERN_ERR, sdkp, "Incomplete "
+ "mode parameter data\n");
+ goto defaults;
+ } else {
+ modepage = page_code;
+ goto Page_found;
+ }
+ } else {
+ /* Go to the next page */
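+ /* SPF set means a 4-byte subpage header, SPF clear a 2-byte page header */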
+ if (spf && len - offset > 3)
+ offset += 4 + (buffer[offset+2] << 8) +
+ buffer[offset+3];
+ else if (!spf && len - offset > 1)
+ offset += 2 + buffer[offset+1];
+ else {
+ sd_printk(KERN_ERR, sdkp, "Incomplete "
+ "mode parameter data\n");
+ goto defaults;
+ }
+ }
}
- if ((buffer[offset] & 0x3f) != modepage) {
+ if (modepage == 0x3F) {
+ sd_printk(KERN_ERR, sdkp, "No Caching mode page "
+ "present\n");
+ goto defaults;
+ } else if ((buffer[offset] & 0x3f) != modepage) {
sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
goto defaults;
}
-
+ Page_found:
if (modepage == 8) {
sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
-#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
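+/* free the inode via RCU so lockless (RCU-walk) path lookups stay safe */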
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
mutex_init(&bdev->bd_mutex);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
- #ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_list);
- #endif
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
}
static struct file_system_type bd_type = {
else if (bdev->bd_contains == bdev)
return true; /* is a whole device which isn't held */
- else if (whole->bd_holder == bd_claim)
+ else if (whole->bd_holder == bd_may_claim)
return true; /* is a partition of a device that is being partitioned */
else if (whole->bd_holder != NULL)
return false; /* is a partition of a held device */
}
}
- /* releases bdev_lock */
- static void __bd_abort_claiming(struct block_device *whole, void *holder)
- {
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-
- spin_unlock(&bdev_lock);
- bdput(whole);
- }
-
- /**
- * bd_abort_claiming - abort claiming a block device
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Abort a claiming block started by bd_start_claiming(). Note that
- * @whole is not the block device to be claimed but the whole device
- * returned by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
- static void bd_abort_claiming(struct block_device *whole, void *holder)
- {
- spin_lock(&bdev_lock);
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- }
-
- /* increment holders when we have a legitimate claim. requires bdev_lock */
- static void __bd_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
- {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- /**
- * bd_finish_claiming - finish claiming a block device
- * @bdev: block device of interest (passed to bd_start_claiming())
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Finish a claiming block started by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
- static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
- {
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- __bd_claim(bdev, whole, holder);
- __bd_abort_claiming(whole, holder); /* not actually an abort */
- }
-
- /**
- * bd_claim - claim a block device
- * @bdev: block device to claim
- * @holder: holder trying to claim @bdev
- *
- * Try to claim @bdev which must have been opened successfully.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 if successful, -EBUSY if @bdev is already claimed.
- */
- int bd_claim(struct block_device *bdev, void *holder)
- {
- struct block_device *whole = bdev->bd_contains;
- int res;
-
- might_sleep();
-
- spin_lock(&bdev_lock);
- res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0)
- __bd_claim(bdev, whole, holder);
- spin_unlock(&bdev_lock);
-
- return res;
- }
- EXPORT_SYMBOL(bd_claim);
-
- void bd_release(struct block_device *bdev)
- {
- spin_lock(&bdev_lock);
- if (!--bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
- if (!--bdev->bd_holders)
- bdev->bd_holder = NULL;
- spin_unlock(&bdev_lock);
- }
-
- EXPORT_SYMBOL(bd_release);
-
#ifdef CONFIG_SYSFS
- /*
- * Functions for bd_claim_by_kobject / bd_release_from_kobject
- *
- * If a kobject is passed to bd_claim_by_kobject()
- * and the kobject has a parent directory,
- * following symlinks are created:
- * o from the kobject to the claimed bdev
- * o from "holders" directory of the bdev to the parent of the kobject
- * bd_release_from_kobject() removes these symlinks.
- *
- * Example:
- * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
- * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- */
-
static int add_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return 0;
return sysfs_create_link(from, to, kobject_name(to));
}
static void del_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return;
sysfs_remove_link(from, kobject_name(to));
}
- /*
- * 'struct bd_holder' contains pointers to kobjects symlinked by
- * bd_claim_by_kobject.
- * It's connected to bd_holder_list which is protected by bdev->bd_sem.
- */
- struct bd_holder {
- struct list_head list; /* chain of holders of the bdev */
- int count; /* references from the holder */
- struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
- struct kobject *hdev; /* e.g. "/block/dm-0" */
- struct kobject *hdir; /* e.g. "/block/sda/holders" */
- struct kobject *sdev; /* e.g. "/block/sda" */
- };
-
- /*
- * Get references of related kobjects at once.
- * Returns 1 on success. 0 on failure.
- *
- * Should call bd_holder_release_dirs() after successful use.
- */
- static int bd_holder_grab_dirs(struct block_device *bdev,
- struct bd_holder *bo)
- {
- if (!bdev || !bo)
- return 0;
-
- bo->sdir = kobject_get(bo->sdir);
- if (!bo->sdir)
- return 0;
-
- bo->hdev = kobject_get(bo->sdir->parent);
- if (!bo->hdev)
- goto fail_put_sdir;
-
- bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
- if (!bo->sdev)
- goto fail_put_hdev;
-
- bo->hdir = kobject_get(bdev->bd_part->holder_dir);
- if (!bo->hdir)
- goto fail_put_sdev;
-
- return 1;
-
- fail_put_sdev:
- kobject_put(bo->sdev);
- fail_put_hdev:
- kobject_put(bo->hdev);
- fail_put_sdir:
- kobject_put(bo->sdir);
-
- return 0;
- }
-
- /* Put references of related kobjects at once. */
- static void bd_holder_release_dirs(struct bd_holder *bo)
- {
- kobject_put(bo->hdir);
- kobject_put(bo->sdev);
- kobject_put(bo->hdev);
- kobject_put(bo->sdir);
- }
-
- static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
- {
- struct bd_holder *bo;
-
- bo = kzalloc(sizeof(*bo), GFP_KERNEL);
- if (!bo)
- return NULL;
-
- bo->count = 1;
- bo->sdir = kobj;
-
- return bo;
- }
-
- static void free_bd_holder(struct bd_holder *bo)
- {
- kfree(bo);
- }
-
/**
- * find_bd_holder - find matching struct bd_holder from the block device
+ * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
*
- * @bdev: struct block device to be searched
- * @bo: target struct bd_holder
- *
- * Returns matching entry with @bo in @bdev->bd_holder_list.
- * If found, increment the reference count and return the pointer.
- * If not found, returns NULL.
- */
- static struct bd_holder *find_bd_holder(struct block_device *bdev,
- struct bd_holder *bo)
- {
- struct bd_holder *tmp;
-
- list_for_each_entry(tmp, &bdev->bd_holder_list, list)
- if (tmp->sdir == bo->sdir) {
- tmp->count++;
- return tmp;
- }
-
- return NULL;
- }
-
- /**
- * add_bd_holder - create sysfs symlinks for bd_claim() relationship
- *
- * @bdev: block device to be bd_claimed
- * @bo: preallocated and initialized by alloc_bd_holder()
- *
- * Add @bo to @bdev->bd_holder_list, create symlinks.
- *
- * Returns 0 if symlinks are created.
- * Returns -ve if something fails.
- */
- static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
- {
- int err;
-
- if (!bo)
- return -EINVAL;
-
- if (!bd_holder_grab_dirs(bdev, bo))
- return -EBUSY;
-
- err = add_symlink(bo->sdir, bo->sdev);
- if (err)
- return err;
-
- err = add_symlink(bo->hdir, bo->hdev);
- if (err) {
- del_symlink(bo->sdir, bo->sdev);
- return err;
- }
-
- list_add_tail(&bo->list, &bdev->bd_holder_list);
- return 0;
- }
-
- /**
- * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+ * This function creates the following sysfs symlinks.
*
- * @bdev: block device to be bd_claimed
- * @kobj: holder's kobject
+ * - from "slaves" directory of the holder @disk to the claimed @bdev
+ * - from "holders" directory of the @bdev to the holder @disk
*
- * If there is matching entry with @kobj in @bdev->bd_holder_list
- * and no other bd_claim() from the same kobject,
- * remove the struct bd_holder from the list, delete symlinks for it.
+ * For example, if /dev/dm-0 maps to /dev/sda and the disk for dm-0 is
+ * passed to bd_link_disk_holder(), then:
*
- * Returns a pointer to the struct bd_holder when it's removed from the list
- * and ready to be freed.
- * Returns NULL if matching claim isn't found or there is other bd_claim()
- * by the same kobject.
- */
- static struct bd_holder *del_bd_holder(struct block_device *bdev,
- struct kobject *kobj)
- {
- struct bd_holder *bo;
-
- list_for_each_entry(bo, &bdev->bd_holder_list, list) {
- if (bo->sdir == kobj) {
- bo->count--;
- BUG_ON(bo->count < 0);
- if (!bo->count) {
- list_del(&bo->list);
- del_symlink(bo->sdir, bo->sdev);
- del_symlink(bo->hdir, bo->hdev);
- bd_holder_release_dirs(bo);
- return bo;
- }
- break;
- }
- }
-
- return NULL;
- }
-
- /**
- * bd_claim_by_kobject - bd_claim() with additional kobject signature
+ * /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
*
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @kobj: holder's kobject
+ * The caller must have claimed @bdev before calling this function and
+ * must ensure that both @bdev and @disk are valid during the creation and
+ * lifetime of these symlinks.
*
- * Do bd_claim() and if it succeeds, create sysfs symlinks between
- * the bdev and the holder's kobject.
- * Use bd_release_from_kobject() when relesing the claimed bdev.
+ * CONTEXT:
+ * Might sleep.
*
- * Returns 0 on success. (same as bd_claim())
- * Returns errno on failure.
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
- static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
- struct kobject *kobj)
+ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
- int err;
- struct bd_holder *bo, *found;
-
- if (!kobj)
- return -EINVAL;
-
- bo = alloc_bd_holder(kobj);
- if (!bo)
- return -ENOMEM;
+ int ret = 0;
mutex_lock(&bdev->bd_mutex);
- err = bd_claim(bdev, holder);
- if (err)
- goto fail;
+ WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
- found = find_bd_holder(bdev, bo);
- if (found)
- goto fail;
+ /* FIXME: remove the following once add_disk() handles errors */
+ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+ goto out_unlock;
- err = add_bd_holder(bdev, bo);
- if (err)
- bd_release(bdev);
- else
- bo = NULL;
- fail:
- mutex_unlock(&bdev->bd_mutex);
- free_bd_holder(bo);
- return err;
- }
+ ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ if (ret)
+ goto out_unlock;
- /**
- * bd_release_from_kobject - bd_release() with additional kobject signature
- *
- * @bdev: block device to be released
- * @kobj: holder's kobject
- *
- * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
- */
- static void bd_release_from_kobject(struct block_device *bdev,
- struct kobject *kobj)
- {
- if (!kobj)
- return;
+ ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret) {
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ goto out_unlock;
+ }
- mutex_lock(&bdev->bd_mutex);
- bd_release(bdev);
- free_bd_holder(del_bd_holder(bdev, kobj));
+ bdev->bd_holder_disk = disk;
+ out_unlock:
mutex_unlock(&bdev->bd_mutex);
+ return ret;
}
+ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
- /**
- * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
- *
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @disk: holder's gendisk
- *
- * Call bd_claim_by_kobject() with getting @disk->slave_dir.
- */
- int bd_claim_by_disk(struct block_device *bdev, void *holder,
- struct gendisk *disk)
+ static void bd_unlink_disk_holder(struct block_device *bdev)
{
- return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
- }
- EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+ struct gendisk *disk = bdev->bd_holder_disk;
- /**
- * bd_release_from_disk - wrapper function for bd_release_from_kobject()
- *
- * @bdev: block device to be claimed
- * @disk: holder's gendisk
- *
- * Call bd_release_from_kobject() and put @disk->slave_dir.
- */
- void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
- {
- bd_release_from_kobject(bdev, disk->slave_dir);
- kobject_put(disk->slave_dir);
- }
- EXPORT_SYMBOL_GPL(bd_release_from_disk);
- #endif
+ bdev->bd_holder_disk = NULL;
+ if (!disk)
+ return;
- /*
- * Tries to open block device by device number. Use it ONLY if you
- * really do not have anything better - i.e. when you are behind a
- * truly sucky interface and all you are given is a device number. _Never_
- * to be used for internal purposes. If you ever need it - reconsider
- * your API.
- */
- struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
- {
- struct block_device *bdev = bdget(dev);
- int err = -ENOMEM;
- if (bdev)
- err = blkdev_get(bdev, mode);
- return err ? ERR_PTR(err) : bdev;
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
}
-
- EXPORT_SYMBOL(open_by_devnum);
+ #else
+ static inline void bd_unlink_disk_holder(struct block_device *bdev)
+ { }
+ #endif
/**
* flush_disk - invalidates all buffer-cache entries on a disk
{
struct gendisk *disk = bdev->bd_disk;
const struct block_device_operations *bdops = disk->fops;
+ unsigned int events;
- if (!bdops->media_changed)
- return 0;
- if (!bdops->media_changed(bdev->bd_disk))
+ events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
+ DISK_EVENT_EJECT_REQUEST);
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
flush_disk(bdev);
return ret;
}
- int blkdev_get(struct block_device *bdev, fmode_t mode)
+ /**
+ * blkdev_get - open a block device
+ * @bdev: block_device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
+ * opened with exclusive access. Specifying %FMODE_EXCL with %NULL
+ * @holder is invalid. Exclusive opens may nest for the same @holder.
+ *
+ * On success, the reference count of @bdev is unchanged. On failure,
+ * @bdev is put.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
- return __blkdev_get(bdev, mode, 0);
+ struct block_device *whole = NULL;
+ int res;
+
+ WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
+
+ if ((mode & FMODE_EXCL) && holder) {
+ whole = bd_start_claiming(bdev, holder);
+ if (IS_ERR(whole)) {
+ bdput(bdev);
+ return PTR_ERR(whole);
+ }
+ }
+
+ res = __blkdev_get(bdev, mode, 0);
+
+ /* __blkdev_get() may alter read only status, check it afterwards */
+ if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ __blkdev_put(bdev, mode, 0);
+ res = -EACCES;
+ }
+
+ if (whole) {
+ /* finish claiming */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ if (!res) {
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder
+ * will be set to bd_may_claim before being
+ * set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ }
+
+ /* tell others that we're done */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * Block event polling for write claims. Any write
+ * holder makes the write_holder state stick until all
+ * are released. This is good enough and tracking
+ * individual writeable references is too fragile given
+ * the way @mode is used in blkdev_get/put().
+ */
+ if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+ bdev->bd_write_holder = true;
+ disk_block_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(whole);
+ }
+
+ return res;
}
EXPORT_SYMBOL(blkdev_get);
+ /**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by the device file at @path. @mode
+ * and @holder are identical to blkdev_get().
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder)
+ {
+ struct block_device *bdev;
+ int err;
+
+ bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return bdev;
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_path);
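+
+ /*
+  * A minimal usage sketch (illustrative only; the holder may be any
+  * cookie the caller owns, e.g. its super_block):
+  *
+  *	bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+  *				  holder);
+  *	if (IS_ERR(bdev))
+  *		return PTR_ERR(bdev);
+  *	...
+  *	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+  */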
+
+ /**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by device number @dev. @mode and
+ * @holder are identical to blkdev_get().
+ *
+ * Use it ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a
+ * device number. _Never_ to be used for internal purposes. If you
+ * ever need it - reconsider your API.
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+ {
+ struct block_device *bdev;
+ int err;
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_dev);
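+
+ /*
+  * Sketch of the by-devnum variant (cf. the md and ext3/ext4 call sites
+  * converted elsewhere in this series):
+  *
+  *	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
+  *	if (IS_ERR(bdev))
+  *		return PTR_ERR(bdev);
+  */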
+
static int blkdev_open(struct inode * inode, struct file * filp)
{
- struct block_device *whole = NULL;
struct block_device *bdev;
- int res;
/*
* Preserve backwards compatibility and allow large file access
if (bdev == NULL)
return -ENOMEM;
- if (filp->f_mode & FMODE_EXCL) {
- whole = bd_start_claiming(bdev, filp);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return PTR_ERR(whole);
- }
- }
-
filp->f_mapping = bdev->bd_inode->i_mapping;
- res = blkdev_get(bdev, filp->f_mode);
-
- if (whole) {
- if (res == 0)
- bd_finish_claiming(bdev, whole, filp);
- else
- bd_abort_claiming(whole, filp);
- }
-
- return res;
+ return blkdev_get(bdev, filp->f_mode, filp);
}
static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_part_count--;
if (!--bdev->bd_openers) {
+ WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
}
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ if (mode & FMODE_EXCL) {
+ bool bdev_free;
+
+ /*
+ * Release a claim on the device. The holder fields
+ * are protected with bdev_lock. bd_mutex is to
+ * synchronize disk_holder unlinking.
+ */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+
+ /* bd_contains might point to self, check in a separate step */
+ if ((bdev_free = !bdev->bd_holders))
+ bdev->bd_holder = NULL;
+ if (!bdev->bd_contains->bd_holders)
+ bdev->bd_contains->bd_holder = NULL;
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and
+ * unblock event polling if it was a write holder.
+ */
+ if (bdev_free) {
+ bd_unlink_disk_holder(bdev);
+ if (bdev->bd_write_holder) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ } else
+ disk_check_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ } else
+ disk_check_events(bdev->bd_disk);
+
return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
- if (bdev->bd_holder == filp)
- bd_release(bdev);
+
return blkdev_put(bdev, filp->f_mode);
}
}
EXPORT_SYMBOL(lookup_bdev);
- /**
- * open_bdev_exclusive - open a block device by name and set it up for use
- *
- * @path: special file representing the block device
- * @mode: FMODE_... combination to pass be used
- * @holder: owner for exclusion
- *
- * Open the blockdevice described by the special file at @path, claim it
- * for the @holder.
- */
- struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
- {
- struct block_device *bdev, *whole;
- int error;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
-
- whole = bd_start_claiming(bdev, holder);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return whole;
- }
-
- error = blkdev_get(bdev, mode);
- if (error)
- goto out_abort_claiming;
-
- error = -EACCES;
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
- goto out_blkdev_put;
-
- bd_finish_claiming(bdev, whole, holder);
- return bdev;
-
- out_blkdev_put:
- blkdev_put(bdev, mode);
- out_abort_claiming:
- bd_abort_claiming(whole, holder);
- return ERR_PTR(error);
- }
-
- EXPORT_SYMBOL(open_bdev_exclusive);
-
- /**
- * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
- *
- * @bdev: blockdevice to close
- * @mode: mode, must match that used to open.
- *
- * This is the counterpart to open_bdev_exclusive().
- */
- void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
- {
- bd_release(bdev);
- blkdev_put(bdev, mode);
- }
-
- EXPORT_SYMBOL(close_bdev_exclusive);
-
int __invalidate_device(struct block_device *bdev)
{
struct super_block *sb = get_super(bdev);
device->fs_devices = fs_devices;
fs_devices->num_devices++;
- } else if (strcmp(device->name, path)) {
+ } else if (!device->name || strcmp(device->name, path)) {
name = kstrdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
kfree(device->name);
device->name = name;
+ if (device->missing) {
+ fs_devices->missing_devices--;
+ device->missing = 0;
+ }
}
if (found_transid > fs_devices->latest_trans) {
continue;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
fs_devices->open_devices--;
}
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
fs_devices->open_devices--;
}
if (device->writeable) {
int seeding = 1;
int ret = 0;
+ flags |= FMODE_EXCL;
+
list_for_each_entry(device, head, dev_list) {
if (device->bdev)
continue;
if (!device->name)
continue;
- bdev = open_bdev_exclusive(device->name, flags, holder);
+ bdev = blkdev_get_by_path(device->name, flags, holder);
if (IS_ERR(bdev)) {
printk(KERN_INFO "open %s failed\n", device->name);
goto error;
error_brelse:
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, flags);
error:
continue;
}
mutex_lock(&uuid_mutex);
- bdev = open_bdev_exclusive(path, flags, holder);
+ flags |= FMODE_EXCL;
+ bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, flags);
+ blkdev_put(bdev, flags);
error:
mutex_unlock(&uuid_mutex);
return ret;
goto out;
}
} else {
- bdev = open_bdev_exclusive(device_path, FMODE_READ,
- root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
goto out;
device->fs_devices->num_devices--;
+ if (device->missing)
+ root->fs_info->fs_devices->missing_devices--;
+
next_device = list_entry(root->fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
if (device->bdev == root->fs_info->sb->s_bdev)
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
device->fs_devices->open_devices--;
}
brelse(bh);
error_close:
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
return -EINVAL;
- bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
error:
- close_bdev_exclusive(bdev, 0);
+ blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev) {
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
device->devid = devid;
device->work.func = pending_bios_fn;
device->fs_devices = fs_devices;
+ device->missing = 1;
fs_devices->num_devices++;
+ fs_devices->missing_devices++;
spin_lock_init(&device->io_lock);
INIT_LIST_HEAD(&device->dev_alloc_list);
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ } else if (!device->missing) {
+ /*
+ * this happens when a device that was properly setup
+ * in the device info lists suddenly goes bad.
+ * device->bdev is NULL, and so we have to set
+ * device->missing to one here
+ */
+ root->fs_info->fs_devices->missing_devices++;
+ device->missing = 1;
}
}
int writeable;
int in_fs_metadata;
+ int missing;
spinlock_t io_lock;
struct block_device *bdev;
- /* the mode sent to open_bdev_exclusive */
+ /* the mode sent to blkdev_get */
fmode_t mode;
char *name;
u64 num_devices;
u64 open_devices;
u64 rw_devices;
+ u64 missing_devices;
u64 total_rw_bytes;
struct block_device *latest_bdev;
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
/* index in the above */
- static inline int major_to_index(int major)
+ static inline int major_to_index(unsigned major)
{
return major % CHRDEV_MAJOR_HASH_SIZE;
}
return ret;
}
-int cdev_index(struct inode *inode)
-{
- int idx;
- struct kobject *kobj;
-
- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
- if (!kobj)
- return -1;
- kobject_put(kobj);
- return idx;
-}
-
void cd_forget(struct inode *inode)
{
spin_lock(&cdev_lock);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
-EXPORT_SYMBOL(cdev_index);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
void ext3_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
sb->s_id);
}
-void ext3_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
ext3_handle_error(sb);
* case we take the easy way out and panic immediately.
*/
-void ext3_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
-void ext3_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
*/
static int ext3_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
return &ei->vfs_inode;
}
+static void ext3_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
false);
dump_stack();
}
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+ call_rcu(&inode->i_rcu, ext3_i_callback);
}
static void init_once(void *foo)
goto failed_mount;
}
- if (generic_check_addressable(sb->s_blocksize_bits,
- le32_to_cpu(es->s_blocks_count))) {
+ err = generic_check_addressable(sb->s_blocksize_bits,
+ le32_to_cpu(es->s_blocks_count));
+ if (err) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
ext3_msg(sb, KERN_ERR,
"error: CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext3_msg(sb, KERN_ERR,
- "error: failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
EXT3_SB(sb)->s_journal = journal;
ext3_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
- sb->s_id, function, line, current->comm);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
va_end(args);
ext4_handle_error(sb);
const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
- printk("block %llu: ", block);
- printk("comm %s: ", current->comm);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu: ", block);
+ printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
}
void ext4_error_file(struct file *file, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, ext4_fsblk_t block,
+ const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (!path)
+ if (IS_ERR(path))
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu "
- "(comm %s path %s): ",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path);
- vprintk(fmt, args);
- printk("\n");
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk(KERN_CONT "block %llu: ", block);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
panic("EXT4-fs panic from previous error\n");
}
-void ext4_msg (struct super_block * sb, const char *prefix,
- const char *fmt, ...)
+void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
va_end(args);
}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
- sb->s_id, function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
}
__releases(bitlock)
__acquires(bitlock)
{
+ struct va_format vaf;
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
+
va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
sb->s_id, function, line, grp);
if (ino)
- printk("inode %lu: ", ino);
+ printk(KERN_CONT "inode %lu: ", ino);
if (block)
- printk("block %llu:", (unsigned long long) block);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu:", (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
*/
static int ext4_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
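The ext4_blkdev_get()/ext4_blkdev_put() changes track the new block layer API: blkdev_get_by_dev() takes the exclusivity request (FMODE_EXCL) and a holder cookie at open time, replacing the old open_by_devnum() + bd_claim() dance, and the matching blkdev_put() must be passed the same mode, FMODE_EXCL included. A hedged sketch of the pairing, with the superblock as holder as in this patch:

	struct block_device *bdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... use the device; 'sb' identifies the exclusive holder ... */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);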
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
- /*
- * Note: We can be called before EXT4_SB(sb)->s_journal is set,
- * therefore it can be null here. Don't check it, just initialize
- * jinode.
- */
- jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
- ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
#endif
+ ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->cur_aio_dio = NULL;
return drop;
}
+static void ext4_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
static void ext4_destroy_inode(struct inode *inode)
{
ext4_ioend_wait(inode);
true);
dump_stack();
}
- kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+ call_rcu(&inode->i_rcu, ext4_i_callback);
}
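With RCU-walk path lookup, other CPUs may still be dereferencing an inode after the last reference is dropped, so its slab object must not be reused until a grace period has elapsed. The ext4_i_callback()/call_rcu() pair above is the now-standard shape for any filesystem with a private inode cache; generically (the foo_* names are placeholders):

	static void foo_i_callback(struct rcu_head *head)
	{
		struct inode *inode = container_of(head, struct inode, i_rcu);

		INIT_LIST_HEAD(&inode->i_dentry);  /* i_dentry unions with i_rcu */
		kmem_cache_free(foo_inode_cachep, FOO_I(inode));
	}

	static void foo_destroy_inode(struct inode *inode)
	{
		call_rcu(&inode->i_rcu, foo_i_callback);
	}

The i_dentry/i_rcu union in struct inode (see the fs.h hunk later in this series) is what makes this free of charge space-wise.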
static void init_once(void *foo)
end_writeback(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
- if (EXT4_JOURNAL(inode))
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode);
+ if (EXT4_I(inode)->jinode) {
+ jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode);
+ jbd2_free_inode(EXT4_I(inode)->jinode);
+ EXT4_I(inode)->jinode = NULL;
+ }
}
static inline void ext4_show_quota_options(struct seq_file *seq,
!(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
+ if (test_opt(sb, MBLK_IO_SUBMIT))
+ seq_puts(seq, ",mblk_io_submit");
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
- .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc,
- Opt_block_validity, Opt_noblock_validity,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
{Opt_resize, "resize"},
{Opt_delalloc, "delalloc"},
{Opt_nodelalloc, "nodelalloc"},
+ {Opt_mblk_io_submit, "mblk_io_submit"},
+ {Opt_nomblk_io_submit, "nomblk_io_submit"},
{Opt_block_validity, "block_validity"},
{Opt_noblock_validity, "noblock_validity"},
{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
sbi->s_qf_names[qtype] = NULL;
return 0;
}
- set_opt(sbi->s_mount_opt, QUOTA);
+ set_opt(sb, QUOTA);
return 1;
}
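The long run of mechanical conversions that follows comes from set_opt()/clear_opt() being redefined to take the superblock rather than the flags word, which lets a second flags word (s_mount_opt2, introduced alongside) hide behind the same interface. The ext4.h definitions are roughly the following (a sketch; the header wraps the lines differently):

	#define set_opt(sb, opt)    (EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_##opt)
	#define clear_opt(sb, opt)  (EXT4_SB(sb)->s_mount_opt &= ~EXT4_MOUNT_##opt)
	#define set_opt2(sb, opt)   (EXT4_SB(sb)->s_mount_opt2 |= EXT4_MOUNT2_##opt)
	#define clear_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 &= ~EXT4_MOUNT2_##opt)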
switch (token) {
case Opt_bsd_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sb, MINIX_DF);
break;
case Opt_minix_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, MINIX_DF);
+ set_opt(sb, MINIX_DF);
break;
case Opt_grpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
break;
case Opt_nogrpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, GRPID);
+ clear_opt(sb, GRPID);
break;
case Opt_resuid:
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ set_opt(sb, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
break;
case Opt_debug:
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
break;
case Opt_oldalloc:
- set_opt(sbi->s_mount_opt, OLDALLOC);
+ set_opt(sb, OLDALLOC);
break;
case Opt_orlov:
- clear_opt(sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sb, OLDALLOC);
break;
#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt(sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sb, XATTR_USER);
break;
#else
case Opt_user_xattr:
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
case Opt_acl:
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
break;
case Opt_noacl:
- clear_opt(sbi->s_mount_opt, POSIX_ACL);
+ clear_opt(sb, POSIX_ACL);
break;
#else
case Opt_acl:
"Cannot specify journal on remount");
return 0;
}
- set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sb, UPDATE_JOURNAL);
break;
case Opt_journal_dev:
if (is_remount) {
*journal_devnum = option;
break;
case Opt_journal_checksum:
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_journal_async_commit:
- set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_ASYNC_COMMIT);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt(sbi->s_mount_opt, NOLOAD);
+ set_opt(sb, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
return 0;
}
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ clear_opt(sb, DATA_FLAGS);
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
- set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ set_opt(sb, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
- clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ clear_opt(sb, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
break;
case Opt_quota:
case Opt_usrquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, USRQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, USRQUOTA);
break;
case Opt_grpquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, GRPQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_loaded(sb)) {
"options when quota turned on");
return 0;
}
- clear_opt(sbi->s_mount_opt, QUOTA);
- clear_opt(sbi->s_mount_opt, USRQUOTA);
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, QUOTA);
+ clear_opt(sb, USRQUOTA);
+ clear_opt(sb, GRPQUOTA);
break;
#else
case Opt_quota:
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_barrier:
if (args[0].from) {
} else
option = 1; /* No argument, default to 1 */
if (option)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
else
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_ignore:
break;
"Ignoring deprecated bh option");
break;
case Opt_i_version:
- set_opt(sbi->s_mount_opt, I_VERSION);
+ set_opt(sb, I_VERSION);
sb->s_flags |= MS_I_VERSION;
break;
case Opt_nodelalloc:
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
+ break;
+ case Opt_mblk_io_submit:
+ set_opt(sb, MBLK_IO_SUBMIT);
+ break;
+ case Opt_nomblk_io_submit:
+ clear_opt(sb, MBLK_IO_SUBMIT);
break;
case Opt_stripe:
if (match_int(&args[0], &option))
sbi->s_stripe = option;
break;
case Opt_delalloc:
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
break;
case Opt_block_validity:
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
break;
case Opt_noblock_validity:
- clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ clear_opt(sb, BLOCK_VALIDITY);
break;
case Opt_inode_readahead_blks:
if (match_int(&args[0], &option))
option);
break;
case Opt_noauto_da_alloc:
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_auto_da_alloc:
if (args[0].from) {
} else
option = 1; /* No argument, default to 1 */
if (option)
- clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+ clear_opt(sb, NO_AUTO_DA_ALLOC);
else
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_discard:
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
break;
case Opt_nodiscard:
- clear_opt(sbi->s_mount_opt, DISCARD);
+ clear_opt(sb, DISCARD);
break;
case Opt_dioread_nolock:
- set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ set_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_dioread_lock:
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_init_inode_table:
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (args[0].from) {
if (match_int(&args[0], &option))
return 0;
sbi->s_li_wait_mult = option;
break;
case Opt_noinit_inode_table:
- clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ clear_opt(sb, INIT_INODE_TABLE);
break;
default:
ext4_msg(sb, KERN_ERR,
#ifdef CONFIG_QUOTA
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sbi->s_mount_opt, USRQUOTA);
+ clear_opt(sb, USRQUOTA);
if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, GRPQUOTA);
if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
ext4_msg(sb, KERN_ERR, "old and new quota "
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04x]\n",
+ "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
- sbi->s_mount_opt);
+ sbi->s_mount_opt, sbi->s_mount_opt2);
return res;
}
size = flex_group_count * sizeof(struct flex_groups);
sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- sbi->s_flex_groups = vmalloc(size);
- if (sbi->s_flex_groups)
- memset(sbi->s_flex_groups, 0, size);
- }
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for "
- "%u flex groups", flex_group_count);
- goto failed;
+ sbi->s_flex_groups = vzalloc(size);
+ if (sbi->s_flex_groups == NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %u flex groups",
+ flex_group_count);
+ goto failed;
+ }
}
for (i = 0; i < sbi->s_groups_count; i++) {
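vzalloc() folds the old vmalloc()-then-memset() fallback into a single zeroing call. The kzalloc-then-vzalloc cascade is the usual idiom for tables that are normally small but can outgrow what the page allocator will hand out; the matching free path has to ask which allocator won, e.g. (sketch):

	if (is_vmalloc_addr(sbi->s_flex_groups))
		vfree(sbi->s_flex_groups);
	else
		kfree(sbi->s_flex_groups);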
struct ext4_li_request *elr;
mutex_lock(&ext4_li_info->li_list_mtx);
- if (list_empty(&ext4_li_info->li_request_list))
- return;
-
list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
elr = list_entry(pos, struct ext4_li_request,
lr_request);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
- int ret;
+ int ret = 0;
if (sbi->s_li_request != NULL)
return 0;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
"2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
}
if (def_mount_opts & EXT4_DEFM_UID16)
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
#ifdef CONFIG_EXT4_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
#endif
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ set_opt(sb, WRITEBACK_DATA);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sb, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ set_opt(sb, ERRORS_CONT);
else
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sb, ERRORS_RO);
if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
/*
* enable delayed allocation by default
*/
if (!IS_EXT3_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
&journal_devnum, &journal_ioprio, NULL, 0)) {
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- ret = generic_check_addressable(sb->s_blocksize_bits,
+ err = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es));
- if (ret) {
+ if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
"suppressed and not mounted read-only");
goto failed_mount_wq;
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ clear_opt(sb, DATA_FLAGS);
+ set_opt(sb, WRITEBACK_DATA);
sbi->s_journal = NULL;
needs_recovery = 0;
goto no_journal;
*/
if (jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
break;
case EXT4_MOUNT_ORDERED_DATA:
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
"requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
if (sb->s_blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - block size is too small");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
}
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext4_msg(sb, KERN_ERR,
- "failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
return 0;
}
+/*
+ * Structure to save mount options for ext4_remount's benefit
+ */
+struct ext4_mount_options {
+ unsigned long s_mount_opt;
+ unsigned long s_mount_opt2;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned long s_commit_interval;
+ u32 s_min_batch_time, s_max_batch_time;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
+ old_opts.s_mount_opt2 = sbi->s_mount_opt2;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
restore_opts:
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sbi->s_commit_interval = old_opts.s_commit_interval;
iput(inode);
return -ENOMEM;
}
- dentry->d_op = &gfs2_dops;
*dptr = dentry;
return 0;
}
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
+ sb->s_d_op = &gfs2_dops;
sb->s_export_op = &gfs2_export_ops;
sb->s_xattr = gfs2_xattr_handlers;
sb->s_qcop = &gfs2_quotactl_ops;
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error;
struct gfs2_args args;
struct gfs2_sbd *sdp;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
goto error_bdev;
if (s->s_root)
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
deactivate_locked_super(s);
return ERR_PTR(error);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
return ERR_PTR(error);
}
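The gfs2 conversion above (mirrored by nilfs2 below) is the by-path flavour of the same API change: blkdev_get_by_path() subsumes open_bdev_exclusive(), with FMODE_EXCL spelled out in the mode and fs_type doubling as the holder cookie, while close_bdev_exclusive() becomes blkdev_put() with the identical mode. The mode value must therefore be kept around until release; in sketch form:

	mode = FMODE_READ | FMODE_EXCL;
	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;
	bdev = blkdev_get_by_path(dev_name, mode, fs_type);  /* holder = fs_type */
	...
	blkdev_put(bdev, mode);  /* same mode, FMODE_EXCL included */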
struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
struct page *page = buf->page;
size_t size;
- int ret;
-
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
size = sd->len;
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1)
- || (atomic_read(&ndentry->d_count) > 1))) {
+ ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (atomic_read(&rdentry->d_count) > 1)) {
+ (rdentry->d_count > 1)) {
host_err = -EPERM;
} else
#endif
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
-#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include "nilfs.h"
const char *fmt, ...)
{
struct nilfs_sb_info *sbi = NILFS_SB(sb);
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (!(sb->s_flags & MS_RDONLY)) {
void nilfs_warning(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "NILFS warning (device %s): %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
return &ii->vfs_inode;
}
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+
if (mdi) {
kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
kfree(mdi);
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
+void nilfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
- return atomic_read(&root_dentry->d_count) > 1;
+ return root_dentry->d_count > 1;
}
/**
struct nilfs_sb_info *sbi = NILFS_SB(sb);
struct the_nilfs *nilfs = sbi->s_nilfs;
unsigned long old_sb_flags;
- struct nilfs_mount_options old_opts;
+ unsigned long old_mount_opt;
int err;
old_sb_flags = sb->s_flags;
- old_opts.mount_opt = sbi->s_mount_opt;
+ old_mount_opt = sbi->s_mount_opt;
if (!parse_options(data, sb, 1)) {
err = -EINVAL;
restore_opts:
sb->s_flags = old_sb_flags;
- sbi->s_mount_opt = old_opts.mount_opt;
+ sbi->s_mount_opt = old_mount_opt;
return err;
}
{
struct nilfs_super_data sd;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
struct dentry *root_dentry;
int err, s_new = false;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(sd.bdev))
return ERR_CAST(sd.bdev);
}
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return root_dentry;
failed:
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return ERR_PTR(err);
}
#define O2HB_DB_TYPE_REGION_LIVENODES 4
#define O2HB_DB_TYPE_REGION_NUMBER 5
#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
+#define O2HB_DB_TYPE_REGION_PINNED 7
struct o2hb_debug_buf {
int db_type;
int db_size;
#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
#define O2HB_DEBUG_REGION_NUMBER "num"
#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm both register heartbeat callbacks, but only
+ * o2dlm actually depends on the heartbeat: it must not stop while a dlm
+ * domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF 3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
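Taken together, the two comments above reduce pinning to a predicate over two counters, evaluated under o2hb_live_lock. A hypothetical helper capturing the invariant (not part of this patch; o2hb_pop_count() and O2HB_PIN_CUT_OFF are used exactly this way in the hunks below):

	/* sketch: quorum regions stay pinned iff someone depends on heartbeat
	 * and the quorum region count is at or below the cut off */
	static int o2hb_should_pin(void)
	{
		assert_spin_locked(&o2hb_live_lock);
		return o2hb_dependent_users &&
		       o2hb_pop_count(&o2hb_quorum_region_bitmap,
				      O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF;
	}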
/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
struct config_item hr_item;
struct list_head hr_all_item;
- unsigned hr_unclean_stop:1;
+ unsigned hr_unclean_stop:1,
+ hr_item_pinned:1,
+ hr_item_dropped:1;
/* protected by the hr_callback_sem */
struct task_struct *hr_task;
struct dentry *hr_debug_livenodes;
struct dentry *hr_debug_regnum;
struct dentry *hr_debug_elapsed_time;
+ struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
+ struct o2hb_debug_buf *hr_db_pinned;
/* let the person setting up hb wait for it to return until it
* has reached a 'steady' state. This will be fixed when we have
static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
{
- cancel_delayed_work(®->hr_write_timeout_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(®->hr_write_timeout_work);
}
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
config_item_name(®->hr_item));
set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+ /*
+ * If global heartbeat active, unpin all regions if the
+ * region count > CUT_OFF
+ */
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+ o2hb_region_unpin(NULL);
}
static int o2hb_check_slot(struct o2hb_region *reg,
set_user_nice(current, -20);
+ /* Pin node */
+ o2nm_depend_this_node();
+
while (!kthread_should_stop() && !reg->hr_unclean_stop) {
/* We track the time spent inside
* o2hb_do_disk_heartbeat so that we avoid more than
mlog_errno(ret);
}
+ /* Unpin node */
+ o2nm_undepend_this_node();
+
mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
return 0;
reg->hr_last_timeout_start));
goto done;
+ case O2HB_DB_TYPE_REGION_PINNED:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+ !!reg->hr_item_pinned);
+ goto done;
+
default:
goto done;
}
memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ o2hb_dependent_users = 0;
+
return o2hb_debug_init();
}
debugfs_remove(reg->hr_debug_livenodes);
debugfs_remove(reg->hr_debug_regnum);
debugfs_remove(reg->hr_debug_elapsed_time);
+ debugfs_remove(reg->hr_debug_pinned);
debugfs_remove(reg->hr_debug_dir);
spin_lock(&o2hb_live_lock);
goto out;
reg->hr_bdev = I_BDEV(filp->f_mapping->host);
- ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
+ ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
if (ret) {
reg->hr_bdev = NULL;
goto out;
goto bail;
}
+ reg->hr_debug_pinned =
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+ reg->hr_debug_dir,
+ &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED,
+ 0, 0, reg);
+ if (!reg->hr_debug_pinned) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
ret = 0;
bail:
return ret;
if (reg == NULL)
return ERR_PTR(-ENOMEM);
- if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto free;
+ }
spin_lock(&o2hb_live_lock);
reg->hr_region_num = 0;
O2NM_MAX_REGIONS);
if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
spin_unlock(&o2hb_live_lock);
- return ERR_PTR(-EFBIG);
+ ret = -EFBIG;
+ goto free;
}
set_bit(reg->hr_region_num, o2hb_region_bitmap);
}
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
if (ret) {
config_item_put(®->hr_item);
- return ERR_PTR(ret);
+ goto free;
}
return ®->hr_item;
+free:
+ kfree(reg);
+ return ERR_PTR(ret);
}
static void o2hb_heartbeat_group_drop_item(struct config_group *group,
{
struct task_struct *hb_task;
struct o2hb_region *reg = to_o2hb_region(item);
+ int quorum_region = 0;
/* stop the thread when the user removes the region dir */
spin_lock(&o2hb_live_lock);
if (o2hb_global_heartbeat_active()) {
clear_bit(reg->hr_region_num, o2hb_region_bitmap);
clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ quorum_region = 1;
+ clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
}
hb_task = reg->hr_task;
reg->hr_task = NULL;
+ reg->hr_item_dropped = 1;
spin_unlock(&o2hb_live_lock);
if (hb_task)
if (o2hb_global_heartbeat_active())
printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
config_item_name(®->hr_item));
+
config_item_put(item);
+
+ if (!o2hb_global_heartbeat_active() || !quorum_region)
+ return;
+
+ /*
+ * If global heartbeat active and there are dependent users,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ spin_lock(&o2hb_live_lock);
+
+ if (!o2hb_dependent_users)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
struct o2hb_heartbeat_group_attribute {
}
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ */
+static int o2hb_region_pin(const char *region_uuid)
{
- struct o2hb_region *p, *reg = NULL;
+ int ret = 0, found = 0;
+ struct o2hb_region *reg;
+ char *uuid;
assert_spin_locked(&o2hb_live_lock);
- list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
- if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
- reg = p;
- break;
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(®->hr_item);
+
+ /* local heartbeat */
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
}
+
+ if (reg->hr_item_pinned || reg->hr_item_dropped)
+ goto skip_pin;
+
+ /* Ignore ENOENT only for local hb (userdlm domain) */
+ ret = o2nm_depend_item(®->hr_item);
+ if (!ret) {
+ mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+ reg->hr_item_pinned = 1;
+ } else {
+ if (ret == -ENOENT && found)
+ ret = 0;
+ else {
+ mlog(ML_ERROR, "Pin region %s fails with %d\n",
+ uuid, ret);
+ break;
+ }
+ }
+skip_pin:
+ if (found)
+ break;
}
- return reg;
+ return ret;
}
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
{
- int ret = 0;
struct o2hb_region *reg;
+ char *uuid;
+ int found = 0;
- spin_lock(&o2hb_live_lock);
+ assert_spin_locked(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
- if (!reg)
- ret = -ENOENT;
- spin_unlock(&o2hb_live_lock);
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(®->hr_item);
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
+ }
- if (ret)
- goto out;
+ if (reg->hr_item_pinned) {
+ mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+ o2nm_undepend_item(®->hr_item);
+ reg->hr_item_pinned = 0;
+ }
+ if (found)
+ break;
+ }
+}
- ret = o2nm_depend_this_node();
- if (ret)
- goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+ int ret = 0;
- ret = o2nm_depend_item(®->hr_item);
- if (ret)
- o2nm_undepend_this_node();
+ spin_lock(&o2hb_live_lock);
-out:
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ ret = o2hb_region_pin(region_uuid);
+ goto unlock;
+ }
+
+ /*
+ * if global heartbeat active and this is the first dependent user,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ o2hb_dependent_users++;
+ if (o2hb_dependent_users > 1)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ ret = o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
return ret;
}
-static void o2hb_region_put(const char *region_uuid)
+static void o2hb_region_dec_user(const char *region_uuid)
{
- struct o2hb_region *reg;
-
spin_lock(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ o2hb_region_unpin(region_uuid);
+ goto unlock;
+ }
- spin_unlock(&o2hb_live_lock);
+ /*
+ * if global heartbeat active and there are no dependent users,
+ * unpin all quorum regions
+ */
+ o2hb_dependent_users--;
+ if (!o2hb_dependent_users)
+ o2hb_region_unpin(NULL);
- if (reg) {
- o2nm_undepend_item(®->hr_item);
- o2nm_undepend_this_node();
- }
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
int o2hb_register_callback(const char *region_uuid,
}
if (region_uuid) {
- ret = o2hb_region_get(region_uuid);
- if (ret)
+ ret = o2hb_region_inc_user(region_uuid);
+ if (ret) {
+ mlog_errno(ret);
goto out;
+ }
}
down_write(&o2hb_callback_sem);
up_write(&o2hb_callback_sem);
ret = 0;
out:
- mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
ret, __builtin_return_address(0), hc);
return ret;
}
{
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
- mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
return;
if (region_uuid)
- o2hb_region_put(region_uuid);
+ o2hb_region_dec_user(region_uuid);
down_write(&o2hb_callback_sem);
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
result = 0;
if (journal->j_dev_bd != NULL) {
- if (journal->j_dev_bd->bd_dev != super->s_dev)
- bd_release(journal->j_dev_bd);
result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
journal->j_dev_bd = NULL;
}
{
int result;
dev_t jdev;
- fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
+ fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
char b[BDEVNAME_SIZE];
result = 0;
/* there is no "jdev" option and journal is on separate device */
if (!jdev_name || !jdev_name[0]) {
- journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
+ if (jdev == super->s_dev)
+ blkdev_mode &= ~FMODE_EXCL;
+ journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
+ journal);
journal->j_dev_mode = blkdev_mode;
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
"cannot init journal device '%s': %i",
__bdevname(jdev, b), result);
return result;
- } else if (jdev != super->s_dev) {
- result = bd_claim(journal->j_dev_bd, journal);
- if (result) {
- blkdev_put(journal->j_dev_bd, blkdev_mode);
- return result;
- }
-
+ } else if (jdev != super->s_dev)
set_blocksize(journal->j_dev_bd, super->s_blocksize);
- }
return 0;
}
journal->j_dev_mode = blkdev_mode;
- journal->j_dev_bd = open_bdev_exclusive(jdev_name,
- blkdev_mode, journal);
+ journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
{
struct file *file = sd->u.file;
loff_t pos = sd->pos;
- int ret, more;
-
- ret = buf->ops->confirm(pipe, buf);
- if (!ret) {
- more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
- if (file->f_op && file->f_op->sendpage)
- ret = file->f_op->sendpage(file, buf->page, buf->offset,
- sd->len, &pos, more);
- else
- ret = -EINVAL;
- }
+ int more;
- return ret;
+ if (unlikely(!(file->f_op && file->f_op->sendpage)))
+ return -EINVAL;
+
+ more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+ return file->f_op->sendpage(file, buf->page, buf->offset,
+ sd->len, &pos, more);
}
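With buf->ops->confirm() hoisted out of the individual actors and into splice_from_pipe_feed() (see the hunk below), an actor may assume the pipe buffer is already uptodate and simply consume it. Under the new contract a minimal write-style actor looks like this (hypothetical name; same shape as the write_pipe_buf hunk further down):

	static int my_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			    struct splice_desc *sd)
	{
		void *src;
		int ret;

		/* no ->confirm() here: splice_from_pipe_feed() already ran it */
		src = buf->ops->map(pipe, buf, 0);
		ret = kernel_write(sd->u.file, src + buf->offset, sd->len, sd->pos);
		buf->ops->unmap(pipe, buf, src);
		return ret;
	}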
/*
void *fsdata;
int ret;
- /*
- * make sure the data in this buffer is uptodate
- */
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
offset = sd->pos & ~PAGE_CACHE_MASK;
this_len = sd->len;
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = actor(pipe, buf, sd);
- if (ret <= 0) {
+ ret = buf->ops->confirm(pipe, buf);
+ if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
return ret;
}
+
+ ret = actor(pipe, buf, sd);
+ if (ret <= 0)
+ return ret;
+
buf->offset += ret;
buf->len -= ret;
int ret;
void *data;
- ret = buf->ops->confirm(pipe, buf);
- if (ret)
- return ret;
-
data = buf->ops->map(pipe, buf, 0);
ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
buf->ops->unmap(pipe, buf, data);
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
-/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
- */
-static inline struct pipe_inode_info *pipe_info(struct inode *inode)
-{
- if (S_ISFIFO(inode->i_mode))
- return inode->i_pipe;
-
- return NULL;
-}
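pipe_info() is not gone; it moved into the pipe core as get_pipe_info() so callers outside fs/splice.c can share it, and the call sites below simply switch over. Its body presumably keeps the S_ISFIFO check for the reason the deleted comment gives (sketch under that assumption):

	struct pipe_inode_info *get_pipe_info(struct file *file)
	{
		struct inode *i = file->f_path.dentry->d_inode;

		/* i_pipe shares storage with i_bdev/i_cdev, so check the mode */
		return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
	}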
/*
* Determine where to splice to/from.
loff_t offset, *off;
long ret;
- ipipe = pipe_info(in->f_path.dentry->d_inode);
- opipe = pipe_info(out->f_path.dentry->d_inode);
+ ipipe = get_pipe_info(in);
+ opipe = get_pipe_info(out);
if (ipipe && opipe) {
if (off_in || off_out)
char *src;
int ret;
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
/*
* See if we can use the atomic maps, by prefaulting in the
* pages and doing an atomic copy
int error;
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
};
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
static long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags)
{
- struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
- struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
+ struct pipe_inode_info *ipipe = get_pipe_info(in);
+ struct pipe_inode_info *opipe = get_pipe_info(out);
int ret = -EINVAL;
/*
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
INIT_LIST_HEAD(&s->s_files);
#endif
INIT_LIST_HEAD(&s->s_instances);
- INIT_HLIST_HEAD(&s->s_anon);
+ INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
/*
* s_umount nests inside bd_mutex during
- * __invalidate_device(). close_bdev_exclusive()
- * acquires bd_mutex and can't be called under
- * s_umount. Drop s_umount temporarily. This is safe
- * as we're holding an active reference.
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
*/
up_write(&s->s_umount);
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
down_write(&s->s_umount);
} else {
char b[BDEVNAME_SIZE];
error_s:
error = PTR_ERR(s);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
error:
return ERR_PTR(error);
}
bdev->bd_super = NULL;
generic_shutdown_super(sb);
sync_blockdev(bdev);
- close_bdev_exclusive(bdev, mode);
+ WARN_ON_ONCE(!(mode & FMODE_EXCL));
+ blkdev_put(bdev, mode | FMODE_EXCL);
}
EXPORT_SYMBOL(kill_block_super);
return mnt;
err:
- mntput(mnt);
+ mntput_long(mnt);
return ERR_PTR(err);
}
{
int error = 0;
- *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
+ *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ mp);
if (IS_ERR(*bdevp)) {
error = PTR_ERR(*bdevp);
printk("XFS: Invalid device [%s], error=%d\n", name, error);
struct block_device *bdev)
{
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
/*
struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- ailp->xa_target = threshold_lsn;
- wake_up_process(ailp->xa_task);
+ /* only ever move the target forwards */
+ if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
+ }
}
STATIC int
long tout = 0; /* milliseconds */
while (!kthread_should_stop()) {
- schedule_timeout_interruptible(tout ?
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ /*
+ * for short sleeps indicating congestion, don't allow us to
+ * get woken early. Otherwise all we do is bang on the AIL lock
+ * without making progress.
+ */
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ else
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
/* swsusp */
try_to_freeze();
* Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
* all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly intialise
+ * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
* when freeing the inode.
*/
*/
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
xfs_inactive(ip);
}
void *elevator_private3;
struct gendisk *rq_disk;
+ struct hd_struct *part;
unsigned long start_time;
#ifdef CONFIG_BLK_CGROUP
unsigned long long start_time_ns;
unsigned char misaligned;
unsigned char discard_misaligned;
- unsigned char no_cluster;
+ unsigned char cluster;
signed char discard_zeroes_data;
};
#endif
};
-#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_CLUSTER) | \
(1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_ADD_RANDOM))
#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+ return q->limits.cluster;
+}
+
/*
* We regard a request as sync, if either a read or a sync write
*/
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
- extern void register_disk(struct gendisk *dev);
extern void generic_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_put_request(struct request *);
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
void **, unsigned long *);
+ unsigned int (*check_events) (struct gendisk *disk,
+ unsigned int clearing);
+ /* ->media_changed() is DEPRECATED, use ->check_events() instead */
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
#define SEEK_MAX SEEK_END
struct fstrim_range {
- uint64_t start;
- uint64_t len;
- uint64_t minlen;
+ __u64 start;
+ __u64 len;
+ __u64 minlen;
};
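The uint64_t → __u64 change matters because struct fstrim_range is userspace ABI, consumed through the FITRIM ioctl, and exported headers must use the __u64 family rather than the C99 types. Typical userspace usage, as a hedged sketch:

	#include <limits.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>           /* struct fstrim_range, FITRIM */

	struct fstrim_range range = {
		.start  = 0,
		.len    = ULLONG_MAX,   /* trim the whole filesystem */
		.minlen = 0,
	};

	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");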
/* And dynamically-tunable limits and defaults: */
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
-#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
struct iovec;
struct nameidata;
struct kiocb;
+struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
sector_t (*bmap)(struct address_space *, sector_t);
void (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, gfp_t);
+ void (*freepage)(struct page *);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
int (*get_xip_mem)(struct address_space *, pgoff_t, int,
void * bd_claiming;
void * bd_holder;
int bd_holders;
+ bool bd_write_holder;
#ifdef CONFIG_SYSFS
- struct list_head bd_holder_list;
+ struct gendisk * bd_holder_disk; /* for sysfs slave linking */
#endif
struct block_device * bd_contains;
unsigned bd_block_size;
#define ACL_NOT_CACHED ((void *)(-1))
struct inode {
+ /* RCU path lookup touches following: */
+ umode_t i_mode;
+ uid_t i_uid;
+ gid_t i_gid;
+ const struct inode_operations *i_op;
+ struct super_block *i_sb;
+
+ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ unsigned int i_flags;
+ struct mutex i_mutex;
+
+ unsigned long i_state;
+ unsigned long dirtied_when; /* jiffies of first dirtying */
+
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
- struct list_head i_dentry;
+ union {
+ struct list_head i_dentry;
+ struct rcu_head i_rcu;
+ };
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
- uid_t i_uid;
- gid_t i_gid;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
- umode_t i_mode;
- spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
- struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
- const struct inode_operations *i_op;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
- struct super_block *i_sb;
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
struct hlist_head i_fsnotify_marks;
#endif
- unsigned long i_state;
- unsigned long dirtied_when; /* jiffies of first dirtying */
-
- unsigned int i_flags;
-
#ifdef CONFIG_IMA
/* protected by i_lock */
unsigned int i_readcount; /* struct files open RO */
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
- struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
struct list_head __percpu *s_files;
#else
struct list_head s_files;
#endif
- /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+ /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
* generic_show_options()
*/
char __rcu *s_options;
+ const struct dentry_operations *s_d_op; /* default d_op for dentries */
};
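s_d_op gives the VFS a per-superblock default for dentry operations, so a filesystem such as gfs2 (converted earlier in this series) sets it once at mount time instead of assigning dentry->d_op at every allocation site. The intended effect inside dentry allocation is roughly the following, though the exact dcache placement is not shown in this patch:

	/* on dentry allocation: inherit the superblock-wide default, if set */
	if (dentry->d_sb->s_d_op)
		d_set_d_op(dentry, dentry->d_sb->s_d_op);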
extern struct timespec current_fs_time(struct super_block *sb);
int (*setlease)(struct file *, long, struct file_lock **);
};
+#define IPERM_FLAG_RCU 0x0001
+
struct inode_operations {
- int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ void * (*follow_link) (struct dentry *, struct nameidata *);
+ int (*permission) (struct inode *, int, unsigned int);
+ int (*check_acl)(struct inode *, int, unsigned int);
+
+ int (*readlink) (struct dentry *, char __user *,int);
+ void (*put_link) (struct dentry *, struct nameidata *, void *);
+
+ int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
- int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
- int (*permission) (struct inode *, int);
- int (*check_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
loff_t len);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
-};
+} ____cacheline_aligned;
struct seq_file;
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
- int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
/*
int (*set)(struct super_block *,void *),
void *data);
extern struct dentry *mount_pseudo(struct file_system_type *, char *,
- const struct super_operations *ops, unsigned long);
+ const struct super_operations *ops,
+ const struct dentry_operations *dops,
+ unsigned long);
extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
static inline void sb_mark_dirty(struct super_block *sb)
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
- extern struct block_device *open_by_devnum(dev_t, fmode_t);
extern void invalidate_bdev(struct block_device *);
extern int sync_blockdev(struct block_device *bdev);
extern struct super_block *freeze_bdev(struct block_device *);
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- extern int blkdev_get(struct block_device *, fmode_t);
- extern int blkdev_put(struct block_device *, fmode_t);
- extern int bd_claim(struct block_device *, void *);
- extern void bd_release(struct block_device *);
+ extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
+ void *holder);
+ extern int blkdev_put(struct block_device *bdev, fmode_t mode);
#ifdef CONFIG_SYSFS
- extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
- extern void bd_release_from_disk(struct block_device *, struct gendisk *);
+ extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
- #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
- #define bd_release_from_disk(bdev, disk) bd_release(bdev)
+ static inline int bd_link_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+ {
+ return 0;
+ }
#endif
#endif
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
- extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
- extern void close_bdev_exclusive(struct block_device *, fmode_t);
extern void blkdev_show(struct seq_file *,off_t);
#else
#endif
extern int notify_change(struct dentry *, struct iattr *);
extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int,
- int (*check_acl)(struct inode *, int));
+extern int generic_permission(struct inode *, int, unsigned int,
+ int (*check_acl)(struct inode *, int, unsigned int));
static inline bool execute_ok(struct inode *inode)
{
extern void end_writeback(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
+extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
{
ino_t res;
+ /*
+ * Don't strictly need d_lock here? If the parent ino could change
+ * then surely we'd have a deeper race in the caller?
+ */
spin_lock(&dentry->d_lock);
res = dentry->d_parent->d_inode->i_ino;
spin_unlock(&dentry->d_lock);
#define UNMAP 0x42
#define READ_TOC 0x43
#define READ_HEADER 0x44
+ #define GET_EVENT_STATUS_NOTIFICATION 0x4a
#define LOG_SELECT 0x4c
#define LOG_SENSE 0x4d
#define XDWRITEREAD_10 0x53
#define PERSISTENT_RESERVE_OUT 0x5f
#define VARIABLE_LENGTH_CMD 0x7f
#define REPORT_LUNS 0xa0
+#define SECURITY_PROTOCOL_IN 0xa2
#define MAINTENANCE_IN 0xa3
#define MAINTENANCE_OUT 0xa4
#define MOVE_MEDIUM 0xa5
#define EXCHANGE_MEDIUM 0xa6
#define READ_12 0xa8
#define WRITE_12 0xaa
+#define READ_MEDIA_SERIAL_NUMBER 0xab
#define WRITE_VERIFY_12 0xae
#define VERIFY_12 0xaf
#define SEARCH_HIGH_12 0xb0
#define SEARCH_EQUAL_12 0xb1
#define SEARCH_LOW_12 0xb2
+#define SECURITY_PROTOCOL_OUT 0xb5
#define READ_ELEMENT_STATUS 0xb8
#define SEND_VOLUME_TAG 0xb6
#define WRITE_LONG_2 0xea
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_RESULTS 0x84
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
#define READ_16 0x88
#define WRITE_16 0x8a
+#define READ_ATTRIBUTE 0x8c
+#define WRITE_ATTRIBUTE 0x8d
#define VERIFY_16 0x8f
#define WRITE_SAME_16 0x93
#define SERVICE_ACTION_IN 0x9e
/* values for service action in */
#define SAI_READ_CAPACITY_16 0x10
#define SAI_GET_LBA_STATUS 0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
#define MI_REPORT_TARGET_PGS 0x0a
+#define MI_REPORT_ALIASES 0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
+#define MI_REPORT_PRIORITY 0x0e
+#define MI_REPORT_TIMESTAMP 0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN 0x10
/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION 0x06
#define MO_SET_TARGET_PGS 0x0a
+#define MO_CHANGE_ALIASES 0x0b
+#define MO_SET_PRIORITY 0x0e
+#define MO_SET_TIMESTAMP 0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
/* values for variable length command */
+#define XDREAD_32 0x03
+#define XDWRITE_32 0x04
+#define XPWRITE_32 0x06
+#define XDWRITEREAD_32 0x07
#define READ_32 0x09
#define VERIFY_32 0x0a
#define WRITE_32 0x0b
*
*
* This file is released under the GPLv2.
*
#include "power.h"
-#define HIBERNATE_SIG "LINHIB0001"
+#define HIBERNATE_SIG "S1SUSPEND"
/*
* The swap map is a data structure used for keeping track of each page
return res;
root_swap = res;
- res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
if (res)
return res;
{
unsigned int m;
int error = 0;
+ struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page;
+ size_t i, off, unc_len, cmp_len;
+ unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
- page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ for (i = 0; i < LZO_CMP_PAGES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+
+ while (i)
+ free_page((unsigned long)page[--i]);
+
+ return -ENOMEM;
+ }
}
unc = vmalloc(LZO_UNC_SIZE);
if (!unc) {
printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- free_page((unsigned long)page);
+
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
+
return -ENOMEM;
}
cmp = vmalloc(LZO_CMP_SIZE);
if (!cmp) {
printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+
vfree(unc);
- free_page((unsigned long)page);
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
+
return -ENOMEM;
}
if (!m)
m = 1;
nr_pages = 0;
+ bio = NULL;
do_gettimeofday(&start);
error = snapshot_write_next(snapshot);
goto out_finish;
for (;;) {
- error = swap_read_page(handle, page, NULL); /* sync */
+ error = swap_read_page(handle, page[0], NULL); /* sync */
if (error)
break;
- cmp_len = *(size_t *)page;
+ cmp_len = *(size_t *)page[0];
if (unlikely(!cmp_len ||
cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
printk(KERN_ERR "PM: Invalid LZO compressed length\n");
break;
}
- memcpy(cmp, page, PAGE_SIZE);
- for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- error = swap_read_page(handle, page, NULL); /* sync */
+ for (off = PAGE_SIZE, i = 1;
+ off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+ error = swap_read_page(handle, page[i], &bio);
if (error)
goto out_finish;
+ }
- memcpy(cmp + off, page, PAGE_SIZE);
+ error = hib_wait_on_bio_chain(&bio); /* need all data now */
+ if (error)
+ goto out_finish;
+
+ for (off = 0, i = 0;
+ off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+ memcpy(cmp + off, page[i], PAGE_SIZE);
}
unc_len = LZO_UNC_SIZE;
vfree(cmp);
vfree(unc);
- free_page((unsigned long)page);
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
return error;
}
/**
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
- * be written into this memeory location
+ * be written into this memory location
*/
int swsusp_read(unsigned int *flags_p)
{
int error;
- hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ FMODE_READ, NULL);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);